c c FORTRAN PROGRAM TO ESTIMATE THE AREA UNDER Two ROC CURVEs c estimated from the same sample of CLUSTERED DATA: c CLUSTERBI.FOR c c c implicit real (a-z) integer n_clust,cl_size,i,j,k,l,m,truth(500,10),id(500) integer count,sim,abn,nor,kount,compar dimension test(2,500,10),vx(2,500,10),vy(2,500,10), + area(2),s_10(2),s_01(2),s_11(2),var(2) logical d_nor,d_abn c c open(unit=1, file='/home/mlieber/data.dat', status='old') c open(unit=2, file='cluster.out', status='new') c do 1000, sim=1,1 c print *,'Enter the total # of patients (i.e. # of clusters)' read *,n_clust print *,'Enter the maximum # of observations per patient' read *,cl_size abn=0 nor=0 c 20 format(i3,1x,i2) 21 format(i3,t5,f3.0,t9,i1,t11,f3.0,t15,i1) c do 50, i=1, n_clust c do 45, k=1,cl_size read(1,21)id(i),test(1,i,1),truth(i,1),test(1,i,2),truth(i,2) read(1,21)id(i),test(2,i,1),truth(i,1),test(2,i,2),truth(i,2) 45 continue 50 continue c c c Compute the X-structural components c do 60, i=1,n_clust do 59, k=1,cl_size do 58, j=1,2 vx(j,i,k)=99 58 continue 59 continue 60 continue c c c do 110, m=1,2 c do 100, i=1,n_clust do 95, k=1,cl_size sum=0.0 count=0 c if(truth(i,k) .eq. 0 .or. truth(i,k) .eq. 9)goto 95 if(m .eq. 1)abn=abn+1 c do 90, j=1,n_clust do 85, l=1,cl_size c if(truth(j,l) .eq. 1 .or. truth(j,l) .eq. 9)goto 85 c count=count+1 if(test(m,i,k) .gt. test(m,j,l))sum=sum+1 if(test(m,i,k) .eq. test(m,j,l))sum=sum+0.5 c c 85 continue 90 continue c c vx(m,i,k)=sum/real(count) c c 95 continue 100 continue c 110 continue c c c Compute the Y-structural components c c do 130, m=1,2 do 120, i=1,n_clust do 119, k=1,cl_size vy(m,i,k)=99 119 continue 120 continue 130 continue c c do 210, m=1,2 c do 200, i=1,n_clust do 195, k=1,cl_size sum=0.0 count=0 c if(truth(i,k) .eq. 1 .or. truth(i,k) .eq. 9)goto 195 if(m .eq. 1)nor=nor+1 c do 190, j=1,n_clust do 185, l=1,cl_size c if(truth(j,l) .eq. 0 .or. truth(j,l) .eq. 9)goto 185 c count=count+1 if(test(m,i,k) .lt. test(m,j,l))sum=sum+1 if(test(m,i,k) .eq. test(m,j,l))sum=sum+0.5 c c 185 continue 190 continue c c vy(m,i,k)=sum/real(count) c c 195 continue 200 continue c 210 continue c c c Compute the areas c c do 251, m=1,2 c count=0 sum=0.0 c do 250, i=1,n_clust do 249, k=1,cl_size if(vx(m,i,k) .gt. 1.0)goto 249 sum=sum+vx(m,i,k) count=count+1 249 continue 250 continue c area(m)=sum/real(count) print *,'The estimated area for test ',m,' is ',area(m) c 251 continue c c c Compute the variance terms S_10 c c do 276, m=1,2 c sqsum=0.0 kount=0 c c do 275, i=1,n_clust count=0 sum=0.0 do 265, k=1,cl_size if(vx(m,i,k) .le. 1.0)then count=count+1 sum=sum+vx(m,i,k) endif 265 continue if(count .eq. 0)goto 275 kount=kount+1 sqsum=sqsum+(sum-real(count)*area(m))**2 275 continue c s_10(m)=sqsum*real(kount)/real((abn)*(kount-1)) c 276 continue c c c Compute the variance terms S_01 c c do 296, m=1,2 c sqsum=0.0 kount=0 c c do 295, i=1,n_clust count=0 sum=0.0 c do 285, k=1,cl_size if(vy(m,i,k) .le. 1.0)then count=count+1 sum=sum+vy(m,i,k) endif 285 continue if(count .eq. 0)goto 295 kount=kount+1 sqsum=sqsum+(sum-real(count)*area(m))**2 295 continue c s_01(m)=sqsum*real(kount)/real((nor)*(kount-1)) c c 296 continue c c Compute the covariance terms, S_11 c c do 330, m=1,2 c sqsum=0.0 kount=0 compar=0 c c do 320, i=1,n_clust c count1=0 sum1=0.0 count2=0 sum2=0.0 d_abn=.false. d_nor=.false. c do 310, k=1,cl_size if(vx(m,i,k) .le. 1.0)then d_abn=.true. count1=count1+1 sum1=sum1+vx(m,i,k) endif if(vy(m,i,k) .le. 1.0)then d_nor=.true. count2=count2+1 sum2=sum2+vy(m,i,k) endif 310 continue c if(d_abn .eq. .false. .or. d_nor .eq. .false.)goto 320 kount=kount+1 compar=compar+(count1*count2) sqsum=sqsum+(sum1-real(count1)*area(m))*(sum2- + real(count2)*area(m)) 320 continue c if(kount .eq. 0)then s_11(m)=0.00 compar=99 goto 325 endif s_11(m)=sqsum*real(n_clust)/real(n_clust-1) c c 325 continue c c c Compute the Variances of the areas c c c var(m)=(1.0/real(abn))*s_10(m)+(1.0/real(nor))*s_01(m)+ + (2.0/real(abn*nor))*s_11(m) print *,'variance for test ',m,var(m) c 330 continue c c c Compute the covariance between the two curves: c c first term: c sqsum=0.0 kount=0 c c do 350, i=1,n_clust c count1=0 sum1=0.0 sum2=0.0 d_abn=.false. c do 340, k=1,cl_size if(vx(1,i,k) .le. 1.0)then d_abn=.true. count1=count1+1 sum1=sum1+vx(1,i,k) sum2=sum2+vx(2,i,k) endif 340 continue c if(d_abn .eq. .false.)goto 350 kount=kount+1 sqsum=sqsum+(sum1-real(count1)*area(1))*(sum2- + real(count1)*area(2)) 350 continue c if(kount .eq. 0)then s_100=0.00 goto 355 endif s_100=sqsum*real(kount)/real((abn)* + (kount-1)) print *,'s_100 ',s_100 c 355 continue c c c second term: c sqsum=0.0 kount=0 c c do 370, i=1,n_clust c count1=0 sum1=0.0 sum2=0.0 d_nor=.false. c do 360, k=1,cl_size if(vy(1,i,k) .le. 1.0)then d_nor=.true. count1=count1+1 sum1=sum1+vy(1,i,k) sum2=sum2+vy(2,i,k) endif 360 continue c if(d_nor .eq. .false.)goto 370 kount=kount+1 sqsum=sqsum+(sum1-real(count1)*area(1))*(sum2- + real(count1)*area(2)) 370 continue c if(kount .eq. 0)then s_010=0.00 goto 375 endif s_010=sqsum*real(kount)/real((nor)* + (kount-1)) print *,'s_010 ',s_010 c 375 continue c c c third component c c sqsum=0.0 kount=0 compar=0 c c do 390, i=1,n_clust c count1=0 sum1=0.0 count2=0 sum2=0.0 d_abn=.false. d_nor=.false. c do 380, k=1,cl_size if(vx(1,i,k) .le. 1.0)then d_abn=.true. count1=count1+1 sum1=sum1+vx(1,i,k) endif if(vy(2,i,k) .le. 1.0)then d_nor=.true. count2=count2+1 sum2=sum2+vy(2,i,k) endif 380 continue c if(d_abn .eq. .false. .or. d_nor .eq. .false.)goto 390 kount=kount+1 compar=compar+(count1*count2) sqsum=sqsum+(sum1-real(count1)*area(1))*(sum2- + real(count2)*area(2)) 390 continue c if(kount .eq. 0)then s_110=0.00 compar=99 goto 395 endif s_110=sqsum*real(n_clust)/real(n_clust-1) print *,'s_110 ',s_110 c c 395 continue c c c fourth component c c sqsum=0.0 kount=0 compar=0 c c do 410, i=1,n_clust c count1=0 sum1=0.0 count2=0 sum2=0.0 d_abn=.false. d_nor=.false. c do 400, k=1,cl_size if(vx(2,i,k) .le. 1.0)then d_abn=.true. count1=count1+1 sum1=sum1+vx(2,i,k) endif if(vy(1,i,k) .le. 1.0)then d_nor=.true. count2=count2+1 sum2=sum2+vy(1,i,k) endif 400 continue c if(d_abn .eq. .false. .or. d_nor .eq. .false.)goto 410 kount=kount+1 compar=compar+(count1*count2) sqsum=sqsum+(sum2-real(count2)*area(1))*(sum1- + real(count1)*area(2)) 410 continue c if(kount .eq. 0)then s_111=0.00 compar=99 goto 415 endif s_111=sqsum*real(n_clust)/real(n_clust-1) print *,'s_111 ',s_111 c c 415 continue c c c c Then, the covariance is: c covar=s_100/real(abn)+s_010/real(nor)+ + s_110/real(nor*abn)+s_111/real(abn*nor) print *,'covariance is ',covar c c c Then, the variance of the difference is: c vardif=var(1)+var(2)-2*covar print *,'The variance of the estimated difference between' print *,'the areas is ',vardif c c c write(2,500)sim,area(1),var(1),area(2),var(2), c + covar,vardif,nor,abn c 500 format(i4,1x,2(f6.4,1x,f8.6,1x),2(f10.8,1x), + 2(i3,1x)) c c c 1000 continue c stop end