/*
 * This is a SAS, largely SAS/IML, program to compute Mann-Whitney
 * statistics and component based estimates of their variance
 * covariance matrix.  The method is closely related to the jackknife.
 * The method of components is well described in chapter 3 of Puri and
 * Sen, Multivariate Nonparametric Methods, and in Randles and Wolfe's
 * book on nonparametrics.  This program should run on either version
 * 5 or version 6 of SAS.  To use this program you must set the
 * following macro variables:
 *
 *      &indata    set to the name of the data set.
 *      &vars      set to the names of the variables.
 *      &ind       set to a zero/one indicator variable
 *      &contrast  set to a contrast of interest.  The
 *                 variables are assumed to be in the
 *                 same order as given in the &vars macro.
 *
 * This data set was used in the paper:
 *   "Comparing the Areas Under Two or More Correlated Receiver
 *   Operating Characteristic Curves: A Nonparametric Approach",
 *   ER DeLong, DM DeLong and DL Clarke-Pearson, Biometrics 44,
 *   p. 837-845.
 * The results vary somewhat from the paper because this program
 * computes components based on pairwise deletion of missing
 * observations, whereas the original analysis used pairwise deletion
 * after the components were calculated.  The current strategy is
 * felt to be more conservative with respect to possible association
 * of response with the probability of the response not being observed.
 *************************************************************************/

%let indata   = roc;                        /* name of data set              */
%let vars     = totscore alb tp;            /* variables                     */
%let ind      = popind;                     /* population indicator variable */
%let contrast = %str( { 1 -1 0, 1 0 -1} );  /* contrast                      */

/* Example data.  In-stream data must begin on the line after CARDS and,  */
/* with plain list input, carry one observation per line.                 */
data roc;
   input alb tp totscore popind;
   totscore = 10 - totscore;
   cards;
3.0 5.8 10 0
3.2 6.3 5 1
3.9 6.8 3 1
2.8 4.8 6 0
3.2 5.8 3 1
0.9 4.0 5 0
2.5 5.7 8 0
1.6 5.6 5 1
3.8 5.7 5 1
3.7 6.7 6 1
. . 6 1
3.2 5.4 4 1
3.8 6.6 6 1
4.1 6.6 5 1
3.6 5.7 5 1
4.3 7.0 4 1
3.6 6.7 4 0
2.3 4.4 6 1
4.2 7.6 4 0
4.0 6.6 6 0
3.5 5.8 6 1
3.8 6.8 7 1
3.0 4.7 8 0
4.5 7.4 5 1
3.7 7.4 5 1
3.1 6.6 6 1
4.1 8.2 6 1
4.3 7.0 5 1
4.3 6.5 4 1
3.2 5.1 5 1
2.6 4.7 6 1
3.3 6.8 6 0
1.7 4.0 7 0
. . 6 1
3.7 6.1 5 1
3.3 6.3 7 1
4.2 7.7 6 1
3.5 6.2 5 1
2.9 5.7 9 0
2.1 4.8 7 1
. . 8 1
2.8 6.2 8 0
. . 7 1
. . 7 1
4.0 7.0 7 1
3.3 5.7 6 1
3.7 6.9 5 1
2.0 . 7 1
3.6 6.6 5 1
;
run;

/* Keep only the analysis variables and drop any observation whose        */
/* indicator is not exactly 0 or 1 (a missing indicator is dropped too).  */
data comp;
   set &indata(keep = &vars &ind);
   if &ind ^= 0 and &ind ^= 1 then do;
      put 'indicator is not zero/one';
      delete;
   end;
run;

proc iml;

start mwcomp(psi,z);
   /* Compute the Mann-Whitney components.                                */
   /*   z is (nn by 2);                                                   */
   /*   z[,1] is the column of data values;                               */
   /*   z[,2] is the column of indicator variables;                       */
   /*   z[i,2]=1 if the observation is from the x population;             */
   /*   z[i,2]=0 if the observation is from the y population;             */
   /*   psi is the returned vector of u-statistic components.             */
   /* The caller must supply at least one x and one y observation.        */
   rz  = ranktie( z[,1] );                 /* average ranks, pooled       */
   nx  = sum( z[,2] );                     /* number of x's               */
   ny  = nrow(z) - nx;                     /* number of y's               */
   psi = j(nrow(z),1,0);

   /* x component: (pooled rank - rank among the x's) counts the y's      */
   /* below this x, with ties counted one half, scaled by ny.             */
   xrows = loc( z[,2]=1 );
   psi[xrows] = (rz[xrows] - ranktie(z[xrows,1]))/ny;

   /* y component: nx - (pooled rank - rank among the y's) counts the     */
   /* x's above this y, with ties counted one half, scaled by nx.         */
   yrows = loc( z[,2]=0 );
   psi[yrows] = (nx + ranktie(z[yrows,1]) - rz[yrows])/nx;

   free rz xrows yrows nx ny;              /* free space                  */
finish;

start mwvar(t,v,nx,ny,z);
   /* Compute Mann-Whitney statistics and their variance matrix.          */
   /*   input z, n by (k+1);                                              */
   /*   z[,1:k] are the different variables;                              */
   /*   z[,k+1] are indicator values,                                     */
   /*       1 if the observation is from population x and                 */
   /*       0 if the observation is from population y;                    */
   /*   t is the k by k matrix of estimated statistics;                   */
   /*       the (i,j) entry is the Mann-Whitney statistic for the         */
   /*       i-th column when used with the j-th column.  Only the         */
   /*       observations with nonmissing values in both columns are       */
   /*       used.  The diagonal elements are, hence, based only on the    */
   /*       single column of values.                                      */
   /*   v is the k by k estimated variance matrix;                        */
   /*   nx is the matrix of x population counts on a pairwise basis;      */
   /*   ny is the matrix of y population counts on a pairwise basis.      */
   /* nx and ny are filled symmetrically.  A pair of variables with       */
   /* fewer than two x's or two y's among its jointly nonmissing rows     */
   /* gets missing values in t and v, since its variance is undefined.    */
   k   = ncol(z) - 1;
   ind = z[,k+1];
   v   = j(k,k,0);
   t   = v;
   nx  = v;
   ny  = v;

   /* The following computes components after pairwise deletion of       */
   /* observations with missing values.  If either there are no missing   */
   /* values or it is desired to use the components without doing         */
   /* pairwise deletion first, the nested do loops could be avoided.      */
   do ii = 1 to k;
      do jj = 1 to ii;
         who = loc( (z[,ii]^=.)#(z[,jj]^=.) );   /* nonmissing pairs      */

         /* LOC returns an empty matrix when nothing qualifies, so the    */
         /* counts are taken before anything is subscripted by who.       */
         m = 0;
         n = 0;
         if ncol(who) > 0 then do;
            inow = ind[who,];                     /* x's and y's          */
            m = inow[+];                          /* current x's          */
            n = ncol(who) - m;                    /* current y's          */
         end;
         nx[ii,jj] = m;  nx[jj,ii] = m;
         ny[ii,jj] = n;  ny[jj,ii] = n;

         if min(m, n) > 1 then do;
            run mwcomp(psii,(z[,ii]||ind)[who,]); /* components           */
            run mwcomp(psij,(z[,jj]||ind)[who,]);
            mi = (psii#inow)[+] / m;              /* means                */
            mj = (psij#inow)[+] / m;
            t[ii,jj] = mi;
            t[jj,ii] = mj;
            psii = psii - mi;                     /* center               */
            psij = psij - mj;
            v[ii,jj] = (psii#psij#inow)[+]     / (m#(m-1))
                     + (psii#psij#(1-inow))[+] / (n#(n-1));
            v[jj,ii] = v[ii,jj];
         end;
         else do;
            print 'Too few jointly nonmissing x or y observations for variable pair' ii jj;
            t[ii,jj] = .;  t[jj,ii] = .;
            v[ii,jj] = .;  v[jj,ii] = .;
         end;
      end;
   end;
   free psii psij inow ind who;
finish;

/* start of execution of the IML program */
use comp var {&vars &ind};
read all into data [colname=names];

run mwvar(t,v,nx,ny,data);                 /* estimates and variances     */
vname   = names[1:(ncol(names)-1)];        /* drop the indicator name     */
manwhit = vecdiag(t);
estok   = 1 - any(v = .);                  /* 1 when every pair was usable */

print 'Pairwise Deletion Mann-Whitney Statistics',
      t [colname=vname rowname=vname];
print 'Mann-Whitney Statistics',
      manwhit [rowname=vname];
print 'Estimated Variance Matrix',
      v [colname=vname rowname=vname];

if estok then do;
   c = sqrt( vecdiag(v) );
   c = v / (c@c`);                         /* outer product of std errors */
   print 'Estimated Correlations',
         c [colname=vname rowname=vname];
end;

print 'X populations sample sizes',
      nx [colname=vname rowname=vname];
print 'Y populations sample sizes',
      ny [colname=vname rowname=vname];

if estok then do;
   l = &contrast ;
   print 'Contrast of Interest', l [colname=vname];
   lt = l*manwhit;
   print 'Estimates of Contrast', lt;
   lv = l*v*l`;
   print 'Variance Estimates of Contrast', lv;

   /* Quadratic-form chi-square; the generalized inverse lets the         */
   /* contrast be rank deficient, and trace(ginv(lv)*lv) is its rank.     */
   c     = ginv(lv);
   chisq = lt`*c*lt;
   df    = trace(c*lv);
   p     = 1 - probchi( chisq, df );
   chiname = {'Chisq' 'DF' 'Prob>Chi'};
   print (chisq||df||p) [colname=chiname];
end;
else print 'Correlations and contrast test skipped: some variance estimates are missing';

quit;