1 #include "mrilib.h"
2
3 /*----------------------------------------------------------------------------*/
4
cf_nor(double x)5 static double cf_nor( double x ){ return 1.0-0.5*erfc(x/1.414213562373095); }
6
7 /*----------------------------------------------------------------------------*/
8
anderson_darling_statistic(int npt,double * val,double (* cf)(double))9 double anderson_darling_statistic( int npt, double *val, double (*cf)(double) )
10 {
11 double *yyy , asum , ccc ; int ii ;
12
13 if( npt < 5 || val == NULL || cf == NULL ) return 0.0 ;
14
15 yyy = (double *)malloc(sizeof(double)*npt) ;
16 memcpy( yyy , val , sizeof(double)*npt ) ;
17 qsort_double( npt , yyy ) ;
18
19 asum = 0.0 ;
20 for( ii=0 ; ii < npt ; ii++ ){
21 ccc = cf(yyy[ii]) ;
22 if( ccc > 0.0 && ccc < 1.0 )
23 asum += (2*ii+1) * log(ccc) + (2*(npt-ii)-1) * log(1.0-ccc) ;
24 }
25
26 free(yyy) ; asum = -npt - asum / npt ; return asum ;
27 }
28
29 /*----------------------------------------------------------------------------*/
30
anderson_darling_normal(int npt,double * xxx)31 double anderson_darling_normal( int npt , double *xxx )
32 {
33 double *vvv , xm , xq , ad ; int ii ;
34
35 if( npt < 5 || xxx == NULL ) return 0.0 ;
36
37 vvv = (double *)malloc(sizeof(double)*npt) ;
38 memcpy( vvv , xxx , sizeof(double)*npt ) ;
39
40 xm = 0.0 ;
41 for( ii=0 ; ii < npt ; ii++ ) xm += vvv[ii] ;
42
43 xm /= npt ; xq = 0.0 ;
44 for( ii=0 ; ii < npt ; ii++ ) xq += (vvv[ii]-xm)*(vvv[ii]-xm) ;
45 if( xq <= 0.0 ){ free(vvv) ; return 0.0 ; }
46
47 xq = sqrt( (npt-1.0) / xq ) ;
48 for( ii=0 ; ii < npt ; ii++ ) vvv[ii] = (vvv[ii]-xm) * xq ;
49
50 ad = anderson_darling_statistic( npt , vvv , cf_nor ) ;
51 ad *= ( 1.0 + 4.0/npt - 25.0/(npt*npt) ) ;
52 free(vvv) ; return ad ;
53 }
54
55 /*----------------------------------------------------------------------------*/
56
57 #include "zgaussian.c"
58
anderson_darling_simulate(int npt,int ntrial)59 float * anderson_darling_simulate( int npt , int ntrial )
60 {
61 float *ad ; double *xxx ; int ii , jj ;
62
63 if( npt < 5 || ntrial <= 0 ) return NULL ;
64
65 ad = (float * )malloc(sizeof(float)*ntrial) ;
66 xxx = (double *)malloc(sizeof(double)*npt) ;
67 for( jj=0 ; jj < ntrial ; jj++ ){
68 for( ii=0 ; ii < npt ; ii++ )
69 xxx[ii] = (double)(zgaussian()+zgaussian()+zgaussian()+1.0f) ;
70 ad[jj] = - (float)anderson_darling_normal( npt , xxx ) ;
71 }
72 qsort_float( ntrial , ad ) ;
73 for( jj=0 ; jj < ntrial ; jj++ ) ad[jj] = -ad[jj] ;
74 return ad ;
75 }
76
77 /*----------------------------------------------------------------------------*/
78
anderson_darling_expify(int naval,float * aval,int natr,float * atr,int dopval)79 void anderson_darling_expify( int naval, float *aval, int natr, float *atr, int dopval )
80 {
81 float *qaval , avv , pfac,pval,dd ; int *kaval , jj,kk ;
82
83 if( naval <= 0 || aval == NULL || natr < 100 || atr == NULL ) return ;
84
85 qaval = (float *)malloc(sizeof(float)*naval) ;
86 kaval = (int * )malloc(sizeof(int) *naval) ;
87 for( kk=0 ; kk < naval ; kk++ ){ qaval[kk] = -aval[kk]; kaval[kk] = kk; }
88
89 qsort_floatint( naval , qaval , kaval ) ;
90 for( kk=0 ; kk < naval ; kk++ ) qaval[kk] = -qaval[kk];
91
92 pfac = 1.0f / (1.0f+natr) ;
93
94 /* process aval[] entries larger than the largest atr value */
95
96 for( kk=0 ; kk < naval ; kk++ ){
97 if( qaval[kk] >= atr[0] ){
98 pval = pfac * atr[0] / qaval[kk] ; /* make up a small p-value */
99 aval[kaval[kk]] = (dopval) ? pval : -logf(pval) ; /* convert to exp */
100 } else {
101 break ;
102 }
103 }
104
105 /* process the remaining aval[] entries, starting at aval[kk] */
106
107 for( jj=0 ; kk < naval ; kk++ ){
108 avv = qaval[kk] ;
109 for( ; jj < natr && avv <= atr[jj] ; jj++ ) ; /* get bounding atr[] vals */
110 if( jj == natr ) break ;
111 pval = jj ; dd = atr[jj] - atr[jj-1] ;
112 if( dd != 0.0f ) pval += (atr[jj-1]-avv) / dd ; /* interpolate a pval */
113 pval *= pfac ;
114 aval[kaval[kk]] = (dopval) ? pval : -logf(pval) ; /* convert to exp */
115 }
116 /** ININFO_message("kk = %d/%d after middle step",kk,naval) ; **/
117
118 dd = -logf(1.0f-pfac) ;
119 for( ; kk < naval ; kk++ ) aval[kaval[kk]] = dd ;
120
121 free(kaval) ; free(qaval) ; return ;
122 }
123
124 /*----------------------------------------------------------------------------*/
125
main(int argc,char * argv[])126 int main( int argc , char *argv[] )
127 {
128 THD_3dim_dataset *inset=NULL , *outset ;
129 int nvox , nvals , nopt=1 , ii,jj , nbad , ntr , convertize=1 , dopval=0 ;
130 char *prefix="NormTest" ;
131 float *avar , *dval , *atr ; double *eval ;
132
133 if( argc < 2 || strcasecmp(argv[1],"-help") == 0 ){
134 printf(
135 "Program: 3dNormalityTest\n"
136 "\n"
137 "* This program tests the input values at each voxel for normality,\n"
138 " using the Anderson-Darling method:\n"
139 " http://en.wikipedia.org/wiki/Anderson-Darling_test\n"
140 "\n"
141 "* Each voxel must have at least 5 values (sub-bricks).\n"
142 "\n"
143 "* The resulting dataset has the Anderson-Darling statistic converted\n"
144 " to an exponentially distributed variable, so it can be thresholded\n"
145 " with the AFNI slider and display a nominal p-value below. If you\n"
146 " want the A-D statistic un-converted, use the '-noexp' option.\n"
147 "\n"
148 "* Conversion of the A-D statistic to a p-value is done via simulation\n"
149 " of the null distribution.\n"
150 "\n"
151 "OPTIONS:\n"
152 "--------\n"
153 " -input dset = Specifies the input dataset.\n"
154 " Alternatively, the input dataset can be given as the\n"
155 " last argument on the command line, after all other\n"
156 " options.\n"
157 "\n"
158 " -prefix ppp = Specifies the name for the output dataset.\n"
159 "\n"
160 " -noexp = Do not convert the A-D statistic to an exponentially\n"
161 " distributed value -- just leave the raw A-D score in\n"
162 " the output dataset.\n"
163 " -pval = Output the results as a pure (estimated) p-value.\n"
164 "\n"
165 "EXAMPLES:\n"
166 "---------\n"
167 "(1) Simulate a 2D square dataset with the values being normal on one\n"
168 "edge and exponentially distributed on the other, and mixed in-between.\n"
169 "\n"
170 " 3dUndump -dimen 101 101 1 -prefix UUU\n"
171 " 3dcalc -datum float -a UUU+orig -b '1D: 0 0 0 0 0 0 0 0 0 0' -prefix NNN \\\n"
172 " -expr 'i*gran(0,1.4)+(100-i)*eran(4)'\n"
173 " rm -f UUU+orig.*\n"
174 " 3dNormalityTest -prefix Ntest -input NNN+orig\n"
175 " afni -com 'OPEN_WINDOW axialimage' Ntest+orig\n"
176 "\n"
177 "In the above script, the UUU+orig dataset is created just to provide a spatial\n"
178 "template for 3dcalc. The '1D: 0 ... 0' input to 3dcalc is a time template\n"
179 "to create a dataset with 10 time points. The values are random deviates,\n"
180 "ranging from pure Gaussian where i=100 to pure exponential at i=0.\n"
181 "\n"
182 "(2) Simulate a single logistic random variable into a 1D file and compute\n"
183 "the A-D nominal p-value:\n"
184 "\n"
185 " 1deval -num 200 -expr 'lran(2)' > logg.1D\n"
186 " 3dNormalityTest -input logg.1D\\' -prefix stdout: -pval\n"
187 "\n"
188 "Note the necessity to transpose the logg.1D file (with the \\' operator),\n"
189 "since 3D programs interpret each 1D file row as a voxel time series.\n"
190 "\n"
191 "++ March 2012 -- by The Ghost of Carl Friedrich Gauss\n"
192 ) ;
193 PRINT_COMPILE_DATE ; exit(0) ;
194 }
195
196 /*--- deal with options ---*/
197
198 nopt = 1 ;
199 while( nopt < argc && argv[nopt][0] == '-' ){
200
201 if( strcasecmp(argv[nopt],"-noexp") == 0 ){
202 convertize = 0 ; nopt++ ; continue ;
203 }
204 if( strcasecmp(argv[nopt],"-pval") == 0 ){
205 convertize = 1 ; dopval = 1 ; nopt++ ; continue ;
206 }
207
208 if( strcasecmp(argv[nopt],"-input") == 0 ){
209 if( inset != NULL ) ERROR_exit("Can't have 2 -input options!") ;
210 if( ++nopt >= argc ) ERROR_exit("Need an argument after -input!") ;
211 inset = THD_open_dataset(argv[nopt]) ;
212 CHECK_OPEN_ERROR(inset,argv[nopt]) ;
213 nopt++ ; continue ;
214 }
215
216 if( strcasecmp(argv[nopt],"-prefix") == 0 ){
217 if( ++nopt >= argc ) ERROR_exit("Need an argument after -prefix!") ;
218 prefix = strdup(argv[nopt]) ;
219 if( !THD_filename_ok(prefix) )
220 ERROR_exit("-prefix '%s' has illegal characters :-(",prefix) ;
221 nopt++ ; continue ;
222 }
223
224 ERROR_message("Unknown option: %s",argv[nopt]) ;
225 suggest_best_prog_option(argv[0], argv[nopt]);
226 exit(1);
227
228 } /* end of loop over options */
229
230 mainENTRY("3dNormalityTest main"); machdep(); AFNI_logger("3dNormalityTest",argc,argv);
231 PRINT_VERSION("3dNormalityTest"); AUTHOR("The Ghost of Gauss");
232
233 /*--- deal with input ---*/
234
235 if( inset == NULL ){
236 if( nopt < argc ){
237 inset = THD_open_dataset(argv[nopt]) ;
238 CHECK_OPEN_ERROR(inset,argv[nopt]) ;
239 } else {
240 ERROR_exit("No input dataset???") ;
241 }
242 }
243 nvals = DSET_NVALS(inset) ;
244 nvox = DSET_NVOX(inset) ;
245 if( nvals < 5 )
246 ERROR_exit("Input dataset '%s' has %d sub-bricks, less than 5 :-(",
247 argv[nopt] , nvals ) ;
248
249 EDIT_floatize_dataset(inset) ;
250 if( DSET_pure_type(inset) != MRI_float )
251 ERROR_exit("Couldn't load input dataset properly :-(") ;
252
253 /*--- create output dataset ---*/
254
255 outset = EDIT_empty_copy(inset) ;
256 EDIT_dset_items( outset ,
257 ADN_prefix , prefix ,
258 ADN_nvals , 1 ,
259 ADN_ntt , 0 ,
260 ADN_type , ISHEAD(inset) ? HEAD_FUNC_TYPE :
261 GEN_FUNC_TYPE ,
262 ADN_func_type , FUNC_BUCK_TYPE ,
263 ADN_none ) ;
264 EDIT_substitute_brick( outset , 0 , MRI_float , NULL ) ;
265 avar = DSET_BRICK_ARRAY(outset,0) ;
266 if( avar == NULL ) ERROR_exit("Can't create output dataset array?!") ;
267
268 dval = (float * )malloc(sizeof(float )*nvals) ;
269 eval = (double *)malloc(sizeof(double)*nvals) ;
270
271 INFO_message("Computing Anderson-Darling statistics for all voxels") ;
272
273 for( nbad=ii=0 ; ii < nvox ; ii++ ){
274 THD_extract_array( ii , inset , 0 , dval ) ;
275 for( jj=1 ; jj < nvals ; jj++ ) if( dval[jj] != dval[0] ) break ;
276 if( jj == nvals ){ avar[ii] = 0.0f ; nbad++ ; continue ; }
277 for( jj=0 ; jj < nvals ; jj++ ) eval[jj] = (double)dval[jj] ;
278 avar[ii] = anderson_darling_normal( nvals , eval ) ;
279 }
280
281 if( nbad > 0 )
282 ININFO_message("%d voxel%s were constant ==> skipped test there",
283 nbad , (nbad > 1) ? "s" : "\0" ) ;
284
285 DSET_delete(inset) ; free(dval) ; free(eval) ;
286
287 if( convertize ){
288 ntr = 9*nvox ;
289 if( ntr < 300000 ) ntr = 300000 ;
290 else if( ntr > 3000000 ) ntr = 3000000 ;
291
292 INFO_message("Simulating A-D null distribution: %d trials",ntr) ;
293
294 atr = anderson_darling_simulate( nvals , ntr ) ;
295 if( atr == NULL ) ERROR_exit("Simulation failed!?") ;
296 /** ININFO_message("range %g .. %g",atr[0],atr[ntr-1]) ; **/
297
298 if( dopval )
299 INFO_message("Converting A-D statistics to p-values") ;
300 else
301 INFO_message("Converting A-D statistics to exponential distribution") ;
302
303 anderson_darling_expify( nvox , avar , ntr , atr , dopval ) ;
304 free(atr) ;
305
306 EDIT_BRICK_TO_FIGT( outset , 0 , 1.0f , 1.0f ) ;
307 EDIT_BRICK_LABEL ( outset , 0 , "A-D_expify" ) ;
308 } else {
309 EDIT_BRICK_LABEL ( outset , 0 , "A-D_statistic" ) ;
310 }
311
312 DSET_write( outset ) ;
313 WROTE_DSET( outset ) ;
314 exit(0) ;
315 }
316