1 #include "mrilib.h"
2 
3 /*----------------------------------------------------------------------------*/
4 
cf_nor(double x)5 static double cf_nor( double x ){ return 1.0-0.5*erfc(x/1.414213562373095); }
6 
7 /*----------------------------------------------------------------------------*/
8 
anderson_darling_statistic(int npt,double * val,double (* cf)(double))9 double anderson_darling_statistic( int npt, double *val, double (*cf)(double) )
10 {
11    double *yyy , asum , ccc ; int ii ;
12 
13    if( npt < 5 || val == NULL || cf == NULL ) return 0.0 ;
14 
15    yyy = (double *)malloc(sizeof(double)*npt) ;
16    memcpy( yyy , val , sizeof(double)*npt ) ;
17    qsort_double( npt , yyy ) ;
18 
19    asum = 0.0 ;
20    for( ii=0 ; ii < npt ; ii++ ){
21      ccc   = cf(yyy[ii]) ;
22      if( ccc > 0.0 && ccc < 1.0 )
23        asum += (2*ii+1) * log(ccc) + (2*(npt-ii)-1) * log(1.0-ccc) ;
24    }
25 
26    free(yyy) ; asum = -npt - asum / npt ; return asum ;
27 }
28 
29 /*----------------------------------------------------------------------------*/
30 
anderson_darling_normal(int npt,double * xxx)31 double anderson_darling_normal( int npt , double *xxx )
32 {
33    double *vvv , xm , xq , ad ; int ii ;
34 
35    if( npt < 5 || xxx == NULL ) return 0.0 ;
36 
37    vvv = (double *)malloc(sizeof(double)*npt) ;
38    memcpy( vvv , xxx , sizeof(double)*npt ) ;
39 
40    xm = 0.0 ;
41    for( ii=0 ; ii < npt ; ii++ ) xm += vvv[ii] ;
42 
43    xm /= npt ; xq = 0.0 ;
44    for( ii=0 ; ii < npt ; ii++ ) xq += (vvv[ii]-xm)*(vvv[ii]-xm) ;
45    if( xq <= 0.0 ){ free(vvv) ; return 0.0 ; }
46 
47    xq = sqrt( (npt-1.0) / xq ) ;
48    for( ii=0 ; ii < npt ; ii++ ) vvv[ii] = (vvv[ii]-xm) * xq ;
49 
50    ad = anderson_darling_statistic( npt , vvv , cf_nor ) ;
51    ad *= ( 1.0 + 4.0/npt - 25.0/(npt*npt) ) ;
52    free(vvv) ; return ad ;
53 }
54 
55 /*----------------------------------------------------------------------------*/
56 
57 #include "zgaussian.c"
58 
anderson_darling_simulate(int npt,int ntrial)59 float * anderson_darling_simulate( int npt , int ntrial )
60 {
61    float *ad ; double *xxx ; int ii , jj ;
62 
63    if( npt < 5 || ntrial <= 0 ) return NULL ;
64 
65    ad  = (float * )malloc(sizeof(float)*ntrial) ;
66    xxx = (double *)malloc(sizeof(double)*npt) ;
67    for( jj=0 ; jj < ntrial ; jj++ ){
68      for( ii=0 ; ii < npt ; ii++ )
69        xxx[ii] = (double)(zgaussian()+zgaussian()+zgaussian()+1.0f) ;
70      ad[jj] = - (float)anderson_darling_normal( npt , xxx ) ;
71    }
72    qsort_float( ntrial , ad ) ;
73    for( jj=0 ; jj < ntrial ; jj++ ) ad[jj] = -ad[jj] ;
74    return ad ;
75 }
76 
77 /*----------------------------------------------------------------------------*/
78 
anderson_darling_expify(int naval,float * aval,int natr,float * atr,int dopval)79 void anderson_darling_expify( int naval, float *aval, int natr, float *atr, int dopval )
80 {
81    float *qaval , avv , pfac,pval,dd ; int *kaval , jj,kk ;
82 
83    if( naval <= 0 || aval == NULL || natr < 100 || atr == NULL ) return ;
84 
85    qaval = (float *)malloc(sizeof(float)*naval) ;
86    kaval = (int *  )malloc(sizeof(int)  *naval) ;
87    for( kk=0 ; kk < naval ; kk++ ){ qaval[kk] = -aval[kk]; kaval[kk] = kk; }
88 
89    qsort_floatint( naval , qaval , kaval ) ;
90    for( kk=0 ; kk < naval ; kk++ ) qaval[kk] = -qaval[kk];
91 
92    pfac = 1.0f / (1.0f+natr) ;
93 
94    /* process aval[] entries larger than the largest atr value */
95 
96    for( kk=0 ; kk < naval ; kk++ ){
97      if( qaval[kk] >= atr[0] ){
98        pval = pfac * atr[0] / qaval[kk] ;         /* make up a small p-value */
99        aval[kaval[kk]] = (dopval) ? pval : -logf(pval) ;   /* convert to exp */
100      } else {
101        break ;
102      }
103    }
104 
105    /* process the remaining aval[] entries, starting at aval[kk] */
106 
107    for( jj=0 ; kk < naval ; kk++ ){
108      avv = qaval[kk] ;
109      for( ; jj < natr && avv <= atr[jj] ; jj++ ) ; /* get bounding atr[] vals */
110      if( jj == natr ) break ;
111      pval = jj ; dd = atr[jj] - atr[jj-1] ;
112      if( dd != 0.0f ) pval += (atr[jj-1]-avv) / dd ;    /* interpolate a pval */
113      pval *= pfac ;
114      aval[kaval[kk]] = (dopval) ? pval : -logf(pval) ;      /* convert to exp */
115    }
116    /** ININFO_message("kk = %d/%d after middle step",kk,naval) ; **/
117 
118    dd = -logf(1.0f-pfac) ;
119    for( ; kk < naval ; kk++ ) aval[kaval[kk]] = dd ;
120 
121    free(kaval) ; free(qaval) ; return ;
122 }
123 
124 /*----------------------------------------------------------------------------*/
125 
main(int argc,char * argv[])126 int main( int argc , char *argv[] )
127 {
128    THD_3dim_dataset *inset=NULL , *outset ;
129    int nvox , nvals , nopt=1 , ii,jj , nbad , ntr , convertize=1 , dopval=0 ;
130    char *prefix="NormTest" ;
131    float *avar , *dval , *atr ; double *eval ;
132 
133    if( argc < 2 || strcasecmp(argv[1],"-help") == 0 ){
134      printf(
135        "Program: 3dNormalityTest\n"
136        "\n"
137        "* This program tests the input values at each voxel for normality,\n"
138        "  using the Anderson-Darling method:\n"
139        "    http://en.wikipedia.org/wiki/Anderson-Darling_test\n"
140        "\n"
141        "* Each voxel must have at least 5 values (sub-bricks).\n"
142        "\n"
143        "* The resulting dataset has the Anderson-Darling statistic converted\n"
144        "  to an exponentially distributed variable, so it can be thresholded\n"
145        "  with the AFNI slider and display a nominal p-value below.  If you\n"
146        "  want the A-D statistic un-converted, use the '-noexp' option.\n"
147        "\n"
148        "* Conversion of the A-D statistic to a p-value is done via simulation\n"
149        "  of the null distribution.\n"
150        "\n"
151        "OPTIONS:\n"
152        "--------\n"
153        " -input dset  = Specifies the input dataset.\n"
154        "                Alternatively, the input dataset can be given as the\n"
155        "                last argument on the command line, after all other\n"
156        "                options.\n"
157        "\n"
158        " -prefix ppp  = Specifies the name for the output dataset.\n"
159        "\n"
160        " -noexp       = Do not convert the A-D statistic to an exponentially\n"
161        "                distributed value -- just leave the raw A-D score in\n"
162        "                the output dataset.\n"
163        " -pval        = Output the results as a pure (estimated) p-value.\n"
164        "\n"
165        "EXAMPLES:\n"
166        "---------\n"
167        "(1) Simulate a 2D square dataset with the values being normal on one\n"
168        "edge and exponentially distributed on the other, and mixed in-between.\n"
169        "\n"
170        "  3dUndump -dimen 101 101 1 -prefix UUU\n"
171        "  3dcalc -datum float -a UUU+orig -b '1D: 0 0 0 0 0 0 0 0 0 0' -prefix NNN \\\n"
172        "         -expr 'i*gran(0,1.4)+(100-i)*eran(4)'\n"
173        "  rm -f UUU+orig.*\n"
174        "  3dNormalityTest -prefix Ntest -input NNN+orig\n"
175        "  afni -com 'OPEN_WINDOW axialimage' Ntest+orig\n"
176        "\n"
177        "In the above script, the UUU+orig dataset is created just to provide a spatial\n"
178        "template for 3dcalc.  The '1D: 0 ... 0' input to 3dcalc is a time template\n"
179        "to create a dataset with 10 time points.  The values are random deviates,\n"
180        "ranging from pure Gaussian where i=100 to pure exponential at i=0.\n"
181        "\n"
182        "(2) Simulate a single logistic random variable into a 1D file and compute\n"
183        "the A-D nominal p-value:\n"
184        "\n"
185        "  1deval -num 200 -expr 'lran(2)' > logg.1D\n"
186        "  3dNormalityTest -input logg.1D\\' -prefix stdout: -pval\n"
187        "\n"
188        "Note the necessity to transpose the logg.1D file (with the \\' operator),\n"
189        "since 3D programs interpret each 1D file row as a voxel time series.\n"
190        "\n"
191        "++ March 2012 -- by The Ghost of Carl Friedrich Gauss\n"
192      ) ;
193      PRINT_COMPILE_DATE ; exit(0) ;
194    }
195 
196    /*--- deal with options ---*/
197 
198    nopt = 1 ;
199    while( nopt < argc && argv[nopt][0] == '-' ){
200 
201      if( strcasecmp(argv[nopt],"-noexp") == 0 ){
202        convertize = 0 ; nopt++ ; continue ;
203      }
204      if( strcasecmp(argv[nopt],"-pval") == 0 ){
205        convertize = 1 ; dopval = 1 ; nopt++ ; continue ;
206      }
207 
208      if( strcasecmp(argv[nopt],"-input") == 0 ){
209        if( inset != NULL ) ERROR_exit("Can't have 2 -input options!") ;
210        if( ++nopt >= argc ) ERROR_exit("Need an argument after -input!") ;
211        inset = THD_open_dataset(argv[nopt]) ;
212        CHECK_OPEN_ERROR(inset,argv[nopt]) ;
213        nopt++ ; continue ;
214      }
215 
216      if( strcasecmp(argv[nopt],"-prefix") == 0 ){
217        if( ++nopt >= argc ) ERROR_exit("Need an argument after -prefix!") ;
218        prefix = strdup(argv[nopt]) ;
219        if( !THD_filename_ok(prefix) )
220          ERROR_exit("-prefix '%s' has illegal characters :-(",prefix) ;
221        nopt++ ; continue ;
222      }
223 
224      ERROR_message("Unknown option: %s",argv[nopt]) ;
225      suggest_best_prog_option(argv[0], argv[nopt]);
226      exit(1);
227 
228    } /* end of loop over options */
229 
230    mainENTRY("3dNormalityTest main"); machdep(); AFNI_logger("3dNormalityTest",argc,argv);
231    PRINT_VERSION("3dNormalityTest"); AUTHOR("The Ghost of Gauss");
232 
233    /*--- deal with input ---*/
234 
235    if( inset == NULL ){
236      if( nopt < argc ){
237        inset = THD_open_dataset(argv[nopt]) ;
238        CHECK_OPEN_ERROR(inset,argv[nopt]) ;
239      } else {
240        ERROR_exit("No input dataset???") ;
241      }
242    }
243    nvals = DSET_NVALS(inset) ;
244    nvox  = DSET_NVOX(inset) ;
245    if( nvals < 5 )
246      ERROR_exit("Input dataset '%s' has %d sub-bricks, less than 5 :-(",
247                     argv[nopt] , nvals ) ;
248 
249    EDIT_floatize_dataset(inset) ;
250    if( DSET_pure_type(inset) != MRI_float )
251      ERROR_exit("Couldn't load input dataset properly :-(") ;
252 
253    /*--- create output dataset ---*/
254 
255    outset = EDIT_empty_copy(inset) ;
256    EDIT_dset_items( outset ,
257                       ADN_prefix , prefix ,
258                       ADN_nvals  , 1 ,
259                       ADN_ntt    , 0 ,
260                       ADN_type      , ISHEAD(inset) ? HEAD_FUNC_TYPE :
261                                                       GEN_FUNC_TYPE ,
262                       ADN_func_type , FUNC_BUCK_TYPE ,
263                     ADN_none ) ;
264    EDIT_substitute_brick( outset , 0 , MRI_float , NULL ) ;
265    avar = DSET_BRICK_ARRAY(outset,0) ;
266    if( avar == NULL ) ERROR_exit("Can't create output dataset array?!") ;
267 
268    dval = (float * )malloc(sizeof(float )*nvals) ;
269    eval = (double *)malloc(sizeof(double)*nvals) ;
270 
271    INFO_message("Computing Anderson-Darling statistics for all voxels") ;
272 
273    for( nbad=ii=0 ; ii < nvox ; ii++ ){
274      THD_extract_array( ii , inset , 0 , dval ) ;
275      for( jj=1 ; jj < nvals ; jj++ ) if( dval[jj] != dval[0] ) break ;
276      if( jj == nvals ){ avar[ii] = 0.0f ; nbad++ ; continue ; }
277      for( jj=0 ; jj < nvals ; jj++ ) eval[jj] = (double)dval[jj] ;
278      avar[ii] = anderson_darling_normal( nvals , eval ) ;
279    }
280 
281    if( nbad > 0 )
282      ININFO_message("%d voxel%s were constant ==> skipped test there",
283                     nbad , (nbad > 1) ? "s" : "\0" ) ;
284 
285    DSET_delete(inset) ; free(dval) ; free(eval) ;
286 
287    if( convertize ){
288      ntr = 9*nvox ;
289           if( ntr <  300000 ) ntr =  300000 ;
290      else if( ntr > 3000000 ) ntr = 3000000 ;
291 
292      INFO_message("Simulating A-D null distribution: %d trials",ntr) ;
293 
294      atr = anderson_darling_simulate( nvals , ntr ) ;
295      if( atr == NULL ) ERROR_exit("Simulation failed!?") ;
296      /** ININFO_message("range %g .. %g",atr[0],atr[ntr-1]) ; **/
297 
298      if( dopval )
299        INFO_message("Converting A-D statistics to p-values") ;
300      else
301        INFO_message("Converting A-D statistics to exponential distribution") ;
302 
303      anderson_darling_expify( nvox , avar , ntr , atr , dopval ) ;
304      free(atr) ;
305 
306      EDIT_BRICK_TO_FIGT( outset , 0 , 1.0f , 1.0f ) ;
307      EDIT_BRICK_LABEL  ( outset , 0 , "A-D_expify" ) ;
308    } else {
309      EDIT_BRICK_LABEL  ( outset , 0 , "A-D_statistic" ) ;
310    }
311 
312    DSET_write( outset ) ;
313    WROTE_DSET( outset ) ;
314    exit(0) ;
315 }
316