1 #include "mrilib.h"
2 
3 /*------------------------------------------------------------
4   Set the one-sided tail probability at which we will cutoff
5   the unusuality test.
6 --------------------------------------------------------------*/
7 
8 static float zstar = 0.0 ;            /* the actual cutoff */
9 static float pstar = 0.0 ;            /* tail probability  */
10 
set_unusuality_tail(float p)11 void set_unusuality_tail( float p )
12 {
13    if( p > 0.0 && p < 1.0 ){
14       zstar = qginv(p) ;
15       pstar = p ;
16    }
17    return ;
18 }
19 
20 /*------------------------------------------------------------
21   Inputs: rr[0..nr-1] = array of correlation coefficients.
22 --------------------------------------------------------------*/
23 
24 #undef TANHALL
25 
unusuality(int nr,float * rr)26 float unusuality( int nr , float * rr )
27 {
28    int ii , nzero , mzero ;
29    float * zz , * aa ;
30    float zmid,zsig,zmed, uval, fac, zrat, ff,fp, ss,ds,pp,ee , sigma ;
31 #ifndef TANHALL
32    float rmid , rcut ;
33 #endif
34 
35    if( nr < 1000 || rr == NULL ) return 0.0 ;
36 
37    /*-- make workspace --*/
38 
39    zz = (float *) malloc(sizeof(float)*nr*2) ; aa = zz + nr ;
40 
41    if( zstar <= 0.0 ){
42       char * cp = getenv("PTAIL") ;
43       float pp = 0.01 ;
44       if( cp != NULL ){
45          float xx = strtod( cp , NULL ) ;
46          if( xx > 0.0 && xx < 1.0 ) pp = xx ;
47       }
48       set_unusuality_tail( pp ) ;
49    }
50 
51    /*-- copy data into workspace, converting to atanh --*/
52 
53    memcpy( zz , rr , sizeof(float)*nr ) ;
54    qsort_float( nr , zz ) ;                           /* sort now */
55 
56    /*- trim off 1's (perfect correlations) -*/
57 
58    for( ii=nr-1 ; ii > 0 && zz[ii] > 0.999 ; ii-- ) ; /* nada */
59    if( ii == 0 ){ free(zz) ; return 0.0 ; }           /* shouldn't happen */
60    nr = ii+1 ;                                        /* the trim */
61 
62 #ifdef TANHALL
63    for( ii=0 ; ii < nr ; ii++ ) zz[ii] = atanh(rr[ii]) ;
64 #endif
65 
66    /*-- find median of zz [brute force sort] --*/
67 
68    if( nr%2 == 1 )              /* median */
69       zmid = zz[nr/2] ;
70    else
71       zmid = 0.5 * ( zz[nr/2] + zz[nr/2-1] ) ;
72 
73 #ifdef TANHALL
74    for( ii=0 ; ii < nr ; ii++ ) aa[ii] = fabs(zz[ii]-zmid) ;
75 #else
76    rmid = zmid ; zmid = atanh(zmid) ;
77    for( ii=0 ; ii < nr ; ii++ )
78       aa[ii] = fabs( (zz[ii]-rmid)/(1.0-zz[ii]*rmid) ) ;
79 #endif
80 
81    /*-- find MAD of zz --*/
82 
83    qsort_float( nr , aa ) ;
84    if( nr%2 == 1 )              /* MAD = median absolute deviation */
85       zmed = aa[nr/2] ;
86    else
87       zmed = 0.5 * ( aa[nr/2] + aa[nr/2-1] ) ;
88 
89 #ifndef TANHALL
90    zmed = atanh(zmed) ;
91 #endif
92 
93    zsig = 1.4826 * zmed ;           /* estimate standard deviation of zz */
94                                     /* 1/1.4826 = sqrt(2)*erfinv(0.5)    */
95 
96    if( zsig <= 0.0 ){               /* should not happen */
97       free(zz) ; return 0.0 ;
98    }
99 
100    /*-- normalize zz (is already sorted) --*/
101    /*-- then, find values >= zstar       --*/
102 
103    fac = 1.0 / zsig ;
104 #ifdef TANHALL
105    for( ii=0 ; ii < nr ; ii++ ) zz[ii] = fac * ( zz[ii] - zmid ) ;
106    for( ii=nr-1 ; ii > 0 ; ii-- ) if( zz[ii] < zstar ) break ;
107    nzero = ii+1 ; mzero = nr - nzero ;
108 #else
109    rcut = tanh( zsig * zstar + zmid ) ;
110    for( ii=nr-1 ; ii > 0 ; ii-- ){
111       if( zz[ii] < rcut ) break ;
112       else                zz[ii] = fac * ( atanh(zz[ii]) - zmid ) ;
113    }
114    nzero = ii+1 ; mzero = nr - nzero ;
115 
116 #if 0
117    fprintf(stderr,"uuu: nr=%d rcut=%g mzero=%d\n",nr,rcut,mzero) ;
118 #endif
119 #endif
120 
121    if( nzero < 2 || mzero < MAX(1.0,pstar*nr) ){          /* too weird */
122       free(zz) ; return 0.0 ;
123    }
124 
125    /*-- compute sigma-tilde squared --*/
126 
127    zsig = 0.0 ;
128    for( ii=nzero ; ii < nr ; ii++ ) zsig += zz[ii]*zz[ii] ;
129    zsig = zsig / mzero ;
130 
131    /*-- set up to compute f(s) --*/
132 
133 #define SQRT_2PI 2.5066283
134 
135    zrat = zstar*zstar / zsig ;
136    fac  = ( zrat * nzero ) / ( SQRT_2PI * mzero ) ;
137 
138    ss   = zstar ;          /* initial guess for s = zstar/sigma */
139 
140    /*-- Newton's method [almost] --*/
141 
142 #undef  PHI
143 #define PHI(s) (1.0-0.5*normal_t2p(ss))           /* N(0,1) cdf */
144 
145    for( ii=0 ; ii < 5 ; ii++ ){
146       pp = PHI(ss) ;                              /* Phi(ss) \approx 1 */
147       ee = exp(-0.5*ss*ss) ;
148 
149       ff = ss*ss - (fac/pp) * ss * ee - zrat ;    /* f(s) */
150 
151       fp = 2.0*ss + (fac/pp) * ee * (ss*ss-1.0) ; /* f'(s) */
152 
153       ds = ff / fp ;                              /* Newton step */
154 
155 #if 0
156       fprintf(stderr,"Newton: ss=%g ds=%g ff=%g fp=%g pp=%g\n",ss,ds,ff,fp,pp) ;
157 #endif
158 
159       ss -= ds ;                                  /* update */
160    }
161 
162    sigma = zstar / ss ;                           /* actual estimate of sigma */
163                                                   /* from the upper tail data */
164 
165    if( sigma <= 1.0 ){                            /* the boring case */
166       free(zz) ; return 0.0 ;
167    }
168 
169    /*-- compute the log-likelihood difference next --*/
170 
171    uval =  nzero * log( PHI(ss)/(1.0-pstar) )
172          - mzero * ( log(sigma) + 0.5 * zsig * (1.0/(sigma*sigma)-1.0) ) ;
173 
174    /*-- done! --*/
175 
176    free(zz) ; return uval ;
177 }
178