1 /**********************************************************
2      Chi-Square Probability Function and Kolmogorv-Smirnov
3 **********************************************************/
4 
5 #include <iostream>
6 #include <cstdio>
7 #include <cmath>
8 #include "util.h"
9 #if __GNUC__ > 3
10  #include <string.h>
11 #endif
12 
13 #ifdef SPRNG_MPI
14 #include <mpi.h>
15 #endif
16 
17 using namespace std;
18 
19 
20 #define ITMAX 10000000        /* maximum allowed number of iterations */
21 #define EPS 3.0e-7            /* relative accuracy                    */
22 #define FPMIN 1.0e-30         /* number near the smallest             */
23                               /* representable floating-point number  */
24 
25 #ifndef max
26 #define max(a,b) (a<b?b:a)
27 #endif
28 #ifndef min
29 #define min(a,b) (a>b?b:a)
30 #endif
31 
32 const char *errorMessage;
33 long degrees_of_freedom=1;
34 long KS_n = 1;
35 
36 
37 void set_d_of_f (long df);
38 double chiF (double chiSq);
39 double chisquare (long *actual,double *probability,long n,long k, int *nb);
40 double chipercent (double chiSq, long fr);
41 double gammp (double a, double x);
42 double KSpercent (double value, long n);
43 double KS (double *V, long n, double (*F)(double));
44 
45 void  gser (double *gamser, double a, double x, double *gln);
46 void  gcf (double *gammcf, double a, double x, double *gln);
47 double gammln (double xx);
48 
49 void  chiGammaClearErrMess (void);
50 const char  *chiGammaReadErrMess (void);
51 void  chiGammaFlagError (const char *errorText);
52 
53 double cum_normal(double sample, double mean, double stddev);
54 void set_normal_params(double mu, double sd);
55 double normalF(double x);
56 
57 void mean_sd(double *x, int n, double *mean, double *sd);
58 
59 
KS(double * X,long n,double (* F)(double))60 double KS(double *X, long n, double (*F)(double) )
61 {
62   double *a, *b, Y, rminus, rplus, value;
63   long *c, m, k, j;
64 
65   m = n+1;
66 
67   a = new double[m+1];
68   b = new double[m+1];
69   c = new long[m+1];
70 
71   memset(c,0,(m+1)*sizeof(long));
72   for(k=0; k<m+1; k++)
73   {
74     a[k] = 1.0;
75     b[k] = 0.0;
76   }
77 
78   for(j=0; j<n; j++)
79   {
80     Y = F(X[j]);
81 
82     k = static_cast<long int>(m*Y);
83     a[k] = min(a[k],Y);
84     b[k] = max(b[k],Y);
85     c[k]++;
86   }
87 
88   for(k=j=0, rplus=rminus=0.0; k<m; k++)
89   {
90     if(c[k] <= 0)
91       continue;
92 
93     rminus = max(rminus,a[k] - (double) j/n);
94     j += c[k];
95     rplus = max(rplus,(double) j/n - b[k]);
96   }
97 
98   value = sqrt((double) n)*max(rplus,rminus);
99 #ifdef ONE_SIDED
100   value = sqrt((double) n)*rplus;
101 #endif
102 
103   delete [] a;
104   delete [] b;
105   delete [] c;
106 
107   return value;
108 }
109 
set_KS_n(long n)110 void set_KS_n(long n)
111 {
112   KS_n = n;
113 }
114 
KSF(double value)115 double KSF(double value)
116 {
117   return KSpercent(value, KS_n);
118 }
119 
120 #ifndef ONE_SIDED
KSpercent(double value,long n)121 double KSpercent(double value, long n)
122 {
123   int i;
124   double mult, temp, term, previous_term, percent;
125 
126   value /= sqrt((double) n);
127 
128   value *= 0.12 + sqrt((double) n) + 0.11/sqrt((double) n);
129 
130   mult = 2.0;
131   percent = 0.0;
132   temp = -2.0*value*value;
133   previous_term = 0.0;
134 
135   for(i=1; i<150; i++)
136   {
137     term = mult*exp(temp*i*i);
138     percent += term;
139     if( fabs(term)<0.005*fabs(previous_term) || fabs(term)<0.0000001*percent)
140       return 1.0 - percent;
141 
142     mult = -mult;
143     previous_term = term;
144   }
145 
146   fprintf(stderr,"WARNING: KSpercent failed to converge\n");
147 
148   return max(percent,0.0);
149 }
150 #endif
151 
152 
153 #ifdef ONE_SIDED
KSpercent(double value,long n)154 double KSpercent(double value, long n)
155 {
156   int Nmax;
157 
158   value *= sqrt((double) n);
159 
160   if(value <= 0.0)
161     return 0.0;
162 
163   Nmax = 30;			/* between 30 and 50 */
164 
165   if(n > Nmax)
166   {
167     double yp, p;
168 
169     yp = value + 1.0/6.0/sqrt((double) n);
170     p = 1 - exp(-2.0*yp*yp);
171     return 1.0 - 2.0*(1.0-p);
172     /*    return p;*/
173   }
174   else
175   {
176     static double K[16][9] = {{0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0},
177 		       {0.0,0.01,0.05,0.25,0.5,0.75,0.95,0.99,1.001},
178 		       {0.0,0.014,0.06749,0.2929,0.5176,0.7071,1.098,1.2728,
179 			  1.415},
180 		       {0.0,0.01699,0.07919,0.3112,0.5147,0.7539,1.1017,1.3589,
181 			  1.733},
182 		       {0.0,0.01943,0.08789,0.3202,0.5110,0.7642,1.1304,1.3777,
183 			  2.01},
184 		       {0.0,0.02152,0.09471,0.3249,0.5245,0.7674,1.1392,1.4024,
185 			  2.237},
186 		       {0.0,0.02336,0.1002,0.3272,0.5319,0.7703,1.1463,1.4144,
187 			  2.45},
188 		       {0.0,0.02501,0.1048,0.328,0.5364,0.7755,1.1537,1.4246,
189 			  2.647},
190 		       {0.0,0.0265,0.1086,0.328,0.5392,0.7797,1.1586,1.4327,
191 			  2.829},
192 		       {0.0,0.02786,0.1119,0.3274,0.5411,0.7825,1.1624,1.4388,
193 			  3.01},
194 		       {0.0,0.02912,0.1147,0.3297,0.5426,0.7845,1.1658,1.444,
195 			  3.163},
196 		       {0.0,0.03028,0.1172,0.333,0.5439,0.7863,1.1688,1.4484,
197 			  3.318},
198 		       {0.0,0.03137,0.1193,0.3357,0.5453,0.788,1.1714,1.4521,
199 			  3.465},
200 		       {0.0,0.03424,0.1244,0.3412,0.55,0.7926,1.1773,1.4606,
201 			  3.874},
202 		       {0.0,0.03807,0.1298,0.3461,0.5547,0.7975,1.1839,1.4698,
203 			  4.473},
204 		       {0.0,0.04354,0.1351,0.3509,0.5605,0.8036,1.1916,1.4801,
205 			  5.478}};
206     static int index[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,15,20,30};
207     static double p[9] = {0.0,0.01,0.05,0.25,0.5,0.75,0.95,0.99, 1.0};
208     int i, j;
209     double percent1, percent2, dist1, dist2, percentage;
210 
211     i = 1;
212     while(index[i] < n)		/* look up correct row for 'n' in table */
213       i++;
214 
215     j = 1;
216     while(K[i][j] < value)	/* look up percentage in table */
217       j++;
218 
219     dist1 = K[i][j]-K[i][j-1];
220     dist2 = K[i][j]-value;
221     percent1 = p[j]*(dist1-dist2)/dist1 + p[j-1]*dist2/dist1;
222 
223     if(index[i]==n)
224       return percent1;
225     else
226     {
227       j = 1;
228       while(K[i-1][j] < value)	/* look up percentage in table */
229 	j++;
230 
231       dist1 = K[i-1][j]-K[i-1][j-1];
232       dist2 = K[i-1][j]-value;
233       percent2 = p[j]*(dist1-dist2)/dist1 + p[j-1]*dist2/dist1;
234 
235       dist1 = (double) (index[i]-index[i-1]);
236       dist2 = (double) (index[i]-n);
237       percentage = percent1*(dist1-dist2)/dist1+ percent2*dist2/dist1;
238       /*      return percentage;*/
239       return 1.0-2.0*(1.0-percentage);
240     }
241   }
242 }
243 #endif
244 
chisquare(long * actual,double * probability,long n,long k,int * nb)245 double chisquare(long *actual, double *probability, long n, long k, int *nb)
246 {
247   double V, prob;
248   long i, n_actual, nbins;
249 
250   V = 0.0;
251   for(i=n_actual=nbins=0, prob=0.0; i<k; i++)
252   {
253     n_actual += actual[i];
254     prob += probability[i];
255     /*printf("\t %ld. %ld %ld %f %f %ld %ld\n", i, n_actual, actual[i], prob, probability[i], n, nbins);
256       ch = getchar();*/
257 
258     if(n*prob >=5 || i==k-1)
259     {
260       V += (n_actual-n*prob)*(n_actual-n*prob)/
261 	(n*prob);
262       nbins++;
263       n_actual = 0;
264       prob = 0.0;
265     }
266   }
267 
268   *nb = nbins;
269 
270   if(nbins == 1)
271     fprintf(stderr,"WARNING: Only one bin was used in Chisquare\n");
272   else if(nbins==0)
273     fprintf(stderr,"WARNING: No bin was used in Chisquare => n was <= 5.\n");
274   else if(nbins != k)
275     fprintf(stderr,"WARNING: The effective number of bins in Chisquare: %ld, was less than the given number of bins: %ld, due to the effect of combining bins in order to make the expected number of hits per bin sufficiently large.\n", nbins, k);
276 
277   return V;
278 }
279 
set_d_of_f(long df)280 void set_d_of_f(long df)
281 {
282   degrees_of_freedom = df;
283 }
284 
285 
chiF(double chiSq)286 double chiF(double chiSq) /* returns the chi-square prob.   */
287 {
288   return chipercent(chiSq,degrees_of_freedom);
289 }
290 
291 
chipercent(double chiSq,long fr)292 double chipercent(double chiSq, long fr) /* returns the chi-square prob.   */
293                                            /* function with chi-square chiSq */
294 {                                          /* & degrees of freedom fr        */
295   return (gammp(fr/2.0, chiSq/2.0));
296 }
297 
298 
gammp(double a,double x)299 double gammp(double a, double x)           /* returns the incomplete gamma  */
300                                            /* function P(a,x)      */
301 {
302 
303   double gamser, gammcf, gln;
304 
305   if (x < 0.0 || a <= 0.0)
306   {
307     chiGammaFlagError("Invalid arguments in routine gammp");
308     return (0.0);
309   }
310 
311   if (x < (a + 1.0))
312   {                   /* use the series representation */
313     gser(&gamser,a,x,&gln);
314     return (gamser);
315   }
316   else
317   {                               /* use the continued fraction    */
318     gcf(&gammcf,a,x,&gln);            /* representation */
319     return (1.0 - gammcf);            /* and take its complement       */
320   }
321 }
322 
gser(double * gamser,double a,double x,double * gln)323 void gser(double *gamser, double a, double x, /* Returns the incomplete gamma*/
324           double *gln)                    /* function P(a,x) evaluated by  */
325 {                                         /* its series representation as  */
326                                           /* gamser.  Also returns         */
327                                           /* ln(gamma(a)) as gln.          */
328   int n;
329   double sum, del, ap;
330 
331   *gln=gammln(a);
332   if (x <= 0.0)
333   {
334     if (x < 0.0)
335       chiGammaFlagError("x less than 0 in routine gser");
336     *gamser = 0.0;
337     return;
338   }
339   else
340   {
341     ap = a;
342     del = sum = 1.0/a;
343     for (n=1; n<=ITMAX; n++)
344     {
345       ++ap;
346       del *= x/ap;
347       sum += del;
348       if (fabs(del) < fabs(sum)*EPS)
349       {
350 	*gamser = sum * exp(-x + a * log(x) - (*gln));
351 	return;
352       }
353     }
354     chiGammaFlagError("a too large, ITMAX too small in routine gser");
355     return;
356   }
357 }
358 
gcf(double * gammcf,double a,double x,double * gln)359 void gcf(double *gammcf, double a, double x, /* Returns the complement  */
360          double *gln)                        /* imcomplete gamma function  */
361 {                                            /* Q(a,x) evaluated by its  */
362                                              /* continued fraction rep. as  */
363                                              /* gammcf.  Also returns  */
364                                              /* ln(gamma(a)) as gln   */
365   int i;
366   double an, b, c, d, del, h;
367 
368   *gln = gammln(a);
369 
370   b = x + 1.0 - a;                   /* set up for evaluating   */
371   c = 1.0 / FPMIN;                   /* continued fraction by modified*/
372   d = 1.0 / b;                       /* Lentz' method with b0=0       */
373   h = d;
374 
375   for (i=1;i<=ITMAX;i++)
376   {            /* iterate to convergence        */
377     an = -i * (i - a);
378     b += 2.0;
379     d = an * d + b;
380     if (fabs(d) < FPMIN) d = FPMIN;
381     c = b + an / c;
382     if (fabs(c) < FPMIN) c = FPMIN;
383     d = 1.0 / d;
384     del = d * c;
385     h *= del;
386     if (fabs(del-1.0) < EPS)
387       break;
388   }
389 
390   if (i > ITMAX)
391   {
392     chiGammaFlagError("a too large, ITMAX too small in gcf");
393     return;
394   }
395 
396   *gammcf = exp(-x + a * log(x) - (*gln)) * h; /* put factors in front  */
397 }
398 
399 
gammln(double xx)400 double gammln(double xx)                          /* returns the value       */
401 {                                                 /* ln(gamma(xx)) for xx > 0*/
402   double x, y, tmp, ser;
403   static double cof[6] = { 76.18009172947146,
404                              -86.50532032941677,
405 			     24.01409824083091,
406 			     -1.231739572450155,
407 			     0.1208650973866179e-2,
408 			     -0.5395239384953e-5};
409   int j;
410 
411   y = x = xx;
412   tmp = x + 5.5;
413   tmp -= (x + 0.5) * log(tmp);
414   ser = 1.000000000190015;
415 
416   for (j=0; j<=5; j++)
417     ser += cof[j]/++y;
418 
419   return (-tmp + log(2.5066282746310005 * ser / x));
420 }
421 
422 void chiGammaClearErrMess(void)  /* clears error message buffer   */
423 {
424   errorMessage = NULL;
425 }
426 
427 const char *chiGammaReadErrMess(void)  /* returns the error message     */
428 {                                /* from the buffer               */
429   return (errorMessage);
430 }
431 
432 void chiGammaFlagError(const char *errorText) /* writes to the error message   */
433 {                                       /* buffer                        */
434   errorMessage = errorText;
435 }
436 
437 #if 0
438 int main(int argc, char *argv[])
439 {
440 
441   double p[1000000], value;
442   long obs[1000000], n;
443   double X[10];
444   int i, nb;
445 
446    set_d_of_f(99999);
447 
448   /*printf("\nChi square probability for V = %f, degrees of freedom = %d is: %f\n\n", 34.76,50,chiF(34.76));*/
449 
450   /*for(i=0; i<1000000; i++)
451     {
452     p[i] = (double) 1/1000000;
453     obs[i] = 10;
454     }*/
455 
456     p[0] = 0.2;
457   p[1] = 0.5;
458   p[2] = 0.3;
459   obs[0] = 30;
460   obs[1] = 30;
461   obs[2] = 40;
462 
463   /*printf("\n chisquare value = %f\n\n", chisquare(obs,p,10000000L,1000000L, &nb));*/
464   printf("\n chisquare value = %f, nbins = %d\n\n", chisquare(obs,p,100,3, &nb), nb); /*** wrong values of nb may be obtained because function argument evaluation order is not defined. Beware! ***/
465   /*value = atof(argv[1]);
466   n = atoi(argv[2]);
467 
468   printf("\nd = %f,KS = %f\n\n", value, KSpercent(value,n));*/
469 
470   set_d_of_f(1000);
471   /*X[0] = 10.0;
472   X[1] = 13.0;
473   X[2] = 5.0;
474   X[3] = 15.0;
475   X[4] = 11.5;
476   X[5] = 4.0;
477   X[6] = 8.0;
478   X[7] = 9.0;
479   X[8] = 6.0;
480   X[9] = 7.0;*/
481 
482   /*printf("KS = %.14f\n", KS(p,1000000,chiF));*/
483 
484   return 0;
485 }
486 #endif
487 
488 
489 /* Reference: NIST -- NORCDF available on netlib */
490 
491 double cum_normal(double sample, double mean, double stddev)
492 {
493   double a, b, c, d, e, f, g, h, temp;
494 
495   a = 0.319381530 ;
496   b = -0.356563782;
497   c = 1.781477937;
498   d = -1.821255978;
499   e = 1.330274429;
500   f = .2316419;
501 
502   if(stddev <= 0.0)
503     return -1.0;
504 
505   sample = (sample - mean)/stddev;
506 
507   if(sample < 0.0)
508     temp = -sample;
509   else
510     temp = sample;
511 
512   g = 1.0/(1.0+f*temp);
513   h = 1.0 - (0.39894228040143*exp(-0.5*temp*temp))
514     *(a*g + b*g*g + c*g*g*g + d*g*g*g*g + e*g*g*g*g*g);
515 
516   if(sample < 0.0)
517     h=1.0-h;
518 
519   return h;
520 }
521 
522 
523 static double normal_mu;
524 static double normal_sd;
525 
526 void set_normal_params(double mu, double sd)
527 {
528   normal_mu = mu;
529   normal_sd = sd;
530 }
531 
532 
533 double normalF(double x)
534 {
535   return cum_normal(x,normal_mu,normal_sd);
536 }
537 
538 
539 void mean_sd(double *x, int n, double *mean, double *sd)
540 {
541   double ave, var, diff, error;
542   int i;
543 
544   ave = diff = var = error = 0.0;
545 
546   for(i=0; i<n; i++)
547     ave += x[i];
548   ave /= n;
549 
550   for(i=0; i<n; i++)
551   {
552     diff = x[i] - ave;
553     error += diff;
554     var +=diff*diff;
555   }
556 
557   var = (var-error*error/n)/(n-1);
558 
559   *mean = ave;
560   *sd = sqrt(var);
561 }
562 
563