1 /**********************************************************
2 Chi-Square Probability Function and Kolmogorv-Smirnov
3 **********************************************************/
4
5 #include <iostream>
6 #include <cstdio>
7 #include <cmath>
8 #include "util.h"
9 #if __GNUC__ > 3
10 #include <string.h>
11 #endif
12
13 #ifdef SPRNG_MPI
14 #include <mpi.h>
15 #endif
16
17 using namespace std;
18
19
20 #define ITMAX 10000000 /* maximum allowed number of iterations */
21 #define EPS 3.0e-7 /* relative accuracy */
22 #define FPMIN 1.0e-30 /* number near the smallest */
23 /* representable floating-point number */
24
25 #ifndef max
26 #define max(a,b) (a<b?b:a)
27 #endif
28 #ifndef min
29 #define min(a,b) (a>b?b:a)
30 #endif
31
32 const char *errorMessage;
33 long degrees_of_freedom=1;
34 long KS_n = 1;
35
36
37 void set_d_of_f (long df);
38 double chiF (double chiSq);
39 double chisquare (long *actual,double *probability,long n,long k, int *nb);
40 double chipercent (double chiSq, long fr);
41 double gammp (double a, double x);
42 double KSpercent (double value, long n);
43 double KS (double *V, long n, double (*F)(double));
44
45 void gser (double *gamser, double a, double x, double *gln);
46 void gcf (double *gammcf, double a, double x, double *gln);
47 double gammln (double xx);
48
49 void chiGammaClearErrMess (void);
50 const char *chiGammaReadErrMess (void);
51 void chiGammaFlagError (const char *errorText);
52
53 double cum_normal(double sample, double mean, double stddev);
54 void set_normal_params(double mu, double sd);
55 double normalF(double x);
56
57 void mean_sd(double *x, int n, double *mean, double *sd);
58
59
KS(double * X,long n,double (* F)(double))60 double KS(double *X, long n, double (*F)(double) )
61 {
62 double *a, *b, Y, rminus, rplus, value;
63 long *c, m, k, j;
64
65 m = n+1;
66
67 a = new double[m+1];
68 b = new double[m+1];
69 c = new long[m+1];
70
71 memset(c,0,(m+1)*sizeof(long));
72 for(k=0; k<m+1; k++)
73 {
74 a[k] = 1.0;
75 b[k] = 0.0;
76 }
77
78 for(j=0; j<n; j++)
79 {
80 Y = F(X[j]);
81
82 k = static_cast<long int>(m*Y);
83 a[k] = min(a[k],Y);
84 b[k] = max(b[k],Y);
85 c[k]++;
86 }
87
88 for(k=j=0, rplus=rminus=0.0; k<m; k++)
89 {
90 if(c[k] <= 0)
91 continue;
92
93 rminus = max(rminus,a[k] - (double) j/n);
94 j += c[k];
95 rplus = max(rplus,(double) j/n - b[k]);
96 }
97
98 value = sqrt((double) n)*max(rplus,rminus);
99 #ifdef ONE_SIDED
100 value = sqrt((double) n)*rplus;
101 #endif
102
103 delete [] a;
104 delete [] b;
105 delete [] c;
106
107 return value;
108 }
109
set_KS_n(long n)110 void set_KS_n(long n)
111 {
112 KS_n = n;
113 }
114
KSF(double value)115 double KSF(double value)
116 {
117 return KSpercent(value, KS_n);
118 }
119
120 #ifndef ONE_SIDED
KSpercent(double value,long n)121 double KSpercent(double value, long n)
122 {
123 int i;
124 double mult, temp, term, previous_term, percent;
125
126 value /= sqrt((double) n);
127
128 value *= 0.12 + sqrt((double) n) + 0.11/sqrt((double) n);
129
130 mult = 2.0;
131 percent = 0.0;
132 temp = -2.0*value*value;
133 previous_term = 0.0;
134
135 for(i=1; i<150; i++)
136 {
137 term = mult*exp(temp*i*i);
138 percent += term;
139 if( fabs(term)<0.005*fabs(previous_term) || fabs(term)<0.0000001*percent)
140 return 1.0 - percent;
141
142 mult = -mult;
143 previous_term = term;
144 }
145
146 fprintf(stderr,"WARNING: KSpercent failed to converge\n");
147
148 return max(percent,0.0);
149 }
150 #endif
151
152
153 #ifdef ONE_SIDED
KSpercent(double value,long n)154 double KSpercent(double value, long n)
155 {
156 int Nmax;
157
158 value *= sqrt((double) n);
159
160 if(value <= 0.0)
161 return 0.0;
162
163 Nmax = 30; /* between 30 and 50 */
164
165 if(n > Nmax)
166 {
167 double yp, p;
168
169 yp = value + 1.0/6.0/sqrt((double) n);
170 p = 1 - exp(-2.0*yp*yp);
171 return 1.0 - 2.0*(1.0-p);
172 /* return p;*/
173 }
174 else
175 {
176 static double K[16][9] = {{0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0},
177 {0.0,0.01,0.05,0.25,0.5,0.75,0.95,0.99,1.001},
178 {0.0,0.014,0.06749,0.2929,0.5176,0.7071,1.098,1.2728,
179 1.415},
180 {0.0,0.01699,0.07919,0.3112,0.5147,0.7539,1.1017,1.3589,
181 1.733},
182 {0.0,0.01943,0.08789,0.3202,0.5110,0.7642,1.1304,1.3777,
183 2.01},
184 {0.0,0.02152,0.09471,0.3249,0.5245,0.7674,1.1392,1.4024,
185 2.237},
186 {0.0,0.02336,0.1002,0.3272,0.5319,0.7703,1.1463,1.4144,
187 2.45},
188 {0.0,0.02501,0.1048,0.328,0.5364,0.7755,1.1537,1.4246,
189 2.647},
190 {0.0,0.0265,0.1086,0.328,0.5392,0.7797,1.1586,1.4327,
191 2.829},
192 {0.0,0.02786,0.1119,0.3274,0.5411,0.7825,1.1624,1.4388,
193 3.01},
194 {0.0,0.02912,0.1147,0.3297,0.5426,0.7845,1.1658,1.444,
195 3.163},
196 {0.0,0.03028,0.1172,0.333,0.5439,0.7863,1.1688,1.4484,
197 3.318},
198 {0.0,0.03137,0.1193,0.3357,0.5453,0.788,1.1714,1.4521,
199 3.465},
200 {0.0,0.03424,0.1244,0.3412,0.55,0.7926,1.1773,1.4606,
201 3.874},
202 {0.0,0.03807,0.1298,0.3461,0.5547,0.7975,1.1839,1.4698,
203 4.473},
204 {0.0,0.04354,0.1351,0.3509,0.5605,0.8036,1.1916,1.4801,
205 5.478}};
206 static int index[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,15,20,30};
207 static double p[9] = {0.0,0.01,0.05,0.25,0.5,0.75,0.95,0.99, 1.0};
208 int i, j;
209 double percent1, percent2, dist1, dist2, percentage;
210
211 i = 1;
212 while(index[i] < n) /* look up correct row for 'n' in table */
213 i++;
214
215 j = 1;
216 while(K[i][j] < value) /* look up percentage in table */
217 j++;
218
219 dist1 = K[i][j]-K[i][j-1];
220 dist2 = K[i][j]-value;
221 percent1 = p[j]*(dist1-dist2)/dist1 + p[j-1]*dist2/dist1;
222
223 if(index[i]==n)
224 return percent1;
225 else
226 {
227 j = 1;
228 while(K[i-1][j] < value) /* look up percentage in table */
229 j++;
230
231 dist1 = K[i-1][j]-K[i-1][j-1];
232 dist2 = K[i-1][j]-value;
233 percent2 = p[j]*(dist1-dist2)/dist1 + p[j-1]*dist2/dist1;
234
235 dist1 = (double) (index[i]-index[i-1]);
236 dist2 = (double) (index[i]-n);
237 percentage = percent1*(dist1-dist2)/dist1+ percent2*dist2/dist1;
238 /* return percentage;*/
239 return 1.0-2.0*(1.0-percentage);
240 }
241 }
242 }
243 #endif
244
chisquare(long * actual,double * probability,long n,long k,int * nb)245 double chisquare(long *actual, double *probability, long n, long k, int *nb)
246 {
247 double V, prob;
248 long i, n_actual, nbins;
249
250 V = 0.0;
251 for(i=n_actual=nbins=0, prob=0.0; i<k; i++)
252 {
253 n_actual += actual[i];
254 prob += probability[i];
255 /*printf("\t %ld. %ld %ld %f %f %ld %ld\n", i, n_actual, actual[i], prob, probability[i], n, nbins);
256 ch = getchar();*/
257
258 if(n*prob >=5 || i==k-1)
259 {
260 V += (n_actual-n*prob)*(n_actual-n*prob)/
261 (n*prob);
262 nbins++;
263 n_actual = 0;
264 prob = 0.0;
265 }
266 }
267
268 *nb = nbins;
269
270 if(nbins == 1)
271 fprintf(stderr,"WARNING: Only one bin was used in Chisquare\n");
272 else if(nbins==0)
273 fprintf(stderr,"WARNING: No bin was used in Chisquare => n was <= 5.\n");
274 else if(nbins != k)
275 fprintf(stderr,"WARNING: The effective number of bins in Chisquare: %ld, was less than the given number of bins: %ld, due to the effect of combining bins in order to make the expected number of hits per bin sufficiently large.\n", nbins, k);
276
277 return V;
278 }
279
set_d_of_f(long df)280 void set_d_of_f(long df)
281 {
282 degrees_of_freedom = df;
283 }
284
285
chiF(double chiSq)286 double chiF(double chiSq) /* returns the chi-square prob. */
287 {
288 return chipercent(chiSq,degrees_of_freedom);
289 }
290
291
chipercent(double chiSq,long fr)292 double chipercent(double chiSq, long fr) /* returns the chi-square prob. */
293 /* function with chi-square chiSq */
294 { /* & degrees of freedom fr */
295 return (gammp(fr/2.0, chiSq/2.0));
296 }
297
298
gammp(double a,double x)299 double gammp(double a, double x) /* returns the incomplete gamma */
300 /* function P(a,x) */
301 {
302
303 double gamser, gammcf, gln;
304
305 if (x < 0.0 || a <= 0.0)
306 {
307 chiGammaFlagError("Invalid arguments in routine gammp");
308 return (0.0);
309 }
310
311 if (x < (a + 1.0))
312 { /* use the series representation */
313 gser(&gamser,a,x,&gln);
314 return (gamser);
315 }
316 else
317 { /* use the continued fraction */
318 gcf(&gammcf,a,x,&gln); /* representation */
319 return (1.0 - gammcf); /* and take its complement */
320 }
321 }
322
gser(double * gamser,double a,double x,double * gln)323 void gser(double *gamser, double a, double x, /* Returns the incomplete gamma*/
324 double *gln) /* function P(a,x) evaluated by */
325 { /* its series representation as */
326 /* gamser. Also returns */
327 /* ln(gamma(a)) as gln. */
328 int n;
329 double sum, del, ap;
330
331 *gln=gammln(a);
332 if (x <= 0.0)
333 {
334 if (x < 0.0)
335 chiGammaFlagError("x less than 0 in routine gser");
336 *gamser = 0.0;
337 return;
338 }
339 else
340 {
341 ap = a;
342 del = sum = 1.0/a;
343 for (n=1; n<=ITMAX; n++)
344 {
345 ++ap;
346 del *= x/ap;
347 sum += del;
348 if (fabs(del) < fabs(sum)*EPS)
349 {
350 *gamser = sum * exp(-x + a * log(x) - (*gln));
351 return;
352 }
353 }
354 chiGammaFlagError("a too large, ITMAX too small in routine gser");
355 return;
356 }
357 }
358
gcf(double * gammcf,double a,double x,double * gln)359 void gcf(double *gammcf, double a, double x, /* Returns the complement */
360 double *gln) /* imcomplete gamma function */
361 { /* Q(a,x) evaluated by its */
362 /* continued fraction rep. as */
363 /* gammcf. Also returns */
364 /* ln(gamma(a)) as gln */
365 int i;
366 double an, b, c, d, del, h;
367
368 *gln = gammln(a);
369
370 b = x + 1.0 - a; /* set up for evaluating */
371 c = 1.0 / FPMIN; /* continued fraction by modified*/
372 d = 1.0 / b; /* Lentz' method with b0=0 */
373 h = d;
374
375 for (i=1;i<=ITMAX;i++)
376 { /* iterate to convergence */
377 an = -i * (i - a);
378 b += 2.0;
379 d = an * d + b;
380 if (fabs(d) < FPMIN) d = FPMIN;
381 c = b + an / c;
382 if (fabs(c) < FPMIN) c = FPMIN;
383 d = 1.0 / d;
384 del = d * c;
385 h *= del;
386 if (fabs(del-1.0) < EPS)
387 break;
388 }
389
390 if (i > ITMAX)
391 {
392 chiGammaFlagError("a too large, ITMAX too small in gcf");
393 return;
394 }
395
396 *gammcf = exp(-x + a * log(x) - (*gln)) * h; /* put factors in front */
397 }
398
399
gammln(double xx)400 double gammln(double xx) /* returns the value */
401 { /* ln(gamma(xx)) for xx > 0*/
402 double x, y, tmp, ser;
403 static double cof[6] = { 76.18009172947146,
404 -86.50532032941677,
405 24.01409824083091,
406 -1.231739572450155,
407 0.1208650973866179e-2,
408 -0.5395239384953e-5};
409 int j;
410
411 y = x = xx;
412 tmp = x + 5.5;
413 tmp -= (x + 0.5) * log(tmp);
414 ser = 1.000000000190015;
415
416 for (j=0; j<=5; j++)
417 ser += cof[j]/++y;
418
419 return (-tmp + log(2.5066282746310005 * ser / x));
420 }
421
422 void chiGammaClearErrMess(void) /* clears error message buffer */
423 {
424 errorMessage = NULL;
425 }
426
427 const char *chiGammaReadErrMess(void) /* returns the error message */
428 { /* from the buffer */
429 return (errorMessage);
430 }
431
432 void chiGammaFlagError(const char *errorText) /* writes to the error message */
433 { /* buffer */
434 errorMessage = errorText;
435 }
436
437 #if 0
438 int main(int argc, char *argv[])
439 {
440
441 double p[1000000], value;
442 long obs[1000000], n;
443 double X[10];
444 int i, nb;
445
446 set_d_of_f(99999);
447
448 /*printf("\nChi square probability for V = %f, degrees of freedom = %d is: %f\n\n", 34.76,50,chiF(34.76));*/
449
450 /*for(i=0; i<1000000; i++)
451 {
452 p[i] = (double) 1/1000000;
453 obs[i] = 10;
454 }*/
455
456 p[0] = 0.2;
457 p[1] = 0.5;
458 p[2] = 0.3;
459 obs[0] = 30;
460 obs[1] = 30;
461 obs[2] = 40;
462
463 /*printf("\n chisquare value = %f\n\n", chisquare(obs,p,10000000L,1000000L, &nb));*/
464 printf("\n chisquare value = %f, nbins = %d\n\n", chisquare(obs,p,100,3, &nb), nb); /*** wrong values of nb may be obtained because function argument evaluation order is not defined. Beware! ***/
465 /*value = atof(argv[1]);
466 n = atoi(argv[2]);
467
468 printf("\nd = %f,KS = %f\n\n", value, KSpercent(value,n));*/
469
470 set_d_of_f(1000);
471 /*X[0] = 10.0;
472 X[1] = 13.0;
473 X[2] = 5.0;
474 X[3] = 15.0;
475 X[4] = 11.5;
476 X[5] = 4.0;
477 X[6] = 8.0;
478 X[7] = 9.0;
479 X[8] = 6.0;
480 X[9] = 7.0;*/
481
482 /*printf("KS = %.14f\n", KS(p,1000000,chiF));*/
483
484 return 0;
485 }
486 #endif
487
488
489 /* Reference: NIST -- NORCDF available on netlib */
490
491 double cum_normal(double sample, double mean, double stddev)
492 {
493 double a, b, c, d, e, f, g, h, temp;
494
495 a = 0.319381530 ;
496 b = -0.356563782;
497 c = 1.781477937;
498 d = -1.821255978;
499 e = 1.330274429;
500 f = .2316419;
501
502 if(stddev <= 0.0)
503 return -1.0;
504
505 sample = (sample - mean)/stddev;
506
507 if(sample < 0.0)
508 temp = -sample;
509 else
510 temp = sample;
511
512 g = 1.0/(1.0+f*temp);
513 h = 1.0 - (0.39894228040143*exp(-0.5*temp*temp))
514 *(a*g + b*g*g + c*g*g*g + d*g*g*g*g + e*g*g*g*g*g);
515
516 if(sample < 0.0)
517 h=1.0-h;
518
519 return h;
520 }
521
522
523 static double normal_mu;
524 static double normal_sd;
525
526 void set_normal_params(double mu, double sd)
527 {
528 normal_mu = mu;
529 normal_sd = sd;
530 }
531
532
533 double normalF(double x)
534 {
535 return cum_normal(x,normal_mu,normal_sd);
536 }
537
538
539 void mean_sd(double *x, int n, double *mean, double *sd)
540 {
541 double ave, var, diff, error;
542 int i;
543
544 ave = diff = var = error = 0.0;
545
546 for(i=0; i<n; i++)
547 ave += x[i];
548 ave /= n;
549
550 for(i=0; i<n; i++)
551 {
552 diff = x[i] - ave;
553 error += diff;
554 var +=diff*diff;
555 }
556
557 var = (var-error*error/n)/(n-1);
558
559 *mean = ave;
560 *sd = sqrt(var);
561 }
562
563