/*
 * See copyright in copyright.h and the accompanying file COPYING
 */

/*
 *========================================================================
 * This is the Kolmogorov-Smirnov test for uniformity on the interval
 * [0,1).  p-values from a (large) number of independent trials should
 * be uniformly distributed on the interval [0,1) if the underlying
 * result is consistent with the hypothesis that the rng in use is
 * not correlated.  Deviation from uniformity is a sign of the failure
 * of the hypothesis that the generator is "good".  Here we simply
 * input a vector of p-values and a count, and output an overall
 * p-value for the set of tests.
 *========================================================================
 */

#include <dieharder/libdieharder.h>
#define KCOUNTMAX 4999

double p_ks_new(int n,double d);

kstest(double * pvalue,int count)23 double kstest(double *pvalue,int count)
24 {
25 
26  int i;
27  double y,d,d1,d2,dmax,csqrt;
28  double p,x;
29 
30  /* First, handle degenerate cases. */
31  if (count < 1) return -1.0;
32  if (count == 1) return *pvalue;
33 
34  /*
35   * We start by sorting the list of pvalues.
36   */
37  gsl_sort(pvalue,1,count);
38 
39  /*
40   * Here's the test.  For each (sorted) pvalue, its index is the
41   * number of values cumulated to the left of it.  d is the distance
42   * between that number and the straight line representing a uniform
43   * accumulation.  We save the maximum d across all cumulated samples
44   * and transform it into a p-value at the end.
45   */
46  dmax = 0.0;
47  if(verbose == D_KSTEST || verbose == D_ALL){
48    printf("       p             y              d             d1           d2         dmax\n");
49  }
50  for(i=1;i<=count;i++){
51    y = (double) i/(count+1.0);
52    /*
53     * d = fabs(pvalue[i] - y);
54     *
55     * Correction by David Bauer, pulled from R code for KS.
56     * Apparently the above line is right/left biased and this
57     * handles the position more symmetrically.   This fix is
58     * CRUCIAL for small sample sizes, and can be validated with:
59     *   dieharder -d 204 -t 100 -p 10000 -D default -D histogram
60     *
61     * Note:  Without the fabs, pvalue could be LESS than y
62     * and be ignored by fmax.  Also, I don't really like the end
63     * points -- y[0] shouldn't be zero, y[count] shouldn't be one.  This
64     * sort of thing seems as thought it might matter at very high
65     * precision.  Let's try running from 1 to count and dividing by count
66     * plus 1.
67     */
68    d1 = pvalue[i-1] - y;
69    d2 = fabs(1.0/(count+1.0) - d1);
70    d1 = fabs(d1);
71    d = fmax(d1,d2);
72 
73    if(d1 > dmax) dmax = d1;
74    if(verbose == D_KSTEST || verbose == D_ALL){
75      printf("%11.6f   %11.6f    %11.6f   %11.6f  %11.6f  %11.6f\n",pvalue[i-1],y,d,d1,d2,dmax);
76    }
77 
78  }
79 
80  /*
81   * Here's where we have to make a few choices:
82   *
83   * ks_test = 0
84   * Use the new algorithm when count is less than KCOUNTMAX, but
85   * for large values of the count use the old q_ks(), valid for
86   * asymptotically large counts and MUCH faster.  This will
87   * will introduce a SMALL error in the distribution of pvalues
88   * for all of the tests together, but it will be negligible for
89   * any given single test.
90   *
91   * ks_test = 1
92   * Use the new (mostly exact) algorithm exactly as is.  This is
93   * QUITE SLOW although it is "sped up" at the expense of some
94   * precision.  By slow I mean 220 seconds at -k 1, 2.5 seconds at
95   * (default) -k 0.
96   *
97   * ks_test = 2
98   * Use the exact (7 digit accurate) version of the new code, but
99   * be prepared for "long" runtimes.  Empirically I only got 230
100   * seconds -- not enough to worry about, really.
101   *
102   */
103 
104  /*
105   * We only need this test here (and can adjust KCOUNTMAX to play
106   * with accuracy vs speed, although I've verified that 5000 isn't
107   * terrible).  The ks_test = 1 or 2 option are fallthrough, with
108   * 1 being "normal", 2 omitting the speedup step below to get FULL
109   * precision.
110   */
111  if(ks_test == 0 && count > KCOUNTMAX){
112    csqrt = sqrt(count);
113    x = (csqrt + 0.12 + 0.11/csqrt)*dmax;
114    p = q_ks(x);
115    if(verbose == D_KSTEST || verbose == D_ALL){
116      printf("# kstest: returning p = %f\n",p);
117    }
118    return(p);
119  }
120 
121  /*
122   * This uses the new "exact" kolmogorov distribution, which appears to
123   * work!  It's moderately expensive computationally, but I think it will
124   * be in bounds for typical dieharder test ranges, and I also expect that
125   * it can be sped up pretty substantially.
126   */
127 
128  if(verbose == D_KSTEST || verbose == D_ALL){
129    printf("# kstest: calling p_ks_new(count = %d,dmax = %f)\n",count,dmax);
130  }
131  p = p_ks_new(count,dmax);
132  if(verbose == D_KSTEST || verbose == D_ALL){
133    printf("# kstest: returning p = %f\n",p);
134  }
135 
136  return(p);
137 
138 }


q_ks(double x)141 double q_ks(double x)
142 {
143 
144  int i,sign;
145  double qsum;
146  double kappa;
147 
148  kappa = -2.0*x*x;
149  sign = -1;
150  qsum = 0.0;
151  for(i=1;i<100;i++){
152    sign *= -1;
153    qsum += (double)sign*exp(kappa*(double)i*(double)i);
154    if(verbose == D_KSTEST || verbose == D_ALL){
155      printf("Q_ks %d: %f\n",i,2.0*qsum);
156    }
157  }
158 
159  if(verbose == D_KSTEST || verbose == D_ALL){
160    printf("Q_ks returning %f\n",2.0*qsum);
161  }
162  return(2.0*qsum);
163 
164 }


/*
 *========================================================================
 * The following routines are from the paper "Evaluating Kolmogorov's
 * Distribution" by Marsaglia, Tsang and Wang.  They should permit kstest
 * to return precise pvalues for more or less arbitrary numbers of samples
 * ranging from n = 10 out to n approaching the asymptotic form where
 * Kolmogorov's original result is valid.  It contains some cutoffs
 * intended to prevent excessive runtimes in the rare cases where p being
 * returned is close to 1 and n is large (at which point the asymptotic
 * form is generally adequate anyway).
 *
 * This is so far a first pass -- I'm guessing that we can improve the
 * linear algebra using the GSL or otherwise.  The code is therefore
 * more or less straight from the paper.
 *========================================================================
 */
mMultiply(double * A,double * B,double * C,int m)183 void mMultiply(double *A,double *B,double *C,int m)
184 {
185   int i,j,k;
186   double s;
187   for(i=0; i<m; i++){
188     for(j=0; j<m; j++){
189       s=0.0;
190       for(k=0; k<m; k++){
191         s+=A[i*m+k]*B[k*m+j];
192 	C[i*m+j]=s;
193       }
194     }
195   }
196 }
mPower(double * A,int eA,double * V,int * eV,int m,int n)198 void mPower(double *A,int eA,double *V,int *eV,int m,int n)
199 {
200   double *B;
201   int eB,i,j;
202 
203   /*
204    * n == 1: first power just returns A.
205    */
206   if(n == 1){
207     for(i=0;i<m*m;i++){
208       V[i]=A[i];*eV=eA;
209     }
210     return;
211   }
212 
213   /*
214    * This is a recursive call.  Either n/2 will equal 1 (and the line
215    * above will return and the recursion will terminate) or it won't
216    * and we will cumulate the product.
217    */
218   mPower(A,eA,V,eV,m,n/2);
219   /* printf("n = %d  mP eV = %d\n",n/2,*eV); */
220   B=(double*)malloc((m*m)*sizeof(double));
221   mMultiply(V,V,B,m);
222   eB=2*(*eV);
223   if(n%2==0){
224     for(i=0;i<m*m;i++){
225       V[i]=B[i];
226     }
227     *eV=eB;
228     /* printf("n = %d (even) eV = %d\n",n,*eV); */
229   } else {
230     mMultiply(A,B,V,m);
231     *eV=eA+eB;
232     /* printf("n = %d (odd) eV = %d\n",n,*eV); */
233   }
234 
235   /*
236    * Rescale as needed to avoid overflow.  Note that we check
237    * EVERY element of V to make sure NONE of them exceed the
238    * threshold (and if any do, rescale the whole thing).
239    */
240   for(i=0;i<m*m;i++) {
241     if( V[i] > 1.0e140 ) {
242       for(j=0;j<m*m;j++) {
243         V[j]=V[j]*1.0e-140;
244       }
245       *eV+=140;
246       /* printf("rescale eV = %d\n",*eV); */
247     }
248   }
249 
250   free(B);
251 
252 }

/*
 * Marsaglia's definition is K = 1 - p.  I convert it to p, as p is
 * what we want in dieharder.
 */
p_ks_new(int n,double d)258 double p_ks_new(int n,double d)
259 {
260 
261   int k,m,i,j,g,eH,eQ;
262   double h,s,*H,*Q;
263 
264   /*
265    * The next fragment is used if ks_test is not 2.  This is faster
266    * than going to convergence, but is still really slow compared to
267    * switching to the asymptotic form.
268    *
269    * If you require >7 digit accuracy in the right tail use ks_test = 2
270    * but be prepared for occasional long runtimes.
271    */
272   s=d*d*n;
273   if(ks_test != 2 && ( s>7.24 || ( s>3.76 && n>99 ))) {
274     if(n == 10400) printf("Returning the easy way\n");
275     return 2.0*exp(-(2.000071+.331/sqrt(n)+1.409/n)*s);
276   }
277 
278   /*
279    * If ks_test = 2, we always execute the following code and work to
280    * convergence.
281    */
282   k=(int)(n*d)+1;
283   m=2*k-1;
284   h=k-n*d;
285   /* printf("p_ks_new:  n = %d  k = %d  m = %d  h = %f\n",n,k,m,h); */
286   H=(double*)malloc((m*m)*sizeof(double));
287   Q=(double*)malloc((m*m)*sizeof(double));
288   for(i=0;i<m;i++){
289     for(j=0;j<m;j++){
290       if(i-j+1<0){
291         H[i*m+j]=0;
292       } else {
293         H[i*m+j]=1;
294       }
295     }
296   }
297 
298   for(i=0;i<m;i++){
299     H[i*m]-=pow(h,i+1);
300     H[(m-1)*m+i]-=pow(h,(m-i));
301   }
302 
303   H[(m-1)*m]+=(2*h-1>0?pow(2*h-1,m):0);
304   for(i=0;i<m;i++){
305     for(j=0;j<m;j++){
306       if(i-j+1>0){
307         for(g=1;g<=i-j+1;g++){
308 	  H[i*m+j]/=g;
309 	}
310       }
311     }
312   }
313 
314   eH=0;
315   mPower(H,eH,Q,&eQ,m,n);
316   /* printf("p_ks_new eQ = %d\n",eQ); */
317   s=Q[(k-1)*m+k-1];
318   /* printf("s = %16.8e\n",s); */
319   for(i=1;i<=n;i++){
320     s=s*i/n;
321     /* printf("i = %d: s = %16.8e\n",i,s); */
322     if(s<1e-140){
323       /* printf("Oops, starting to have underflow problems: s = %16.8e\n",s); */
324       s*=1e140;
325       eQ-=140;
326     }
327   }
328 
329   /* printf("I'll bet this is it: s = %16.8e  eQ = %d\n",s,eQ); */
330   s*=pow(10.,eQ);
331   s = 1.0 - s;
332   free(H);
333   free(Q);
334   return s;
335 
336 }

/*
 *========================================================================
 * This is the Kuiper variant of KS.  It is symmetric, that is,
 * it isn't biased as to where the region tested is started or stopped
 * on a ring of values.  However, we simply cannot evaluate the
 * CDF below to the same precision that we can for the KS test above.
 * For that reason this code is basically obsolete.  We'll leave it
 * in for now in case somebody figures out how to evaluate the
 * q_ks_kuiper() to high precision for arbitrary count but we really
 * don't need it anymore unless it turns out to be faster AND precise.
 *========================================================================
 */
kstest_kuiper(double * pvalue,int count)350 double kstest_kuiper(double *pvalue,int count)
351 {
352 
353  int i;
354  double y,v,vmax,vmin,csqrt;
355  double p,x;
356 
357  /*
358   * We start by sorting the list of pvalues.
359   */
360  if(verbose == D_KSTEST || verbose == D_ALL){
361    printf("# kstest_kuiper(): Computing Kuiper KS pvalue for:\n");
362    for(i=0;i<count;i++){
363      printf("# kstest_kuiper(): %3d    %10.5f\n",i,pvalue[i]);
364    }
365  }
366 
367  /*
368   * This test is useless if there is only one pvalue.  In fact, it appears
369   * to return a wrong answer in this case, as it cannot set BOTH vmin
370   * AND vmax correctly, or so it appears.  So one solution is to just
371   * return the one pvalue and skip the rest of the test.
372   */
373  if(count == 1) return pvalue[0];
374  gsl_sort(pvalue,1,count);
375 
376  /*
377   * Here's the test.  For each (sorted) pvalue, its index is the number of
378   * values cumulated to the left of it.  v is the distance between that
379   * number and the straight line representing a uniform accumulation.  We
380   * save the maximum AND minimum v across all cumulated samples and
381   * transform it into a p-value at the end.
382   */
383  if(verbose == D_KSTEST || verbose == D_ALL){
384    printf("    obs       exp           v        vmin         vmax\n");
385  }
386  vmin = 0.0;
387  vmax = 0.0;
388  for(i=0;i<count;i++){
389    y = (double) i/count;
390    v = pvalue[i] - y;
391    /* can only do one OR the other here, not AND the other. */
392    if(v > vmax) {
393      vmax = v;
394    } else if(v < vmin) {
395      vmin = v;
396    }
397    if(verbose == D_KSTEST || verbose == D_ALL){
398      printf("%8.3f   %8.3f    %16.6e   %16.6e    %16.6e\n",pvalue[i],y,v,vmin,vmax);
399    }
400  }
401  v = fabs(vmax) + fabs(vmin);
402  csqrt = sqrt(count);
403  x = (csqrt + 0.155 + 0.24/csqrt)*v;
404  if(verbose == D_KSTEST || verbose == D_ALL){
405    printf("Kuiper's V = %8.3f, evaluating q_ks_kuiper(%6.2f)\n",v,x);
406  }
407  p = q_ks_kuiper(x,count);
408 
409  if(verbose == D_KSTEST || verbose == D_ALL){
410    if(p < 0.0001){
411      printf("# kstest_kuiper(): Test Fails!  Visually inspect p-values:\n");
412      for(i=0;i<count;i++){
413        printf("# kstest_kuiper(): %3d    %10.5f\n",i,pvalue[i]);
414      }
415    }
416  }
417 
418  return(p);
419 
420 }
q_ks_kuiper(double x,int count)422 double q_ks_kuiper(double x,int count)
423 {
424 
425  uint m,msq;
426  double xsq,preturn,q,q_last,p,p_last;
427 
428  /*
429   * OK, Numerical Recipes screwed us even in terms of the algorithm.
430   * This one includes BOTH terms.
431   *   Q = 2\sum_{m=1}^\infty (4m^2x^2 - 1)exp(-2m^2x^2)
432   *   P = 8x/3\sqrt{N}\sum_{m=i}^\infty m^2(4m^2x^2 - 3)
433   *   Q = Q - P (and leaving off P has consistently biased us HIGH!
434   * To get the infinite sum, we simply sum to double precision convergence.
435   */
436  m = 0;
437  q = 0.0;
438  q_last = -1.0;
439  while(q != q_last){
440    m++;
441    msq = m*m;
442    xsq = x*x;
443    q_last = q;
444    q += (4.0*msq*xsq - 1.0)*exp(-2.0*msq*xsq);
445  }
446  q = 2.0*q;
447 
448  m = 0;
449  p = 0.0;
450  p_last = -1.0;
451  while(p != p_last){
452    m++;
453    msq = m*m;
454    xsq = x*x;
455    p_last = p;
456    p += msq*(4.0*msq*xsq - 3.0)*exp(-2.0*msq*xsq);
457  }
458  p = (8.0*x*p)/(3.0*sqrt(count));
459 
460  preturn = q - p;
461 
462  if(verbose == D_KSTEST || verbose == D_ALL){
463    printf("Q_ks yields preturn = %f:  q = %f  p = %f\n",preturn,q,p);
464  }
465  return(preturn);
466 
467 }