/*
 * See copyright in copyright.h and the accompanying file COPYING
 */

/*
 *========================================================================
 * This is the Kolmogorov-Smirnov test for uniformity on the interval
 * [0,1). p-values from a (large) number of independent trials should
 * be uniformly distributed on the interval [0,1) if the underlying
 * result is consistent with the hypothesis that the rng in use is
 * not correlated. Deviation from uniformity is a sign of the failure
 * of the hypothesis that the generator is "good". Here we simply
 * input a vector of p-values and a count, and output an overall
 * p-value for the set of tests.
 *========================================================================
 */
17
18 #include <dieharder/libdieharder.h>
19 #define KCOUNTMAX 4999
20
21 double p_ks_new(int n,double d);
22
kstest(double * pvalue,int count)23 double kstest(double *pvalue,int count)
24 {
25
26 int i;
27 double y,d,d1,d2,dmax,csqrt;
28 double p,x;
29
30 /* First, handle degenerate cases. */
31 if (count < 1) return -1.0;
32 if (count == 1) return *pvalue;
33
34 /*
35 * We start by sorting the list of pvalues.
36 */
37 gsl_sort(pvalue,1,count);
38
39 /*
40 * Here's the test. For each (sorted) pvalue, its index is the
41 * number of values cumulated to the left of it. d is the distance
42 * between that number and the straight line representing a uniform
43 * accumulation. We save the maximum d across all cumulated samples
44 * and transform it into a p-value at the end.
45 */
46 dmax = 0.0;
47 if(verbose == D_KSTEST || verbose == D_ALL){
48 printf(" p y d d1 d2 dmax\n");
49 }
50 for(i=1;i<=count;i++){
51 y = (double) i/(count+1.0);
52 /*
53 * d = fabs(pvalue[i] - y);
54 *
55 * Correction by David Bauer, pulled from R code for KS.
56 * Apparently the above line is right/left biased and this
57 * handles the position more symmetrically. This fix is
58 * CRUCIAL for small sample sizes, and can be validated with:
59 * dieharder -d 204 -t 100 -p 10000 -D default -D histogram
60 *
61 * Note: Without the fabs, pvalue could be LESS than y
62 * and be ignored by fmax. Also, I don't really like the end
63 * points -- y[0] shouldn't be zero, y[count] shouldn't be one. This
64 * sort of thing seems as thought it might matter at very high
65 * precision. Let's try running from 1 to count and dividing by count
66 * plus 1.
67 */
68 d1 = pvalue[i-1] - y;
69 d2 = fabs(1.0/(count+1.0) - d1);
70 d1 = fabs(d1);
71 d = fmax(d1,d2);
72
73 if(d1 > dmax) dmax = d1;
74 if(verbose == D_KSTEST || verbose == D_ALL){
75 printf("%11.6f %11.6f %11.6f %11.6f %11.6f %11.6f\n",pvalue[i-1],y,d,d1,d2,dmax);
76 }
77
78 }
79
80 /*
81 * Here's where we have to make a few choices:
82 *
83 * ks_test = 0
84 * Use the new algorithm when count is less than KCOUNTMAX, but
85 * for large values of the count use the old q_ks(), valid for
86 * asymptotically large counts and MUCH faster. This will
87 * will introduce a SMALL error in the distribution of pvalues
88 * for all of the tests together, but it will be negligible for
89 * any given single test.
90 *
91 * ks_test = 1
92 * Use the new (mostly exact) algorithm exactly as is. This is
93 * QUITE SLOW although it is "sped up" at the expense of some
94 * precision. By slow I mean 220 seconds at -k 1, 2.5 seconds at
95 * (default) -k 0.
96 *
97 * ks_test = 2
98 * Use the exact (7 digit accurate) version of the new code, but
99 * be prepared for "long" runtimes. Empirically I only got 230
100 * seconds -- not enough to worry about, really.
101 *
102 */
103
104 /*
105 * We only need this test here (and can adjust KCOUNTMAX to play
106 * with accuracy vs speed, although I've verified that 5000 isn't
107 * terrible). The ks_test = 1 or 2 option are fallthrough, with
108 * 1 being "normal", 2 omitting the speedup step below to get FULL
109 * precision.
110 */
111 if(ks_test == 0 && count > KCOUNTMAX){
112 csqrt = sqrt(count);
113 x = (csqrt + 0.12 + 0.11/csqrt)*dmax;
114 p = q_ks(x);
115 if(verbose == D_KSTEST || verbose == D_ALL){
116 printf("# kstest: returning p = %f\n",p);
117 }
118 return(p);
119 }
120
121 /*
122 * This uses the new "exact" kolmogorov distribution, which appears to
123 * work! It's moderately expensive computationally, but I think it will
124 * be in bounds for typical dieharder test ranges, and I also expect that
125 * it can be sped up pretty substantially.
126 */
127
128 if(verbose == D_KSTEST || verbose == D_ALL){
129 printf("# kstest: calling p_ks_new(count = %d,dmax = %f)\n",count,dmax);
130 }
131 p = p_ks_new(count,dmax);
132 if(verbose == D_KSTEST || verbose == D_ALL){
133 printf("# kstest: returning p = %f\n",p);
134 }
135
136 return(p);
137
138 }
139
140
q_ks(double x)141 double q_ks(double x)
142 {
143
144 int i,sign;
145 double qsum;
146 double kappa;
147
148 kappa = -2.0*x*x;
149 sign = -1;
150 qsum = 0.0;
151 for(i=1;i<100;i++){
152 sign *= -1;
153 qsum += (double)sign*exp(kappa*(double)i*(double)i);
154 if(verbose == D_KSTEST || verbose == D_ALL){
155 printf("Q_ks %d: %f\n",i,2.0*qsum);
156 }
157 }
158
159 if(verbose == D_KSTEST || verbose == D_ALL){
160 printf("Q_ks returning %f\n",2.0*qsum);
161 }
162 return(2.0*qsum);
163
164 }
165
166
/*
 *========================================================================
 * The following routines are from the paper "Evaluating Kolmogorov's
 * Distribution" by Marsaglia, Tsang and Wang.  They should permit kstest
 * to return precise pvalues for more or less arbitrary numbers of samples
 * ranging from n = 10 out to n approaching the asymptotic form where
 * Kolmogorov's original result is valid.  It contains some cutoffs
 * intended to prevent excessive runtimes in the rare cases where p being
 * returned is close to 1 and n is large (at which point the asymptotic
 * form is generally adequate anyway).
 *
 * This is so far a first pass -- I'm guessing that we can improve the
 * linear algebra using the GSL or otherwise.  The code is therefore
 * more or less straight from the paper.
 *========================================================================
 */
/*
 * mMultiply() - dense m x m matrix product C = A * B.
 * All matrices are stored row-major in flat arrays of length m*m.
 * C must not alias A or B... actually the callers here pass distinct
 * buffers only for the output; aliasing A==B (squaring) is safe since
 * C is written only after each dot product completes.
 */
void mMultiply(double *A,double *B,double *C,int m)
{
 int i,j,k;
 double s;
 for(i=0; i<m; i++){
   for(j=0; j<m; j++){
     s=0.0;
     for(k=0; k<m; k++){
       s+=A[i*m+k]*B[k*m+j];
     }
     /*
      * Store once per output element.  (Previously this store sat
      * inside the k loop, writing C[i*m+j] m times per element.)
      */
     C[i*m+j]=s;
   }
 }
}
197
/*
 * mPower() - compute V = A^n for m x m matrices by recursive squaring,
 * carrying a base-10 scale exponent to avoid overflow.
 *
 *   A,eA - input matrix and its exponent (true matrix = A * 10^eA)
 *   V,eV - output matrix and its exponent (true matrix = V * 10^eV)
 *   m    - matrix dimension
 *   n    - power, n >= 1
 *
 * From Marsaglia, Tsang and Wang, "Evaluating Kolmogorov's
 * Distribution".  Whenever any element of V exceeds 1e140 the whole
 * matrix is rescaled by 1e-140 and eV bumped by 140.
 */
void mPower(double *A,int eA,double *V,int *eV,int m,int n)
{
 double *B;
 int eB,i,j;

 /*
  * n == 1: first power just returns A.
  */
 if(n == 1){
   for(i=0;i<m*m;i++){
     V[i]=A[i];
   }
   *eV=eA;   /* set once (was redundantly reassigned inside the loop) */
   return;
 }

 /*
  * This is a recursive call.  Either n/2 will equal 1 (and the branch
  * above will return and the recursion will terminate) or it won't
  * and we will cumulate the product.
  */
 mPower(A,eA,V,eV,m,n/2);
 B=(double*)malloc((m*m)*sizeof(double));
 mMultiply(V,V,B,m);           /* B = (A^(n/2))^2 */
 eB=2*(*eV);
 if(n%2==0){
   /* n even: A^n = (A^(n/2))^2 */
   for(i=0;i<m*m;i++){
     V[i]=B[i];
   }
   *eV=eB;
 } else {
   /* n odd: A^n = A * (A^(n/2))^2 */
   mMultiply(A,B,V,m);
   *eV=eA+eB;
 }

 /*
  * Rescale as needed to avoid overflow.  Note that we check
  * EVERY element of V to make sure NONE of them exceed the
  * threshold (and if any do, rescale the whole thing).
  */
 for(i=0;i<m*m;i++) {
   if( V[i] > 1.0e140 ) {
     for(j=0;j<m*m;j++) {
       V[j]=V[j]*1.0e-140;
     }
     *eV+=140;
   }
 }

 free(B);

}
253
/*
 * Marsaglia's definition is K = 1 - p.  I convert it to p, as p is
 * what we want in dieharder.
 */
p_ks_new(int n,double d)258 double p_ks_new(int n,double d)
259 {
260
261 int k,m,i,j,g,eH,eQ;
262 double h,s,*H,*Q;
263
264 /*
265 * The next fragment is used if ks_test is not 2. This is faster
266 * than going to convergence, but is still really slow compared to
267 * switching to the asymptotic form.
268 *
269 * If you require >7 digit accuracy in the right tail use ks_test = 2
270 * but be prepared for occasional long runtimes.
271 */
272 s=d*d*n;
273 if(ks_test != 2 && ( s>7.24 || ( s>3.76 && n>99 ))) {
274 if(n == 10400) printf("Returning the easy way\n");
275 return 2.0*exp(-(2.000071+.331/sqrt(n)+1.409/n)*s);
276 }
277
278 /*
279 * If ks_test = 2, we always execute the following code and work to
280 * convergence.
281 */
282 k=(int)(n*d)+1;
283 m=2*k-1;
284 h=k-n*d;
285 /* printf("p_ks_new: n = %d k = %d m = %d h = %f\n",n,k,m,h); */
286 H=(double*)malloc((m*m)*sizeof(double));
287 Q=(double*)malloc((m*m)*sizeof(double));
288 for(i=0;i<m;i++){
289 for(j=0;j<m;j++){
290 if(i-j+1<0){
291 H[i*m+j]=0;
292 } else {
293 H[i*m+j]=1;
294 }
295 }
296 }
297
298 for(i=0;i<m;i++){
299 H[i*m]-=pow(h,i+1);
300 H[(m-1)*m+i]-=pow(h,(m-i));
301 }
302
303 H[(m-1)*m]+=(2*h-1>0?pow(2*h-1,m):0);
304 for(i=0;i<m;i++){
305 for(j=0;j<m;j++){
306 if(i-j+1>0){
307 for(g=1;g<=i-j+1;g++){
308 H[i*m+j]/=g;
309 }
310 }
311 }
312 }
313
314 eH=0;
315 mPower(H,eH,Q,&eQ,m,n);
316 /* printf("p_ks_new eQ = %d\n",eQ); */
317 s=Q[(k-1)*m+k-1];
318 /* printf("s = %16.8e\n",s); */
319 for(i=1;i<=n;i++){
320 s=s*i/n;
321 /* printf("i = %d: s = %16.8e\n",i,s); */
322 if(s<1e-140){
323 /* printf("Oops, starting to have underflow problems: s = %16.8e\n",s); */
324 s*=1e140;
325 eQ-=140;
326 }
327 }
328
329 /* printf("I'll bet this is it: s = %16.8e eQ = %d\n",s,eQ); */
330 s*=pow(10.,eQ);
331 s = 1.0 - s;
332 free(H);
333 free(Q);
334 return s;
335
336 }
337
/*
 *========================================================================
 * This is the Kuiper variant of KS.  It is symmetric, that is,
 * it isn't biased as to where the region tested is started or stopped
 * on a ring of values.  However, we simply cannot evaluate the
 * CDF below to the same precision that we can for the KS test above.
 * For that reason this code is basically obsolete.  We'll leave it
 * in for now in case somebody figures out how to evaluate the
 * q_ks_kuiper() to high precision for arbitrary count but we really
 * don't need it anymore unless it turns out to be faster AND precise.
 *========================================================================
 */
kstest_kuiper(double * pvalue,int count)350 double kstest_kuiper(double *pvalue,int count)
351 {
352
353 int i;
354 double y,v,vmax,vmin,csqrt;
355 double p,x;
356
357 /*
358 * We start by sorting the list of pvalues.
359 */
360 if(verbose == D_KSTEST || verbose == D_ALL){
361 printf("# kstest_kuiper(): Computing Kuiper KS pvalue for:\n");
362 for(i=0;i<count;i++){
363 printf("# kstest_kuiper(): %3d %10.5f\n",i,pvalue[i]);
364 }
365 }
366
367 /*
368 * This test is useless if there is only one pvalue. In fact, it appears
369 * to return a wrong answer in this case, as it cannot set BOTH vmin
370 * AND vmax correctly, or so it appears. So one solution is to just
371 * return the one pvalue and skip the rest of the test.
372 */
373 if(count == 1) return pvalue[0];
374 gsl_sort(pvalue,1,count);
375
376 /*
377 * Here's the test. For each (sorted) pvalue, its index is the number of
378 * values cumulated to the left of it. v is the distance between that
379 * number and the straight line representing a uniform accumulation. We
380 * save the maximum AND minimum v across all cumulated samples and
381 * transform it into a p-value at the end.
382 */
383 if(verbose == D_KSTEST || verbose == D_ALL){
384 printf(" obs exp v vmin vmax\n");
385 }
386 vmin = 0.0;
387 vmax = 0.0;
388 for(i=0;i<count;i++){
389 y = (double) i/count;
390 v = pvalue[i] - y;
391 /* can only do one OR the other here, not AND the other. */
392 if(v > vmax) {
393 vmax = v;
394 } else if(v < vmin) {
395 vmin = v;
396 }
397 if(verbose == D_KSTEST || verbose == D_ALL){
398 printf("%8.3f %8.3f %16.6e %16.6e %16.6e\n",pvalue[i],y,v,vmin,vmax);
399 }
400 }
401 v = fabs(vmax) + fabs(vmin);
402 csqrt = sqrt(count);
403 x = (csqrt + 0.155 + 0.24/csqrt)*v;
404 if(verbose == D_KSTEST || verbose == D_ALL){
405 printf("Kuiper's V = %8.3f, evaluating q_ks_kuiper(%6.2f)\n",v,x);
406 }
407 p = q_ks_kuiper(x,count);
408
409 if(verbose == D_KSTEST || verbose == D_ALL){
410 if(p < 0.0001){
411 printf("# kstest_kuiper(): Test Fails! Visually inspect p-values:\n");
412 for(i=0;i<count;i++){
413 printf("# kstest_kuiper(): %3d %10.5f\n",i,pvalue[i]);
414 }
415 }
416 }
417
418 return(p);
419
420 }
421
q_ks_kuiper(double x,int count)422 double q_ks_kuiper(double x,int count)
423 {
424
425 uint m,msq;
426 double xsq,preturn,q,q_last,p,p_last;
427
428 /*
429 * OK, Numerical Recipes screwed us even in terms of the algorithm.
430 * This one includes BOTH terms.
431 * Q = 2\sum_{m=1}^\infty (4m^2x^2 - 1)exp(-2m^2x^2)
432 * P = 8x/3\sqrt{N}\sum_{m=i}^\infty m^2(4m^2x^2 - 3)
433 * Q = Q - P (and leaving off P has consistently biased us HIGH!
434 * To get the infinite sum, we simply sum to double precision convergence.
435 */
436 m = 0;
437 q = 0.0;
438 q_last = -1.0;
439 while(q != q_last){
440 m++;
441 msq = m*m;
442 xsq = x*x;
443 q_last = q;
444 q += (4.0*msq*xsq - 1.0)*exp(-2.0*msq*xsq);
445 }
446 q = 2.0*q;
447
448 m = 0;
449 p = 0.0;
450 p_last = -1.0;
451 while(p != p_last){
452 m++;
453 msq = m*m;
454 xsq = x*x;
455 p_last = p;
456 p += msq*(4.0*msq*xsq - 3.0)*exp(-2.0*msq*xsq);
457 }
458 p = (8.0*x*p)/(3.0*sqrt(count));
459
460 preturn = q - p;
461
462 if(verbose == D_KSTEST || verbose == D_ALL){
463 printf("Q_ks yields preturn = %f: q = %f p = %f\n",preturn,q,p);
464 }
465 return(preturn);
466
467 }
468
469