1 /*
2 * See copyright in copyright.h and the accompanying file COPYING
3 */
4
5 /*
6 *========================================================================
7 * This is the Diehard Runs test, rewritten from the description
8 * in tests.txt on George Marsaglia's diehard site.
9 *
10 * Here is the test description from diehard_tests.txt:
11 *
12 * This is the RUNS test. It counts runs up, and runs down,in a sequence
13 * of uniform [0,1) variables, obtained by floating the 32-bit integers
14 * in the specified file. This example shows how runs are counted:
15 * .123, .357, .789, .425,. 224, .416, .95
16 * contains an up-run of length 3, a down-run of length 2 and an up-run
17 * of (at least) 2, depending on the next values. The covariance matrices
18 * for the runs-up and runs-down are well-known, leading to chisquare tests
19 * for quadratic forms in the weak inverses of the covariance matrices.
20 * Runs are counted for sequences of length 10,000. This is done ten times,
21 * then repeated.
22 *
23 * Comment
24 *
25 * I modify this the following ways. First, I let the sequence length be
26 * the variable -t (vector length) instead of fixing it at 10,000. This
27 * lets one test sequences that are much longer (entirely possible with
28 * a modern CPU even for a fairly slow RNG). Second, I repeat this for
29 * the variable -s (samples) times, default 100 and not just 10. Third,
30 * because RNG's often have "bad seeds" for which they misbehave, the
31 * individual sequences can be optionally -i reseeded for each sample.
32 * Because this CAN let bad behavior be averaged out to where
33 * it isn't apparent for many samples with few bad seeds, we may need to
34 * plot the actual distribution of p-values for this and other tests where
35 * this option is used. Fourth, it is silly to convert integers into floats
36 * in order to do this test. Up sequences in integers are down sequences in
37 * floats once one divides by the largest integer available to the generator,
38 * period. Integer arithmetic is much faster than float AND one skips the
39 * very costly division associated with conversion.
40 * *========================================================================
41 */
42
43
44 #include <dieharder/libdieharder.h>
45 /*
46 * The following are the definitions and parameters for runs, based on
47 * Journal of Applied Statistics v30, Algorithm AS 157, 1981:
48 * The Runs-Up and Runs-Down Tests, by R. G. T. Grafton.
49 * (and before that Knuth's The Art of Programming v. 2).
50 */
51
52 #define RUN_MAX 6
53
54 /*
55 * a_ij
56 */
57 static double a[6][6] = {
58 { 4529.4, 9044.9, 13568.0, 18091.0, 22615.0, 27892.0},
59 { 9044.9, 18097.0, 27139.0, 36187.0, 45234.0, 55789.0},
60 {13568.0, 27139.0, 40721.0, 54281.0, 67852.0, 83685.0},
61 {18091.0, 36187.0, 54281.0, 72414.0, 90470.0, 111580.0},
62 {22615.0, 45234.0, 67852.0, 90470.0, 113262.0, 139476.0},
63 {27892.0, 55789.0, 83685.0, 111580.0, 139476.0, 172860.0}
64 };
65
66 /*
67 * b_i
68 */
69 static double b[6] = {
70 1.0/6.0,
71 5.0/24.0,
72 11.0/120.0,
73 19.0/720.0,
74 29.0/5040.0,
75 1.0/840.0,};
76
77 uint *runs_rand;
78
diehard_runs(Test ** test,int irun)79 int diehard_runs(Test **test, int irun)
80 {
81
82 int i,j,k,t,ns;
83 unsigned int ucount,dcount,increased;
84 int upruns[RUN_MAX],downruns[RUN_MAX];
85 double uv,dv,up_pks,dn_pks;
86 double *uv_pvalue,*dv_pvalue;
87
88 runs_rand = (uint *)malloc(test[0]->tsamples*sizeof(uint));
89 /*
90 * This is just for display.
91 */
92 test[0]->ntuple = 0;
93 test[1]->ntuple = 0;
94
95 /*
96 * Clear up and down run bins
97 */
98 for(k=0;k<RUN_MAX;k++){
99 upruns[k] = 0;
100 downruns[k] = 0;
101 }
102
103 /*
104 * Now count up and down runs and increment the bins. Note
105 * that each successive up counts as a run of one down, and
106 * each successive down counts as a run of one up.
107 */
108 ucount = dcount = 1;
109 if(verbose){
110 printf("j rand ucount dcount\n");
111 }
112 runs_rand[0] = gsl_rng_get(rng);
113 for(t=1;t<test[0]->tsamples;t++) {
114 runs_rand[t] = gsl_rng_get(rng);
115 if(verbose){
116 printf("%d: %10u %u %u\n",t,runs_rand[t],ucount,dcount);
117 }
118
119 /*
120 * Did we increase?
121 */
122 if(runs_rand[t] > runs_rand[t-1]){
123 ucount++;
124 if(ucount > RUN_MAX) ucount = RUN_MAX;
125 downruns[dcount-1]++;
126 dcount = 1;
127 } else {
128 dcount++;
129 if(dcount > RUN_MAX) dcount = RUN_MAX;
130 upruns[ucount-1]++;
131 ucount = 1;
132 }
133 }
134 if(runs_rand[test[0]->tsamples-1] > runs_rand[0]){
135 ucount++;
136 if(ucount > RUN_MAX) ucount = RUN_MAX;
137 downruns[dcount-1]++;
138 dcount = 1;
139 } else {
140 dcount++;
141 if(dcount > RUN_MAX) dcount = RUN_MAX;
142 upruns[ucount-1]++;
143 ucount = 1;
144 }
145
146 /*
147 * This ends a single sample.
148 * Compute the test statistic for up and down runs.
149 */
150 uv=0.0;
151 dv=0.0;
152 if(verbose){
153 printf(" i upruns downruns\n");
154 }
155 for(i=0;i<RUN_MAX;i++) {
156 if(verbose){
157 printf("%d: %7d %7d\n",i,upruns[i],downruns[i]);
158 }
159 for(j=0;j<RUN_MAX;j++) {
160 uv += ((double)upruns[i] - test[0]->tsamples*b[i])*(upruns[j] - test[0]->tsamples*b[j])*a[i][j];
161 dv += ((double)downruns[i] - test[0]->tsamples*b[i])*(downruns[j] - test[0]->tsamples*b[j])*a[i][j];
162 }
163 }
164 uv /= (double)test[0]->tsamples;
165 dv /= (double)test[0]->tsamples;
166
167 /*
168 * This NEEDS WORK! It isn't right, somehow...
169 */
170 up_pks = 1.0 - exp ( -0.5 * uv ) * ( 1.0 + 0.5 * uv + 0.125 * uv*uv );
171 dn_pks = 1.0 - exp ( -0.5 * dv ) * ( 1.0 + 0.5 * dv + 0.125 * dv*dv );
172
173 MYDEBUG(D_DIEHARD_RUNS) {
174 printf("uv = %f dv = %f\n",uv,dv);
175 }
176 test[0]->pvalues[irun] = gsl_sf_gamma_inc_Q(3.0,uv/2.0);
177 test[1]->pvalues[irun] = gsl_sf_gamma_inc_Q(3.0,dv/2.0);
178
179 MYDEBUG(D_DIEHARD_RUNS) {
180 printf("# diehard_runs(): test[0]->pvalues[%u] = %10.5f\n",irun,test[0]->pvalues[irun]);
181 printf("# diehard_runs(): test[1]->pvalues[%u] = %10.5f\n",irun,test[1]->pvalues[irun]);
182 }
183
184 free(runs_rand);
185
186 return(0);
187
188 }
189
190