1 /*
2  * See copyright in copyright.h and the accompanying file COPYING
3  */
4 
5 /*
6  *========================================================================
7  * This is the Diehard Runs test, rewritten from the description
8  * in tests.txt on George Marsaglia's diehard site.
9  *
10  * Here is the test description from diehard_tests.txt:
11  *
12  * This is the RUNS test. It counts runs up, and runs down,in a sequence
13  * of uniform [0,1) variables, obtained by floating the 32-bit integers
14  * in the specified file. This example shows how runs are counted:
15  *  .123, .357, .789, .425,. 224, .416, .95
16  * contains an up-run of length 3, a down-run of length 2 and an up-run
17  * of (at least) 2, depending on the next values.  The covariance matrices
18  * for the runs-up and runs-down are well-known, leading to chisquare tests
19  * for quadratic forms in the weak inverses of the covariance matrices.
20  * Runs are counted for sequences of length 10,000.  This is done ten times,
21  * then repeated.
22  *
23  *                            Comment
24  *
25  * I modify this the following ways. First, I let the sequence length be
26  * the variable -t (vector length) instead of fixing it at 10,000.  This
27  * lets one test sequences that are much longer (entirely possible with
28  * a modern CPU even for a fairly slow RNG).  Second, I repeat this for
29  * the variable -s (samples) times, default 100 and not just 10.  Third,
30  * because RNG's often have "bad seeds" for which they misbehave, the
31  * individual sequences can be optionally -i reseeded for each sample.
32  * Because this CAN let bad behavior be averaged out to where
33  * it isn't apparent for many samples with few bad seeds, we may need to
34  * plot the actual distribution of p-values for this and other tests where
35  * this option is used.  Fourth, it is silly to convert integers into floats
36  * in order to do this test.  Up sequences in integers are down sequences in
37  * floats once one divides by the largest integer available to the generator,
38  * period. Integer arithmetic is much faster than float AND one skips the
39  * very costly division associated with conversion.
40  * *========================================================================
41  */
42 
43 
44 #include <dieharder/libdieharder.h>
45 /*
46  * The following are the definitions and parameters for runs, based on
47  * Journal of Applied Statistics v30, Algorithm AS 157, 1981:
48  *    The Runs-Up and Runs-Down Tests, by R. G. T. Grafton.
49  * (and before that Knuth's The Art of Programming v. 2).
50  */
51 
52 #define RUN_MAX 6
53 
54 /*
55  * a_ij
56  */
57 static double a[6][6] = {
58  { 4529.4,   9044.9,  13568.0,   18091.0,   22615.0,   27892.0},
59  { 9044.9,  18097.0,  27139.0,   36187.0,   45234.0,   55789.0},
60  {13568.0,  27139.0,  40721.0,   54281.0,   67852.0,   83685.0},
61  {18091.0,  36187.0,  54281.0,   72414.0,   90470.0,  111580.0},
62  {22615.0,  45234.0,  67852.0,   90470.0,  113262.0,  139476.0},
63  {27892.0,  55789.0,  83685.0,  111580.0,  139476.0,  172860.0}
64 };
65 
66 /*
67  * b_i
68  */
69 static double b[6] = {
70  1.0/6.0,
71  5.0/24.0,
72  11.0/120.0,
73  19.0/720.0,
74  29.0/5040.0,
75  1.0/840.0,};
76 
77 uint *runs_rand;
78 
diehard_runs(Test ** test,int irun)79 int diehard_runs(Test **test, int irun)
80 {
81 
82  int i,j,k,t,ns;
83  unsigned int ucount,dcount,increased;
84  int upruns[RUN_MAX],downruns[RUN_MAX];
85  double uv,dv,up_pks,dn_pks;
86  double *uv_pvalue,*dv_pvalue;
87 
88  runs_rand = (uint *)malloc(test[0]->tsamples*sizeof(uint));
89  /*
90   * This is just for display.
91   */
92  test[0]->ntuple = 0;
93  test[1]->ntuple = 0;
94 
95  /*
96   * Clear up and down run bins
97   */
98  for(k=0;k<RUN_MAX;k++){
99    upruns[k] = 0;
100    downruns[k] = 0;
101  }
102 
103  /*
104   * Now count up and down runs and increment the bins.  Note
105   * that each successive up counts as a run of one down, and
106   * each successive down counts as a run of one up.
107   */
108  ucount = dcount = 1;
109  if(verbose){
110    printf("j    rand    ucount  dcount\n");
111  }
112  runs_rand[0] = gsl_rng_get(rng);
113  for(t=1;t<test[0]->tsamples;t++) {
114    runs_rand[t] = gsl_rng_get(rng);
115    if(verbose){
116      printf("%d:  %10u   %u    %u\n",t,runs_rand[t],ucount,dcount);
117    }
118 
119    /*
120     * Did we increase?
121     */
122    if(runs_rand[t] > runs_rand[t-1]){
123      ucount++;
124      if(ucount > RUN_MAX) ucount = RUN_MAX;
125      downruns[dcount-1]++;
126      dcount = 1;
127    } else {
128      dcount++;
129      if(dcount > RUN_MAX) dcount = RUN_MAX;
130      upruns[ucount-1]++;
131      ucount = 1;
132    }
133  }
134  if(runs_rand[test[0]->tsamples-1] > runs_rand[0]){
135    ucount++;
136    if(ucount > RUN_MAX) ucount = RUN_MAX;
137    downruns[dcount-1]++;
138    dcount = 1;
139  } else {
140    dcount++;
141    if(dcount > RUN_MAX) dcount = RUN_MAX;
142    upruns[ucount-1]++;
143    ucount = 1;
144  }
145 
146  /*
147   * This ends a single sample.
148   * Compute the test statistic for up and down runs.
149   */
150  uv=0.0;
151  dv=0.0;
152  if(verbose){
153    printf(" i      upruns    downruns\n");
154  }
155  for(i=0;i<RUN_MAX;i++) {
156    if(verbose){
157      printf("%d:   %7d   %7d\n",i,upruns[i],downruns[i]);
158    }
159    for(j=0;j<RUN_MAX;j++) {
160      uv += ((double)upruns[i]   - test[0]->tsamples*b[i])*(upruns[j]   - test[0]->tsamples*b[j])*a[i][j];
161      dv += ((double)downruns[i] - test[0]->tsamples*b[i])*(downruns[j] - test[0]->tsamples*b[j])*a[i][j];
162    }
163  }
164  uv /= (double)test[0]->tsamples;
165  dv /= (double)test[0]->tsamples;
166 
167  /*
168   * This NEEDS WORK!  It isn't right, somehow...
169   */
170  up_pks = 1.0 - exp ( -0.5 * uv ) * ( 1.0 + 0.5 * uv + 0.125 * uv*uv );
171  dn_pks = 1.0 - exp ( -0.5 * dv ) * ( 1.0 + 0.5 * dv + 0.125 * dv*dv );
172 
173  MYDEBUG(D_DIEHARD_RUNS) {
174    printf("uv = %f   dv = %f\n",uv,dv);
175  }
176  test[0]->pvalues[irun] = gsl_sf_gamma_inc_Q(3.0,uv/2.0);
177  test[1]->pvalues[irun] = gsl_sf_gamma_inc_Q(3.0,dv/2.0);
178 
179  MYDEBUG(D_DIEHARD_RUNS) {
180    printf("# diehard_runs(): test[0]->pvalues[%u] = %10.5f\n",irun,test[0]->pvalues[irun]);
181    printf("# diehard_runs(): test[1]->pvalues[%u] = %10.5f\n",irun,test[1]->pvalues[irun]);
182  }
183 
184  free(runs_rand);
185 
186  return(0);
187 
188 }
189 
190