1 /*
2  * ========================================================================
3  * See copyright in copyright.h and the accompanying file COPYING
4  * ========================================================================
5  */
6 
7 /*
8  *========================================================================
9  * This is the Diehard OPERM5 test, rewritten from the description
10  * in tests.txt on George Marsaglia's diehard site.
11  *
12  *          THE OVERLAPPING 5-PERMUTATION TEST                   ::
13  * This is the OPERM5 test.  It looks at a sequence of one mill- ::
14  * ion 32-bit random integers.  Each set of five consecutive     ::
15  * integers can be in one of 120 states, for the 5! possible or- ::
16  * derings of five numbers.  Thus the 5th, 6th, 7th,...numbers   ::
17  * each provide a state. As many thousands of state transitions  ::
18  * are observed,  cumulative counts are made of the number of    ::
19  * occurences of each state.  Then the quadratic form in the     ::
20  * weak inverse of the 120x120 covariance matrix yields a test   ::
21  * equivalent to the likelihood ratio test that the 120 cell     ::
22  * counts came from the specified (asymptotically) normal dis-   ::
23  * tribution with the specified 120x120 covariance matrix (with  ::
24  * rank 99).  This version uses 1,000,000 integers, twice.       ::
25  *
26  * Note -- the original diehard test almost certainly had errors,
27  * as did the documentation.  For example, the actual rank is
28  * 5!-4!=96, not 99.  The original dieharder version validated
29  * against the c port of dieharder to give the same answers from
30  * the same data, but failed gold-standard generators such as AES
31  * or the XOR supergenerator with AES and several other top rank
32  * generators.  Frustration with trying to fix the test with very
33  * little useful documentation caused me to eventually write
34  * the rgb permutations test, which uses non-overlapping samples
35  * (and hence avoids the covariance problem altogether) and can
36  * be used for permutations of other than 5 integers.  I was able
37  * to compute the covariance matrix for the problem, but was unable
38  * break it down into the combination of R, S and map that Marsaglia
39  * used, and I wanted to (if possible) use the GSL permutations
40  * routines to count/index the permutations, which yield a different
41  * permutation index from Marsaglia's (adding to the problem).
42  *
43  * Fortunately, Stephen Moenkehues (moenkehues@googlemail.com) was
44  * bored and listless and annoyed all at the same time while using
45  * dieharder to test his SWIFFTX rng, a SHA3-candidate and fixed
46  * diehard_operm5.  His fix avoids the R, S and map -- he too went
47  * the route of directly computing the correlation matrix but he
48  * figured out how to transform the correlation matrix plus the
49  * counts from a run directly into the desired statistic (a thing
50  * that frustrated me in my own previous attempts) and now it works!
51  * He even made it work (correctly) in overlapping and non-overlapping
52  * versions, so one can invoke dieharder with the -L 1 option and run
53  * what should be the moral equivalent of the rgb permutation test at
54  * -n 5!
55  *
56  * So >>thank you<< Stephen!  Thank you Open Source development
57  * process!  Thank you Ifni, Goddess of Luck and Numbers!  And anybody
58  * who wants to tackle the remaining diehard "problem" tests, (sums in
59  * particular) should feel free to play through...
60  *========================================================================
61  */
62 
63 
64 #include <dieharder/libdieharder.h>
65 
66 static int tflag=0;
67 static double tcount[120];
68 
69 /*
70 * kperm computes the permutation number of a vector of five integers
71 * passed to it.
72 */
kperm(uint v[],uint voffset)73 int kperm(uint v[],uint voffset)
74 {
75 
76  int i,j,k,max;
77  int w[5];
78  int pindex,uret,tmp;
79 
80  /*
81   * work on a copy of v, not v itself in case we are using
82   * overlapping 5-patterns.
83   */
84  for(i=0;i<5;i++){
85    j = (i+voffset)%5;
86    w[i] = v[j];
87  }
88 
89  if(verbose == -1){
90    printf("==================================================================\n");
91    printf("%10u %10u %10u %10u %10u\n",w[0],w[1],w[2],w[3],w[4]);
92    printf(" Permutations = \n");
93  }
94 
95  pindex = 0;
96  for(i=4;i>0;i--){
97    max = w[0];
98    k = 0;
99    for(j=1;j<=i;j++){
100      if(max <= w[j]){
101        max = w[j];
102        k = j;
103      }
104    }
105    pindex = (i+1)*pindex + k;
106    tmp = w[i];
107    w[i] = w[k];
108    w[k] = tmp;
109    if(verbose == -1){
110      printf("%10u %10u %10u %10u %10u\n",w[0],w[1],w[2],w[3],w[4]);
111    }
112  }
113 
114  uret = pindex;
115 
116  if(verbose == -1){
117    printf(" => %u\n",pindex);
118  }
119 
120  return uret;
121 
122 }
123 
diehard_operm5(Test ** test,int irun)124 int diehard_operm5(Test **test, int irun)
125 {
126 
127  int i,j,kp,t,vind;
128  uint v[5];
129  double count[120];
130  double av,norm,x[120],chisq,ndof;
131 
132  /*
133   * Zero count vector, was t(120) in diehard.f90.
134   */
135  for(i=0;i<120;i++) {
136    count[i] = 0.0;
137    if(tflag == 0){
138      tcount[i] = 0.0;
139      tflag = 1;
140    }
141  }
142 
143  if(overlap){
144    for(i=0;i<5;i++){
145      v[i] = gsl_rng_get(rng);
146    }
147    vind = 0;
148  } else {
149    for(i=0;i<5;i++){
150      v[i] = gsl_rng_get(rng);
151    }
152  }
153 
154  for(t=0;t<test[0]->tsamples;t++){
155 
156    /*
157     * OK, now we are ready to generate a list of permutation indices.
158     * Basically, we take a vector of 5 integers and transform it into a
159     * number with the kperm function.  We will use the overlap flag to
160     * determine whether or not to refill the entire v vector or just
161     * rotate bytes.
162     */
163   if(overlap){
164     kp = kperm(v,vind);
165     count[kp] += 1;
166     v[vind] = gsl_rng_get(rng);
167     vind = (vind+1)%5;
168   } else {
169     for(i=0;i<5;i++){
170       v[i] = gsl_rng_get(rng);
171     }
172     kp = kperm(v,0);
173     count[kp] += 1;
174   }
175  }
176 
177  for(i=0;i<120;i++){
178    tcount[i] += count[i];
179    /* printf("%u: %f\n",i,tcount[i]); */
180  }
181 
182  chisq = 0.0;
183  av = test[0]->tsamples/120.0;
184  norm = test[0]->tsamples; // this belongs to the pseudoinverse
185  /*
186   * The pseudoinverse P of the covariancematrix C is computed for n = 1.
187   * If n = 100 the new covariancematrix is C_100 = 100*C. Therefore the
188   * new pseudoinverse is P_100 = (1/100)*P.  You can see this from the
189   * equation C*P*C = C
190   */
191 
192  if(overlap==0){
193    norm = av;
194  }
195  for(i=0;i<120;i++){
196    x[i] = count[i] - av;
197  }
198 
199  if(overlap){
200    for(i=0;i<120;i++){
201      for(j=0;j<120;j++){
202        chisq = chisq + x[i]*pseudoInv[i][j]*x[j];
203      }
204    }
205  }
206 
207  if(overlap==0){
208    for(i=0;i<120;i++){
209      chisq = chisq + x[i]*x[i];
210    }
211  }
212 
213  if(verbose == -2){
214    printf("norm = %10.2f, av = %10.2f",norm,av);
215    for(i=0;i<120;i++){
216      printf("count[%u] = %4.0f; x[%u] = %3.2f ",i,count[i],i,x[i]);
217      if((i%2)==0){printf("\n");}
218    }
219    if((chisq/norm) >= 0){
220      printf("\n\nchisq/norm: %10.5f :-) and chisq: %10.5f\n",(chisq/norm), chisq);
221    }
222  }
223 
224  if((chisq/norm) < 0){
225    printf("\n\nCHISQ NEG.! chisq/norm: %10.5f and chisq: %10.5f",(chisq/norm), chisq);
226  }
227 
228  chisq = fabs(chisq / norm);
229 
230  ndof = 96; /* the rank of the covariancematrix and the pseudoinverse */
231  if(overlap == 0){
232    ndof = 120-1;
233  }
234 
235  MYDEBUG(D_DIEHARD_OPERM5){
236    printf("# diehard_operm5(): chisq[%u] = %10.5f\n",kspi,chisq);
237  }
238 
239  test[0]->pvalues[irun] = gsl_sf_gamma_inc_Q((double)(ndof)/2.0,chisq/2.0);
240 
241  MYDEBUG(D_DIEHARD_OPERM5){
242    printf("# diehard_operm5(): test[0]->pvalues[%u] = %10.5f\n",irun,test[0]->pvalues[irun]);
243  }
244 
245  kspi++;
246 
247  return(0);
248 
249 }
250 
251