1 /*
2 * ========================================================================
3 * See copyright in copyright.h and the accompanying file COPYING
4 * ========================================================================
5 */
6
7 /*
8 *========================================================================
9 * This is the Diehard OPERM5 test, rewritten from the description
10 * in tests.txt on George Marsaglia's diehard site.
11 *
12 * THE OVERLAPPING 5-PERMUTATION TEST ::
13 * This is the OPERM5 test. It looks at a sequence of one mill- ::
14 * ion 32-bit random integers. Each set of five consecutive ::
15 * integers can be in one of 120 states, for the 5! possible or- ::
16 * derings of five numbers. Thus the 5th, 6th, 7th,...numbers ::
17 * each provide a state. As many thousands of state transitions ::
18 * are observed, cumulative counts are made of the number of ::
19 * occurences of each state. Then the quadratic form in the ::
20 * weak inverse of the 120x120 covariance matrix yields a test ::
21 * equivalent to the likelihood ratio test that the 120 cell ::
22 * counts came from the specified (asymptotically) normal dis- ::
23 * tribution with the specified 120x120 covariance matrix (with ::
24 * rank 99). This version uses 1,000,000 integers, twice. ::
25 *
26 * Note -- the original diehard test almost certainly had errors,
27 * as did the documentation. For example, the actual rank is
28 * 5!-4!=96, not 99. The original dieharder version validated
29 * against the c port of dieharder to give the same answers from
30 * the same data, but failed gold-standard generators such as AES
31 * or the XOR supergenerator with AES and several other top rank
32 * generators. Frustration with trying to fix the test with very
33 * little useful documentation caused me to eventually write
34 * the rgb permutations test, which uses non-overlapping samples
35 * (and hence avoids the covariance problem altogether) and can
36 * be used for permutations of other than 5 integers. I was able
37 * to compute the covariance matrix for the problem, but was unable
38 * break it down into the combination of R, S and map that Marsaglia
39 * used, and I wanted to (if possible) use the GSL permutations
40 * routines to count/index the permutations, which yield a different
41 * permutation index from Marsaglia's (adding to the problem).
42 *
43 * Fortunately, Stephen Moenkehues (moenkehues@googlemail.com) was
44 * bored and listless and annoyed all at the same time while using
45 * dieharder to test his SWIFFTX rng, a SHA3-candidate and fixed
46 * diehard_operm5. His fix avoids the R, S and map -- he too went
47 * the route of directly computing the correlation matrix but he
48 * figured out how to transform the correlation matrix plus the
49 * counts from a run directly into the desired statistic (a thing
50 * that frustrated me in my own previous attempts) and now it works!
51 * He even made it work (correctly) in overlapping and non-overlapping
52 * versions, so one can invoke dieharder with the -L 1 option and run
53 * what should be the moral equivalent of the rgb permutation test at
54 * -n 5!
55 *
56 * So >>thank you<< Stephen! Thank you Open Source development
57 * process! Thank you Ifni, Goddess of Luck and Numbers! And anybody
58 * who wants to tackle the remaining diehard "problem" tests, (sums in
59 * particular) should feel free to play through...
60 *========================================================================
61 */
62
63
64 #include <dieharder/libdieharder.h>
65
66 static int tflag=0;
67 static double tcount[120];
68
69 /*
70 * kperm computes the permutation number of a vector of five integers
71 * passed to it.
72 */
kperm(uint v[],uint voffset)73 int kperm(uint v[],uint voffset)
74 {
75
76 int i,j,k,max;
77 int w[5];
78 int pindex,uret,tmp;
79
80 /*
81 * work on a copy of v, not v itself in case we are using
82 * overlapping 5-patterns.
83 */
84 for(i=0;i<5;i++){
85 j = (i+voffset)%5;
86 w[i] = v[j];
87 }
88
89 if(verbose == -1){
90 printf("==================================================================\n");
91 printf("%10u %10u %10u %10u %10u\n",w[0],w[1],w[2],w[3],w[4]);
92 printf(" Permutations = \n");
93 }
94
95 pindex = 0;
96 for(i=4;i>0;i--){
97 max = w[0];
98 k = 0;
99 for(j=1;j<=i;j++){
100 if(max <= w[j]){
101 max = w[j];
102 k = j;
103 }
104 }
105 pindex = (i+1)*pindex + k;
106 tmp = w[i];
107 w[i] = w[k];
108 w[k] = tmp;
109 if(verbose == -1){
110 printf("%10u %10u %10u %10u %10u\n",w[0],w[1],w[2],w[3],w[4]);
111 }
112 }
113
114 uret = pindex;
115
116 if(verbose == -1){
117 printf(" => %u\n",pindex);
118 }
119
120 return uret;
121
122 }
123
diehard_operm5(Test ** test,int irun)124 int diehard_operm5(Test **test, int irun)
125 {
126
127 int i,j,kp,t,vind;
128 uint v[5];
129 double count[120];
130 double av,norm,x[120],chisq,ndof;
131
132 /*
133 * Zero count vector, was t(120) in diehard.f90.
134 */
135 for(i=0;i<120;i++) {
136 count[i] = 0.0;
137 if(tflag == 0){
138 tcount[i] = 0.0;
139 tflag = 1;
140 }
141 }
142
143 if(overlap){
144 for(i=0;i<5;i++){
145 v[i] = gsl_rng_get(rng);
146 }
147 vind = 0;
148 } else {
149 for(i=0;i<5;i++){
150 v[i] = gsl_rng_get(rng);
151 }
152 }
153
154 for(t=0;t<test[0]->tsamples;t++){
155
156 /*
157 * OK, now we are ready to generate a list of permutation indices.
158 * Basically, we take a vector of 5 integers and transform it into a
159 * number with the kperm function. We will use the overlap flag to
160 * determine whether or not to refill the entire v vector or just
161 * rotate bytes.
162 */
163 if(overlap){
164 kp = kperm(v,vind);
165 count[kp] += 1;
166 v[vind] = gsl_rng_get(rng);
167 vind = (vind+1)%5;
168 } else {
169 for(i=0;i<5;i++){
170 v[i] = gsl_rng_get(rng);
171 }
172 kp = kperm(v,0);
173 count[kp] += 1;
174 }
175 }
176
177 for(i=0;i<120;i++){
178 tcount[i] += count[i];
179 /* printf("%u: %f\n",i,tcount[i]); */
180 }
181
182 chisq = 0.0;
183 av = test[0]->tsamples/120.0;
184 norm = test[0]->tsamples; // this belongs to the pseudoinverse
185 /*
186 * The pseudoinverse P of the covariancematrix C is computed for n = 1.
187 * If n = 100 the new covariancematrix is C_100 = 100*C. Therefore the
188 * new pseudoinverse is P_100 = (1/100)*P. You can see this from the
189 * equation C*P*C = C
190 */
191
192 if(overlap==0){
193 norm = av;
194 }
195 for(i=0;i<120;i++){
196 x[i] = count[i] - av;
197 }
198
199 if(overlap){
200 for(i=0;i<120;i++){
201 for(j=0;j<120;j++){
202 chisq = chisq + x[i]*pseudoInv[i][j]*x[j];
203 }
204 }
205 }
206
207 if(overlap==0){
208 for(i=0;i<120;i++){
209 chisq = chisq + x[i]*x[i];
210 }
211 }
212
213 if(verbose == -2){
214 printf("norm = %10.2f, av = %10.2f",norm,av);
215 for(i=0;i<120;i++){
216 printf("count[%u] = %4.0f; x[%u] = %3.2f ",i,count[i],i,x[i]);
217 if((i%2)==0){printf("\n");}
218 }
219 if((chisq/norm) >= 0){
220 printf("\n\nchisq/norm: %10.5f :-) and chisq: %10.5f\n",(chisq/norm), chisq);
221 }
222 }
223
224 if((chisq/norm) < 0){
225 printf("\n\nCHISQ NEG.! chisq/norm: %10.5f and chisq: %10.5f",(chisq/norm), chisq);
226 }
227
228 chisq = fabs(chisq / norm);
229
230 ndof = 96; /* the rank of the covariancematrix and the pseudoinverse */
231 if(overlap == 0){
232 ndof = 120-1;
233 }
234
235 MYDEBUG(D_DIEHARD_OPERM5){
236 printf("# diehard_operm5(): chisq[%u] = %10.5f\n",kspi,chisq);
237 }
238
239 test[0]->pvalues[irun] = gsl_sf_gamma_inc_Q((double)(ndof)/2.0,chisq/2.0);
240
241 MYDEBUG(D_DIEHARD_OPERM5){
242 printf("# diehard_operm5(): test[0]->pvalues[%u] = %10.5f\n",irun,test[0]->pvalues[irun]);
243 }
244
245 kspi++;
246
247 return(0);
248
249 }
250
251