1 /*
2 * ========================================================================
3 * See copyright in copyright.h and the accompanying file COPYING
4 * ========================================================================
5 */
6
7 /*
8 *========================================================================
9 * This is the Diehard Count a Stream of 1's test, rewritten from the
10 * description in tests.txt on George Marsaglia's diehard site.
11 *
12 * This is the COUNT-THE-1's TEST for specific bytes. ::
13 * Consider the file under test as a stream of 32-bit integers. ::
14 * From each integer, a specific byte is chosen , say the left- ::
15 * most:: bits 1 to 8. Each byte can contain from 0 to 8 1's, ::
16 * with probabilitie 1,8,28,56,70,56,28,8,1 over 256. Now let ::
17 * the specified bytes from successive integers provide a string ::
18 * of (overlapping) 5-letter words, each "letter" taking values ::
19 * A,B,C,D,E. The letters are determined by the number of 1's, ::
20 * in that byte:: 0,1,or 2 ---> A, 3 ---> B, 4 ---> C, 5 ---> D,::
21 * and 6,7 or 8 ---> E. Thus we have a monkey at a typewriter ::
22 * hitting five keys with with various probabilities:: 37,56,70,::
23 * 56,37 over 256. There are 5^5 possible 5-letter words, and ::
24 * from a string of 256,000 (overlapping) 5-letter words, counts ::
25 * are made on the frequencies for each word. The quadratic form ::
26 * in the weak inverse of the covariance matrix of the cell ::
27 * counts provides a chisquare test:: Q5-Q4, the difference of ::
28 * the naive Pearson sums of (OBS-EXP)^2/EXP on counts for 5- ::
29 * and 4-letter cell counts. ::
30 *
31 * Comment
32 *
33 * For the less statistically trained amongst us, the translation
34 * of the above is:
35 * Generate a string of base-5 digits derived as described from
36 * specific (randomly chosen) byte offsets in a random integer.
37 * Turn four and five digit base 5 numbers (created from these digits)
38 * into indices and incrementally count the frequency of occurrence of
39 * each index.
40 * Compute the expected value of these counts given tsamples samples,
41 * and thereby (from the vector of actual vs expected counts) generate
42 * a chisq for 4 and 5 digit numbers separately. These chisq's are
43 * expected, for a large number of trials, to be equal to the number of
44 * degrees of freedom of the vectors, (5^5 - 1) or (5^4 - 1). Generate
45 * the mean DIFFERENCE = 2500 as the expected value of the difference
46 * chisq_5 - chisq_4 and compute a pvalue based on this expected value and
47 * the associated standard deviation sqrt(2*2500).
48 *
49 * Note: The byte offset is systematically cycled PER tsample, so enough
50 * tsamples gives one a reasonable chance of discovering "bad" offsets
51 * in any exist. How many is enough? Difficult to say a priori -- it
52 * depends on how bit the failure is and how many offsets create a
53 * failure. Play around with it.
54 *
55 * Note also that the code itself is a bit simpler than the stream
56 * version (no need to worry about e.g. overlap or modulus 4/5 when
57 * getting the next int/byte) but that it generates 4x as many rands
58 * as non-overlapping stream: tsamples*psamples*5, basically.
59 *
60 * This test is actually LESS stringent than the stream version overall,
61 * and is much closer to rgb_bitdist. In fact, if rgb_bitdist for 8 bits
62 * fails the diehard_count_1s_byte test MUST fail as it too samples all
63 * the different offsets systematically. However, as before, MOST failures
64 * at 8 bits are derived from failures at smaller numbers of bits (this
65 * is an assertion that can be made precise in terms of contributing
66 * permutations) and again, the test is vastly less sensitive than
67 * rgb_bitdist and is less senstive that diehard_count_1s_stream EXCEPT
68 * that it samples more rands and might reveal problems with specific
69 * offsets ignored by the stream test. Honestly, I could fix the stream
70 * test to cycle through the possible bitlevel offsets and make this
71 * test completely obsolete.
72 *========================================================================
73 */
74
75
76 #include <dieharder/libdieharder.h>
77
78 /*
79 * Include inline uint generator
80 */
81 #include "static_get_bits.c"
82
83 /*
84 * This table was generated using the following code fragment.
85 {
86 char table[256];
87 table[i] = 0;
88 for(j=0;j<8*sizeof(uint);j++){
89 table[i] += get_int_bit(i,j);
90 }
91 switch(table[i]){
92 case 0:
93 case 1:
94 case 2:
95 table[i] = 0;
96 break;
97 case 3:
98 table[i] = 1;
99 break;
100 case 4:
101 table[i] = 2;
102 break;
103 case 5:
104 table[i] = 3;
105 break;
106 case 6:
107 case 7:
108 case 8:
109 table[i] = 4;
110 break;
111 default:
112 fprintf(stderr,"Hahahahah\n");
113 exit(0);
114 break;
115 }
116 }
117 */
118
119 char b5b[] = {
120 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 2,
121 0, 0, 0, 1, 0, 1, 1, 2, 0, 1, 1, 2, 1, 2, 2, 3,
122 0, 0, 0, 1, 0, 1, 1, 2, 0, 1, 1, 2, 1, 2, 2, 3,
123 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
124 0, 0, 0, 1, 0, 1, 1, 2, 0, 1, 1, 2, 1, 2, 2, 3,
125 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
126 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
127 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 4,
128 0, 0, 0, 1, 0, 1, 1, 2, 0, 1, 1, 2, 1, 2, 2, 3,
129 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
130 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
131 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 4,
132 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
133 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 4,
134 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 4,
135 2, 3, 3, 4, 3, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4};
136
137 /*
138 * The following is needed to generate the test statistic
139 *
140 * Vector of probabilities for each integer.
141 * 37.0/256.0,56.0/256.0,70.0/256.0,56.0/256.0,37.0/256.0
142 */
143 const double pb[]={
144 0.144531250,
145 0.218750000,
146 0.273437500,
147 0.218750000,
148 0.144531250};
149
150
151
152 /*
153 * Useful macro for building 4 and 5 digit base 5 numbers from
154 * base 5 digits
155 */
156 #define LSHIFT5(old,new) (old*5 + new)
157
diehard_count_1s_byte(Test ** test,int irun)158 int diehard_count_1s_byte(Test **test, int irun)
159 {
160
161 uint i,j,k,index5=0,index4,letter,t;
162 uint boffset;
163 Vtest vtest4,vtest5;
164 Xtest ptest;
165
166 /*
167 * count_1s in specific bytes is straightforward after looking over
168 * count_1s in a byte. The statistic is identical; we just have to
169 * cycle the offset of the bytes selected and generate 1 random uint
170 * per digit.
171 */
172
173 /*
174 * I'm leaving this in so the chronically bored can validate that
175 * the table exists and is correctly loaded and addressable.
176 */
177 if(verbose == -1){
178 for(i=0;i<256;i++){
179 printf("%u, ",b5b[i]);
180 /* dumpbits(&i,8); */
181 if((i+1)%16 == 0){
182 printf("\n");
183 }
184 }
185 exit(0);
186 }
187
188 /*
189 * for display only. 0 means "ignored".
190 */
191 test[0]->ntuple = 0;
192
193 /*
194 * This is basically a pair of parallel vtests, with a final test
195 * statistic generated from their difference (somehow). We therefore
196 * create two vtests, one for four digit base 5 integers and one for
197 * five digit base 5 integers, and generate their expected values for
198 * test[0]->tsamples trials.
199 */
200
201 ptest.y = 2500.0;
202 ptest.sigma = sqrt(5000.0);
203
204 Vtest_create(&vtest4,625);
205 vtest4.cutoff = 5.0;
206 for(i=0;i<625;i++){
207 j = i;
208 vtest4.y[i] = test[0]->tsamples;
209 vtest4.x[i] = 0.0;
210 /*
211 * Digitize base 5, compute expected value for THIS integer i.
212 */
213 /* printf("%u: ",j); */
214 for(k=0;k<4;k++){
215 /*
216 * Take the least significant "letter" of j in range 0-4
217 */
218 letter = j%5;
219 /*
220 * multiply by the probability of getting this letter
221 */
222 vtest4.y[i] *= pb[letter];
223 /*
224 * Right shift j to get next digit.
225 */
226 /* printf("%1u",letter); */
227 j /= 5;
228 }
229 /* printf(" = %f\n",vtest4.y[i]); */
230 }
231
232 Vtest_create(&vtest5,3125);
233 vtest5.cutoff = 5.0;
234 for(i=0;i<3125;i++){
235 j = i;
236 vtest5.y[i] = test[0]->tsamples;
237 vtest5.x[i] = 0.0;
238 /*
239 * Digitize base 5, compute expected value for THIS integer i.
240 */
241 for(k=0;k<5;k++){
242 /*
243 * Take the least significant "letter" of j in range 0-4
244 */
245 letter = j%5;
246 /*
247 * multiply by the probability of getting this letter
248 */
249 vtest5.y[i] *= pb[letter];
250 /*
251 * Right shift j to get next digit.
252 */
253 j /= 5;
254 }
255 }
256
257 /*
258 * Here is the test. We cycle boffset through test[0]->tsamples
259 */
260 boffset = 0;
261 for(t=0;t<test[0]->tsamples;t++){
262
263 boffset = t%32; /* Remember that get_bit_ntuple periodic wraps the uint */
264 /*
265 * Get the next five bytes and make an index5 out of them, no
266 * overlap.
267 */
268 for(k=0;k<5;k++){
269 i = get_rand_bits_uint(32, 0xFFFFFFFF, rng);
270 if(verbose == D_DIEHARD_COUNT_1S_STREAM || verbose == D_ALL){
271 dumpbits(&i,32);
272 }
273 /*
274 * get next byte from the last rand we generated.
275 * Bauer fix -
276 * Cruft: j = get_bit_ntuple_from_uint(i,8,0x000000FF,boffset);
277 */
278 j = get_bit_ntuple_from_whole_uint(i,8,0x000000FF,boffset);
279 index5 = LSHIFT5(index5,b5b[j]);
280 if(verbose == D_DIEHARD_COUNT_1S_STREAM || verbose == D_ALL){
281 printf("b5b[%u] = %u, index5 = %u\n",j,b5b[j],index5);
282 dumpbits(&j,8);
283 }
284 }
285 /*
286 * We use modulus to throw away the sixth digit in the left-shifted
287 * base 5 index value, keeping the value of the 5-digit base 5 number
288 * in the range 0 to 5^5-1 or 0 to 3124 decimal. We repeat for the
289 * four digit index. At this point we increment the counts for index4
290 * and index5. Tres simple, no?
291 */
292 index5 = index5%3125;
293 index4 = index5%625;
294 vtest4.x[index4]++;
295 vtest5.x[index5]++;
296 }
297 /*
298 * OK, all that is left now is to figure out the statistic.
299 */
300 if(verbose == D_DIEHARD_COUNT_1S_BYTE || verbose == D_ALL){
301 for(i = 0;i<625;i++){
302 printf("%u: %f %f\n",i,vtest4.y[i],vtest4.x[i]);
303 }
304 for(i = 0;i<3125;i++){
305 printf("%u: %f %f\n",i,vtest5.y[i],vtest5.x[i]);
306 }
307 }
308 Vtest_eval(&vtest4);
309 Vtest_eval(&vtest5);
310 if(verbose == D_DIEHARD_COUNT_1S_BYTE || verbose == D_ALL){
311 printf("vtest4.chisq = %f vtest5.chisq = %f\n",vtest4.chisq,vtest5.chisq);
312 }
313 ptest.x = vtest5.chisq - vtest4.chisq;
314
315 Xtest_eval(&ptest);
316 test[0]->pvalues[irun] = ptest.pvalue;
317
318 MYDEBUG(D_DIEHARD_COUNT_1S_BYTE) {
319 printf("# diehard_count_1s_byte(): test[0]->pvalues[%u] = %10.5f\n",irun,test[0]->pvalues[irun]);
320 }
321
322
323 Vtest_destroy(&vtest4);
324 Vtest_destroy(&vtest5);
325
326 return(0);
327 }
328
329