1 /*
2 * See copyright in copyright.h and the accompanying file COPYING
3 */
4
5 /*
6 *========================================================================
7 * This is a test that checks to see if the rng generates bit patterns
8 * (n-tuples) that are distributed correctly (binomially). For example,
9 * for 2-tuples (bit pairs) there are four possibilities: 00, 01, 10, 11.
10 * Each should occur with a probability of 1/4, hence the count of (say) K
11 * 00 bitpairs out of N trials should be distributed (over M samples)
12 * according to the binomial probability p = binomial(N,K,0.25) -- that
13 * is, the expected count for k 00's is M*p(N,K,0.25).
14 *
15 * This test should be more sensitive than just doing a large number of
16 * trials and aggregating all the 00's over all the samples and comparing
17 * the result to the expected mean, as there can be distributions that
18 * have the expected mean of 0.25 that are >>not<< binomially distributed.
19 *
20 * By making a single program capable of doing any n-tuple, we gain
21 * immediate benefit. Both STS and Diehard, for example, contain
22 * tests that validate bit distribution frequencies; however, they typically
23 * do so for specific n-tuples, e.g. 5 in several diehard tests. There
24 * is nothing terribly special about 5, and no point in looking too
25 * hard at e.g. intervals between patterns at 5, since testing pure
26 * frequency at 6 bits simultaneously ensures that all 6 bit patterns have
27 * the correct frequency (since they are sub-patterns of 6 bit patterns
28 * that have the correct frequency, and if they had the INcorrect frequency
29 * the six-bit patterns would as well by inheritance) and (less obviously,
30 * but I'm pretty sure I can prove it) that intervals between the five
31 * bit patterns must also be correct IF the test is satisfied robustly
32 * for various arbitrary test string lengths and sampling counts.
33 *
34 * Anyway, I'm giving this one a shot as it may end up being a one-size
35 * fits all tool for validating bitlevel randomness for any generator,
36 * up to some n-tuple. I expect that 2004/2005 computers will easily
37 * be able to validate through n=8 without really warming up or taking
38 * terribly long (validating the frequency and distribution for e.g.
39 * the entire ascii alphabet at the byte level) and MAY be able to do
40 * actual frequency validation on 16-bit integers. Note that it only takes
41 * a few seconds to generate millions of 32 bit integers with most of
42 * the GSL routines. To get statistically valid results, one needs to
43 * accumulate order of 100 trials per outcome, or test order of millions
44 * of integers and accumulate the results in an outcome vector 64K long.
45 * both of these are well within reach using at most hours of CPU time,
46 * and partitioned on a beowulf might take only minutes or even seconds.
47 *
48 * The latest version of the actual test (below) tests only ONE ntuple,
49 * the value set in the global variable sts_serial_ntuple which must
50 * be a positive integer. The calling program is responsible for e.g.
51 * testing a range of ntuples.
52 *========================================================================
53 */
54
55 /*
56 * Overlapping Test
57 *==================================================================
58 * We fill input with inlen uints. The test rules are that
59 * nb < |log_2 (inlen*32) | - 2. Or inverting (which is
60 * more useful) inlen = 2^(nb+2)/sizeof(uint). For example, if
61 * nb=8, inlen = 1024/32 = 32. This is obviously very convervative.
62 * Even if nb=16 (testing 65K number) it is only 2^18/2^5 = 2^13 or
63 * around 8 MB of uints.
64 *
65 * To make this test fast, we should very likely just use
66 * inlen = 2^nb UINTS, which is 8x the minimum recommended and easy
67 * to compute, e.g. for nb = 16 we have input[65536], containing
68 * 2^24 = 16 Mbits. We then allocate one extra uint at the end,
69 * fill it, copy the 0 uint to the end for periodic wrap, and
70 * simply rip a uint window along it with the inline shift trick
71 * to generate the overlapping samples. The code we develop can
72 * likely be backported to bits.c as being a bit more efficient.
73 */
74
75 #include <dieharder/libdieharder.h>
76
77 #include "static_get_bits.c"
78
79 /*
80 * This is a buffer of uint length (2^nb)+1 that we will fill with rands
81 * to be tested, once per test.
82 */
83
sts_serial(Test ** test,int irun)84 int sts_serial(Test **test,int irun)
85 {
86
87 uint bsize; /* number of bits/samples in uintbuf */
88 uint nb,nb1; /* number of bits in a tested ntuple */
89 uint value; /* value of sampled ntuple (as a uint) */
90 uint mask; /* mask in only nb bits */
91 uint bi; /* bit offset relative to window */
92
93 /* Look for cruft below */
94
95 uint i,j,m,t; /* generic loop indices */
96 uint ctotal; /* count of any ntuple per bitstring */
97 double **freq,*psi2,*delpsi2,*del2psi2;
98 double pvalue;
99 uint *uintbuf;
100 uint window; /* uint window into uintbuf, slide along a byte at at time. */
101
102 double mono_mean,mono_sigma; /* For single bit test */
103
104 /*
105 * Sample a bitstring of rgb_bitstring_ntuple in length (exactly). Note
106 * that I'm going to force nb>=2, nb<=16 for the moment. The routine
107 * might "work" for nb up to 24 or even (on a really big memory machine)
108 * 32, but memory requirements in uints are around 2^(nb+1) + n where n
109 * is order unity, maybe worse, so I think it would take a 12 to 16 GB
110 * machine to be able to just hold the memory, and then it would take a
111 * very long time. However, the testing process might parallelize
112 * decently if we could farm out the blocks of bits to be run efficiently
113 * over a network relative to the time required to generate them.
114 */
115 nb = 16; /* Just ignore sts_serial_ntuple for now */
116 tsamples = test[0]->tsamples; /* ditto */
117 MYDEBUG(D_STS_SERIAL){
118 printf("#==================================================================\n");
119 printf("# Starting sts_serial.\n");
120 printf("# sts_serial: Testing ntuple = %u\n",nb);
121 }
122
123 /*
124 * This is useful as a limit here and there as we have things that range
125 * from 0 to nb INclusive (unless/until we fix the mallocs up a bit).
126 */
127 nb1 = nb+1;
128
129 /*
130 * We need a vector of freq vectors. We make each frequency counter
131 * vector only as large as it needs to be. We zero each accumulator as
132 * we make it. Note that we cannot sample "0 bits at a time" so m starts
133 * at 1.
134 */
135 freq = (double **) malloc(nb1*sizeof(double *));
136 for(m = 1;m < nb1;m++) {
137 freq[m] = (double *)malloc(pow(2,m)*sizeof(double));
138 memset(freq[m],0,pow(2,m)*sizeof(double));
139 }
140
141 /*
142 * These are the statistics required by sts_serial in SP800. psi2[m] is
143 * basically the sum of the squares of the bitpattern frequences for a
144 * given number of bits m. delpsi2 is the difference between psi2[m] and
145 * psi2[m-1]. del2psi2 is the difference of delpsi2[m] and delpsi2[m-1].
146 * We zero only need to zero psi2, as the other two are set directly.
147 *
148 * Note that we can only generate psi2 for m>0, delpsi2 for m>1, delpsi3
149 * for m>2. We can therefore only generate both test statistics (based
150 * on delpsi2 and del2psi2) for m>=3 to m=16. What we CAN do for m=1
151 * is generate a straight normal p-value on a binomial probability for
152 * e.g. average number of 1's, basically sts_monobit. That would give
153 * us one test for m = 1, one test for m = 2, and two tests each for
154 * m = [3,16].
155 */
156 psi2 = (double *) malloc(nb1*sizeof(double));
157 delpsi2 = (double *) malloc(nb1*sizeof(double));
158 del2psi2 = (double *) malloc(nb1*sizeof(double));
159 memset(psi2,0,nb1*sizeof(double));
160
161 /*
162 * uintbuf is the one true vector of rands processed during this test.
163 * It needs to be at LEAST 2^16 = 64K uints in length, 2^20 = 1M is much
164 * better. We'll default this from tsamples == 100000, though; it doesn't
165 * have to be divisible by 2 or anything. Note that we add one to tsamples
166 * to permit cyclic wraparound of the overlapping samples, although honestly
167 * this hardly matters in the limit of large tsamples.
168 */
169 uintbuf = (uint *)malloc((tsamples+1)*sizeof(uint));
170
171 /*
172 * If uintbuf[test[0]->tsamples] is allocated (plus one for wraparound)
173 * then we need to count the number of bits, which is the number of
174 * OVERLAPPING samples we will pull.
175 */
176 bsize = tsamples*sizeof(uint)*CHAR_BIT;
177 MYDEBUG(D_STS_SERIAL){
178 printf("# sts_serial(): bsize = %u\n",bsize);
179 }
180
181 /*
182 * We start by filling testbuf with rands and cloning the first into
183 * the last slot for cyclic wrap. This initial effort just uses the
184 * fast gsl_rng_get() call, but we'll need to use the inline from the
185 * static_get_rng routines in production.
186 */
187 for(t=0;t<tsamples;t++){
188 /* A bit slower per call, but won't fail for short rngs */
189 uintbuf[t] = get_rand_bits_uint(32,0xFFFFFFFF,rng);
190 /* Fast, but deadly to rngs with less than 32 bits returned */
191 /* uintbuf[t] = gsl_rng_get(rng); */
192 MYDEBUG(D_STS_SERIAL){
193 printf("# sts_serial(): %u: ",t);
194 dumpuintbits(&uintbuf[t],1);
195 printf("\n");
196 }
197 }
198 uintbuf[tsamples] = uintbuf[0]; /* Periodic wraparound */
199 MYDEBUG(D_STS_SERIAL){
200 printf("# sts_serial(): %u: ",(uint)tsamples);
201 dumpuintbits(&uintbuf[tsamples],1);
202 printf("\n");
203 }
204
205 /*
206 * We now in ONE PASS loop over all of the possible values of m from
207 * 1 to 16.
208 */
209 for(m=1;m<nb1;m++){
210
211 /*
212 * Set the mask for bits to be returned. This will work fine for
213 * m in the allowed range, and will fail if m = 32 if that ever
214 * happens in the future.
215 */
216 mask = 0;
217 for(i = 0; i < m; i++){
218 mask |= (1u << m);
219 }
220 mask = ((1u << m) - 1);
221
222 /* Initialize index and count total */
223 j = 0;
224 ctotal = 0;
225 MYDEBUG(D_STS_SERIAL){
226 printf("looping bsize = %u times\n",bsize);
227 }
228 for(t=0;t<bsize;t++){
229 /*
230 * Offset of current sample, relative to left boundary of window.
231 */
232 if((t%32) == 0){
233 window = uintbuf[j]; /* start with window = testbuf = charbuf */
234 MYDEBUG(D_STS_SERIAL){
235 printf("uint window[%u] = %08x = ",j,window);
236 dumpuintbits(&window,1);
237 printf("\n");
238 }
239 j++;
240 }
241 bi = t%16;
242 value = (window >> (32 - m - bi)) & mask;
243 MYDEBUG(D_STS_SERIAL){
244 dumpbitwin(value,m);
245 printf("\n");
246 }
247 freq[m][value]++;
248 ctotal++;
249 if(bi == 15){
250 window = (window << 16);
251 window += (uintbuf[j] >> 16);
252 MYDEBUG(D_STS_SERIAL){
253 printf("half window[%u] = %08x = ",j,window);
254 dumpuintbits(&window,1);
255 printf("\n");
256 }
257 }
258 }
259
260 MYDEBUG(D_STS_SERIAL){
261 printf("# sts_serial():=====================================================\n");
262 printf("# sts_serial(): Count table\n");
263 printf("# sts_serial():\tbits\tvalue\tcount\tprob\n");
264 for(i = 0; i<pow(2,m); i++){
265 printf("# sts_serial(): ");
266 dumpbitwin(i,m);
267 printf("\t%u\t%f\t%f\n",i,freq[m][i],(double) freq[m][i]/ctotal);
268 }
269 printf("# sts_serial(): Total count = %u, target probability = %f\n",ctotal,1.0/pow(2,m));
270 }
271
272 } /* End of m loop */
273
274 /*
275 * Now it is time to implement the statistic from STS SP800 whatever.
276 */
277 MYDEBUG(D_STS_SERIAL){
278 printf("# sts_serial():=====================================================\n");
279 }
280 for(m=1;m<nb1;m++){
281 for(i=0;i<pow(2,m);i++){
282 psi2[m] += freq[m][i]*freq[m][i];
283 /* printf("freq[%u][%u] = %f p2 = %f \n",m,i,freq[m][i],psi2[m]); */
284 }
285 psi2[m] = pow(2,m)*psi2[m]/bsize - bsize;
286 MYDEBUG(D_STS_SERIAL){
287 printf("# sts_serial(): psi2[%u] = %f\n",m,psi2[m]);
288 }
289 }
290
291 j=0;
292 /*
293 * This is sts_monobit, basically.
294 */
295 mono_mean = (double) 2*freq[1][0] - bsize; /* Should be 0.0 */
296 mono_sigma = sqrt((double)bsize);
297 /* printf("mono mean = %f mono_sigma = %f\n",mono_mean,mono_sigma); */
298 if(irun == 0){
299 test[j]->ntuple = 1;
300 }
301 test[j++]->pvalues[irun] = gsl_cdf_gaussian_P(mono_mean,mono_sigma);
302
303 for(m=2;m<nb1;m++){
304 delpsi2[m] = psi2[m] - psi2[m-1];
305 del2psi2[m] = psi2[m] - 2.0*psi2[m-1] + psi2[m-2];
306 pvalue = gsl_sf_gamma_inc_Q(pow(2,m-2),delpsi2[m]/2.0);
307 if(irun == 0){
308 test[j]->ntuple = m;
309 }
310 test[j++]->pvalues[irun] = pvalue;
311 MYDEBUG(D_STS_SERIAL){
312 printf("pvalue 1[%u] = %f\n",m,pvalue);
313 }
314 if(m>2){
315 pvalue = gsl_sf_gamma_inc_Q(pow(2,m-3),del2psi2[m]/2.0);
316 if(irun == 0){
317 test[j]->ntuple = m;
318 }
319 test[j++]->pvalues[irun] = pvalue;
320 MYDEBUG(D_STS_SERIAL){
321 printf("pvalue 2[%u] = %f\n",m,pvalue);
322 }
323 }
324 }
325
326 free(uintbuf);
327 free(psi2);
328 free(del2psi2);
329 free(delpsi2);
330 for(m = 1;m < nb1;m++){
331 free(freq[m]);
332 }
333 free(freq);
334
335 return(0);
336
337 }
338