1 /*
2  * See copyright in copyright.h and the accompanying file COPYING
3  */
4 
5 /*
6  *========================================================================
7  * This is a test that checks to see if the rng generates bit patterns
8  * (n-tuples) that are distributed correctly (binomially).  For example,
9  * for 2-tuples (bit pairs) there are four possibilities: 00, 01, 10, 11.
10  * Each should occur with a probability of 1/4, hence the count of (say) K
11  * 00 bitpairs out of N trials should be distributed (over M samples)
12  * according to the binomial probability p = binomial(N,K,0.25) -- that
13  * is, the expected count for k 00's is M*p(N,K,0.25).
14  *
15  * This test should be more sensitive than just doing a large number of
16  * trials and aggregating all the 00's over all the samples and comparing
17  * the result to the expected mean, as there can be distributions that
18  * have the expected mean of 0.25 that are >>not<< binomially distributed.
19  *
20  * By making a single program capable of doing any n-tuple, we gain
21  * immediate benefit.  Both STS and Diehard, for example, contain
22  * tests that validate bit distribution frequencies; however, they typically
23  * do so for specific n-tuples, e.g. 5 in several diehard tests.  There
24  * is nothing terribly special about 5, and no point in looking too
25  * hard at e.g. intervals between patterns at 5, since testing pure
26  * frequency at 6 bits simultaneously ensures that all 6 bit patterns have
27  * the correct frequency (since they are sub-patterns of 6 bit patterns
28  * that have the correct frequency, and if they had the INcorrect frequency
29  * the six-bit patterns would as well by inheritance) and (less obviously,
30  * but I'm pretty sure I can prove it) that intervals between the five
31  * bit patterns must also be correct IF the test is satisfied robustly
32  * for various arbitrary test string lengths and sampling counts.
33  *
34  * Anyway, I'm giving this one a shot as it may end up being a one-size
35  * fits all tool for validating bitlevel randomness for any generator,
36  * up to some n-tuple.  I expect that 2004/2005 computers will easily
37  * be able to validate through n=8 without really warming up or taking
38  * terribly long (validating the frequency and distribution for e.g.
39  * the entire ascii alphabet at the byte level) and MAY be able to do
40  * actual frequency validation on 16-bit integers.  Note that it only takes
41  * a few seconds to generate millions of 32 bit integers with most of
42  * the GSL routines.  To get statistically valid results, one needs to
43  * accumulate order of 100 trials per outcome, or test order of millions
44  * of integers and accumulate the results in an outcome vector 64K long.
45  * both of these are well within reach using at most hours of CPU time,
46  * and partitioned on a beowulf might take only minutes or even seconds.
47  *
48  * The latest version of the actual test (below) tests only ONE ntuple,
49  * the value set in the global variable sts_serial_ntuple which must
50  * be a positive integer.  The calling program is responsible for e.g.
51  * testing a range of ntuples.
52  *========================================================================
53  */
54 
55 /*
56  *                      Overlapping Test
57  *==================================================================
58  * We fill input with inlen uints. The test rules are that
59  * nb < |log_2 (inlen*32) | - 2.  Or inverting (which is
60  * more useful) inlen = 2^(nb+2)/sizeof(uint).  For example, if
61  * nb=8, inlen = 1024/32 = 32.  This is obviously very convervative.
62  * Even if nb=16 (testing 65K number) it is only 2^18/2^5 = 2^13 or
63  * around 8 MB of uints.
64  *
65  * To make this test fast, we should very likely just use
66  * inlen = 2^nb UINTS, which is 8x the minimum recommended and easy
67  * to compute, e.g. for nb = 16 we have input[65536], containing
68  * 2^24 = 16 Mbits.  We then allocate one extra uint at the end,
69  * fill it, copy the 0 uint to the end for periodic wrap, and
70  * simply rip a uint window along it with the inline shift trick
71  * to generate the overlapping samples.  The code we develop can
72  * likely be backported to bits.c as being a bit more efficient.
73  */
74 
75 #include <dieharder/libdieharder.h>
76 
77 #include "static_get_bits.c"
78 
79 /*
80  * This is a buffer of uint length (2^nb)+1 that we will fill with rands
81  * to be tested, once per test.
82  */
83 
sts_serial(Test ** test,int irun)84 int sts_serial(Test **test,int irun)
85 {
86 
87  uint bsize;       /* number of bits/samples in uintbuf */
88  uint nb,nb1;          /* number of bits in a tested ntuple */
89  uint value;       /* value of sampled ntuple (as a uint) */
90  uint mask;        /* mask in only nb bits */
91  uint bi;          /* bit offset relative to window */
92 
93  /* Look for cruft below */
94 
95  uint i,j,m,t;            /* generic loop indices */
96  uint ctotal;  /* count of any ntuple per bitstring */
97  double **freq,*psi2,*delpsi2,*del2psi2;
98  double pvalue;
99  uint *uintbuf;
100  uint window;  /* uint window into uintbuf, slide along a byte at at time. */
101 
102  double mono_mean,mono_sigma;  /* For single bit test */
103 
104  /*
105   * Sample a bitstring of rgb_bitstring_ntuple in length (exactly).  Note
106   * that I'm going to force nb>=2, nb<=16 for the moment.  The routine
107   * might "work" for nb up to 24 or even (on a really big memory machine)
108   * 32, but memory requirements in uints are around 2^(nb+1) + n where n
109   * is order unity, maybe worse, so I think it would take a 12 to 16 GB
110   * machine to be able to just hold the memory, and then it would take a
111   * very long time.  However, the testing process might parallelize
112   * decently if we could farm out the blocks of bits to be run efficiently
113   * over a network relative to the time required to generate them.
114   */
115  nb = 16;        /* Just ignore sts_serial_ntuple for now */
116  tsamples = test[0]->tsamples;  /* ditto */
117  MYDEBUG(D_STS_SERIAL){
118    printf("#==================================================================\n");
119    printf("# Starting sts_serial.\n");
120    printf("# sts_serial: Testing ntuple = %u\n",nb);
121  }
122 
123  /*
124   * This is useful as a limit here and there as we have things that range
125   * from 0 to nb INclusive (unless/until we fix the mallocs up a bit).
126   */
127  nb1 = nb+1;
128 
129  /*
130   * We need a vector of freq vectors.  We make each frequency counter
131   * vector only as large as it needs to be.  We zero each accumulator as
132   * we make it.  Note that we cannot sample "0 bits at a time" so m starts
133   * at 1.
134   */
135  freq = (double **) malloc(nb1*sizeof(double *));
136  for(m = 1;m < nb1;m++) {
137    freq[m] = (double *)malloc(pow(2,m)*sizeof(double));
138    memset(freq[m],0,pow(2,m)*sizeof(double));
139  }
140 
141  /*
142   * These are the statistics required by sts_serial in SP800.  psi2[m] is
143   * basically the sum of the squares of the bitpattern frequences for a
144   * given number of bits m.  delpsi2 is the difference between psi2[m] and
145   * psi2[m-1].  del2psi2 is the difference of delpsi2[m] and delpsi2[m-1].
146   * We zero only need to zero psi2, as the other two are set directly.
147   *
148   * Note that we can only generate psi2 for m>0, delpsi2 for m>1, delpsi3
149   * for m>2.  We can therefore only generate both test statistics (based
150   * on delpsi2 and del2psi2) for m>=3 to m=16.  What we CAN do for m=1
151   * is generate a straight normal p-value on a binomial probability for
152   * e.g. average number of 1's, basically sts_monobit.  That would give
153   * us one test for m = 1, one test for m = 2, and two tests each for
154   * m = [3,16].
155   */
156  psi2     = (double *) malloc(nb1*sizeof(double));
157  delpsi2  = (double *) malloc(nb1*sizeof(double));
158  del2psi2 = (double *) malloc(nb1*sizeof(double));
159  memset(psi2,0,nb1*sizeof(double));
160 
161  /*
162   * uintbuf is the one true vector of rands processed during this test.
163   * It needs to be at LEAST 2^16 = 64K uints in length, 2^20 = 1M is much
164   * better.  We'll default this from tsamples == 100000, though; it doesn't
165   * have to be divisible by 2 or anything.  Note that we add one to tsamples
166   * to permit cyclic wraparound of the overlapping samples, although honestly
167   * this hardly matters in the limit of large tsamples.
168   */
169  uintbuf = (uint *)malloc((tsamples+1)*sizeof(uint));
170 
171  /*
172   * If uintbuf[test[0]->tsamples] is allocated (plus one for wraparound)
173   * then we need to count the number of bits, which is the number of
174   * OVERLAPPING samples we will pull.
175   */
176  bsize = tsamples*sizeof(uint)*CHAR_BIT;
177  MYDEBUG(D_STS_SERIAL){
178    printf("# sts_serial(): bsize = %u\n",bsize);
179  }
180 
181  /*
182   * We start by filling testbuf with rands and cloning the first into
183   * the last slot for cyclic wrap.  This initial effort just uses the
184   * fast gsl_rng_get() call, but we'll need to use the inline from the
185   * static_get_rng routines in production.
186   */
187  for(t=0;t<tsamples;t++){
188    /* A bit slower per call, but won't fail for short rngs */
189    uintbuf[t] = get_rand_bits_uint(32,0xFFFFFFFF,rng);
190    /* Fast, but deadly to rngs with less than 32 bits returned */
191    /* uintbuf[t] = gsl_rng_get(rng); */
192    MYDEBUG(D_STS_SERIAL){
193      printf("# sts_serial(): %u:  ",t);
194      dumpuintbits(&uintbuf[t],1);
195      printf("\n");
196    }
197  }
198  uintbuf[tsamples] = uintbuf[0];   /* Periodic wraparound */
199  MYDEBUG(D_STS_SERIAL){
200    printf("# sts_serial(): %u:  ",(uint)tsamples);
201    dumpuintbits(&uintbuf[tsamples],1);
202    printf("\n");
203  }
204 
205  /*
206   * We now in ONE PASS loop over all of the possible values of m from
207   * 1 to 16.
208   */
209  for(m=1;m<nb1;m++){
210 
211    /*
212     * Set the mask for bits to be returned.  This will work fine for
213     * m in the allowed range, and will fail if m = 32 if that ever
214     * happens in the future.
215     */
216    mask = 0;
217    for(i = 0; i < m; i++){
218       mask |= (1u << m);
219    }
220    mask = ((1u << m) - 1);
221 
222    /* Initialize index and count total */
223    j = 0;
224    ctotal = 0;
225    MYDEBUG(D_STS_SERIAL){
226      printf("looping bsize = %u times\n",bsize);
227    }
228    for(t=0;t<bsize;t++){
229      /*
230       * Offset of current sample, relative to left boundary of window.
231       */
232      if((t%32) == 0){
233        window = uintbuf[j];  /* start with window = testbuf = charbuf */
234        MYDEBUG(D_STS_SERIAL){
235          printf("uint window[%u] = %08x = ",j,window);
236          dumpuintbits(&window,1);
237          printf("\n");
238        }
239        j++;
240      }
241      bi = t%16;
242      value = (window >> (32 - m - bi)) & mask;
243      MYDEBUG(D_STS_SERIAL){
244        dumpbitwin(value,m);
245        printf("\n");
246      }
247      freq[m][value]++;
248      ctotal++;
249      if(bi == 15){
250        window = (window << 16);
251        window += (uintbuf[j] >> 16);
252        MYDEBUG(D_STS_SERIAL){
253          printf("half window[%u] = %08x = ",j,window);
254          dumpuintbits(&window,1);
255          printf("\n");
256        }
257      }
258    }
259 
260    MYDEBUG(D_STS_SERIAL){
261      printf("# sts_serial():=====================================================\n");
262      printf("# sts_serial():                  Count table\n");
263      printf("# sts_serial():\tbits\tvalue\tcount\tprob\n");
264      for(i = 0; i<pow(2,m); i++){
265        printf("# sts_serial():   ");
266        dumpbitwin(i,m);
267        printf("\t%u\t%f\t%f\n",i,freq[m][i],(double) freq[m][i]/ctotal);
268      }
269      printf("# sts_serial(): Total count = %u, target probability = %f\n",ctotal,1.0/pow(2,m));
270    }
271 
272  } /* End of m loop */
273 
274  /*
275   * Now it is time to implement the statistic from STS SP800 whatever.
276   */
277  MYDEBUG(D_STS_SERIAL){
278    printf("# sts_serial():=====================================================\n");
279  }
280  for(m=1;m<nb1;m++){
281    for(i=0;i<pow(2,m);i++){
282      psi2[m] += freq[m][i]*freq[m][i];
283      /* printf("freq[%u][%u] = %f  p2 = %f \n",m,i,freq[m][i],psi2[m]); */
284    }
285    psi2[m] = pow(2,m)*psi2[m]/bsize - bsize;
286    MYDEBUG(D_STS_SERIAL){
287      printf("# sts_serial(): psi2[%u] = %f\n",m,psi2[m]);
288    }
289  }
290 
291  j=0;
292  /*
293   * This is sts_monobit, basically.
294   */
295  mono_mean = (double) 2*freq[1][0] - bsize;   /* Should be 0.0 */
296  mono_sigma = sqrt((double)bsize);
297  /* printf("mono mean = %f   mono_sigma = %f\n",mono_mean,mono_sigma); */
298  if(irun == 0){
299    test[j]->ntuple = 1;
300  }
301  test[j++]->pvalues[irun] = gsl_cdf_gaussian_P(mono_mean,mono_sigma);
302 
303  for(m=2;m<nb1;m++){
304    delpsi2[m] = psi2[m] - psi2[m-1];
305    del2psi2[m] = psi2[m] - 2.0*psi2[m-1] + psi2[m-2];
306    pvalue = gsl_sf_gamma_inc_Q(pow(2,m-2),delpsi2[m]/2.0);
307    if(irun == 0){
308      test[j]->ntuple = m;
309    }
310    test[j++]->pvalues[irun] = pvalue;
311    MYDEBUG(D_STS_SERIAL){
312      printf("pvalue 1[%u] = %f\n",m,pvalue);
313    }
314    if(m>2){
315      pvalue = gsl_sf_gamma_inc_Q(pow(2,m-3),del2psi2[m]/2.0);
316      if(irun == 0){
317        test[j]->ntuple = m;
318      }
319      test[j++]->pvalues[irun] = pvalue;
320      MYDEBUG(D_STS_SERIAL){
321        printf("pvalue 2[%u] = %f\n",m,pvalue);
322      }
323    }
324  }
325 
326  free(uintbuf);
327  free(psi2);
328  free(del2psi2);
329  free(delpsi2);
330  for(m = 1;m < nb1;m++){
331    free(freq[m]);
332  }
333  free(freq);
334 
335  return(0);
336 
337 }
338