1 /*
2  * $Id: diehard_craps.c 191 2006-07-13 08:23:50Z rgb $
3  *
4  * See copyright in copyright.h and the accompanying file COPYING
5  *
6  */
7 
8 /*
9  *========================================================================
10  *
11  * This is a "supertest" of random number generators -- a test that should
12  * systematically subsume pretty much all tests and provide very precise
13  * information on how a RNG fails.
14  *
15  * The way it works is as follows:
16  *
17  *    Take l bits (starting at an arbitrary offset) from an RNG
18  *    Skip m bits
19  *    Take n bits
20  *
21  * to form a random integer l+n bits long.  These integers should be
22  * uniformly distributed across the range 0 to (2^(l+n)-1).
23  *
24  * I'm not PRECISELY certain how to best form the test statistic here.
25  * It should be the same as the future "sts_series" test or the current
26  * "rgb_bitdist" test, and indeed either of these should be the same
27  * as l=bits, m=0, n=0 of the lmn test.
28  *
29  * Let me explain how this test is a supertest of sorts.  First of all,
30  * the l00 test (bitdist/series) is already a universal test, in principle.
31  * Any generator that fails fails because integers cease to be uniformly
32  * distributed at some bit level.  For example, suppose l = 2, m=n=0.
33  * One can achieve non-random uniformity by counting:
34  *
35  *  00 01 10 11 00 01 10 11 00 01 10 11 00 01 10 11...
36  *
37  * but the sequence is then highly non-random, obviously.  This lack of
38  * randomness is revealed most glaringly by looking at the distribution
39  * of l=8,m=0,n=0 patterns, where only 00011011 appears out of the 256
40  * possibilities!  Or, if one examines l = 2, m = 6, n = 2 one forms:
41  * 0000 0000 0000 (an obvious failure).
42  *
43  * A bit of meditation on this will convince you that ANY possible failure
44  * in randomness -- be it fourier related or high dimensional striping or
45  * monkey tests or whatever -- is revealed by a lack of uniformity for some
46  * sufficiently high l,m=0,n=0.  A "good" generator is one where that does
47  * not occur for l << period of the generator, and a GREAT generator is
48  * one where it does not occur for l << period and that period is very large
49  * indeed!
50  *
51  * However, practical considerations prevent us from testing uniformity
52  * for l > 32 or thereabouts.  There are 4 billion-odd bins at l = 32 (m = 0,
53  * n = 0).  To test uniformity on this, one needs to generate roughly
54  * 400 billion rands (to get an average of 100 hits per bin, with a
55  * reasonable sigma).  Cumulating e.g. chisquare across this would be
56  * difficult, but conceivably could be done on a cluster if nothing else.
57  * At 64 bits, though, one cannot imagine holding the bins on any extant
58  * system, and it would take far longer than the lifetime of the universe
59  * to compute etc.  128 bits is just laughable.
60  *
61  * Hence the "lmn" part -- if we assume that (say) 24 bits is a practical
62  * upper bound on what we can bin, we can extend our test into a PARTIAL
63  * projection of randomness over longer sequences by testing:
64  * l+n = 24, m=0-104.  This slides two windows across 128 bits and looks
65  * at the relative frequency of the integers thus generated.  If the
66  * RNG generates numbers that are uniform at 24 bits but correlated across
67  * 128, this has some chance of revealing the correlation.  Indeed,
68  * m can be quite large here -- one can potentially discover long range
69  * fourier correlations in this way.
70  *
71  * This approach has one more weakness, though.  It is known that certain
72  * generators fail because e.g. triples of integers, viewed as coordinates,
73  * fall only on certain hyperplanes.  The lmn test should reveal this
74  * profoundly for doublets of integers, but might have a hard time doing
75  * third order correlations for triplets. That is, for large enough l it
76  * can do so, but there is probably a projection of numbers that will pass
77  * second order at l+n one can afford while missing a glaring third order
78  * violation).  And so on -- there is likely a four-at-a-time correlated
79  * sequency that passes three-at-a-time tests.  So the MOST general test
80  * may need to be an lmnop... series.
81  *
82  * This isn't quite the only test one would ever need.  One may well want
83  * to look at particular statistics for l=32 (for example), m = whatever,
84  * n = 32, where one cannot afford to actually bin the 64 bits.  In that
85  * case one has to e.g. figure out some sort of asymptotic statistic that
86  * would deviate for at least a certain class of nonrandomness and pray.
87  * this is what e.g. monkey tests do today, but they are not arranged
88  * anywhere nearly this systematically and hence do not reveal specific
89  * details about the correlations they uncover.
90  *========================================================================
91  */
92 
93 #include <dieharder/libdieharder.h>
94 #include <dieharder/rgb_lmn.h>
95 
rgb_lmn()96 int rgb_lmn()
97 {
98 
99  double pks;
100  uint ps_save=0,ts_save=0;
101 
102  /*
103   * Do a standard test if -a(ll) is selected.
104   * ALSO use standard values if tsamples or psamples are 0
105   */
106  if(all == YES){
107    ts_save = tsamples;
108    tsamples = dtest->tsamples_std;
109    ps_save = psamples;
110    psamples = dtest->psamples_std;
111  }
112  if(tsamples == 0){
113    tsamples = dtest->tsamples_std;
114  }
115  if(psamples == 0){
116    psamples = dtest->psamples_std;
117  }
118 
119  /*
120   * Allocate memory for THIS test's ks_pvalues, etc.  Make sure that
121   * any missed prior allocations are freed.
122   */
123  if(ks_pvalue) nullfree(ks_pvalue);
124  ks_pvalue  = (double *)malloc((size_t) psamples*sizeof(double));
125 
126  /*
127   * Reseed FILE random number generators once per individual test.
128   * This correctly resets the rewind counter per test.
129   */
130  if(strncmp("file_input",gsl_rng_name(rng),10) == 0){
131    gsl_rng_set(rng,1);
132  }
133 
134  /* show_test_header(dtest); */
135 
136  /*
137   * Any custom test header output lines go here.  They should be
138   * used VERY sparingly.
139   */
140 
141  /*
142   * This is the standard test call.
143   */
144  kspi = 0;  /* Always zero first */
145  /* pks = sample((void *)rgb_lmn_test); */
146 
147  /*
148   * Test Results, standard form.
149  show_test_results(dtest,pks,ks_pvalue,"Lagged Sum Test");
150   */
151 
152  /*
153   * Put back tsamples
154   */
155  if(all == YES){
156    tsamples = ts_save;
157    psamples = ps_save;
158  }
159 
160  if(ks_pvalue) nullfree(ks_pvalue);
161 
162  return(0);
163 
164 }
165 
rgb_lmn_test()166 void rgb_lmn_test()
167 {
168 
169  uint t,i,lag;
170  Xtest ptest;
171 
172  /*
173   * ptest.x = actual sum of tsamples lagged samples from rng
174   * ptest.y = tsamples*0.5 is the expected mean value of the sum
175   * ptest.sigma = sqrt(tsamples/12.0) is the standard deviation
176   */
177  ptest.x = 0.0;  /* Initial value */
178  ptest.y = (double) tsamples*0.5;
179  ptest.sigma = sqrt(tsamples/12.0);
180 
181  /*
182   * sample only every lag returns from the rng, discard the rest.
183   * We have to get the (float) value from the user input and set
184   * a uint
185   */
186  if(x_user){
187    lag = x_user;
188  } else {
189    lag = 2; /* Why not?  Anything but 0, really... */
190  }
191 
192  if(verbose == D_USER_TEMPLATE || verbose == D_ALL){
193    printf("# rgb_lmn(): Doing a test on lag %u\n",lag);
194  }
195 
196  for(t=0;t<tsamples;t++){
197 
198    /*
199     * A VERY SIMPLE test (probably not too sensitive)
200     */
201 
202    /* Throw away lag-1 per sample */
203    for(i=0;i<(lag-1);i++) gsl_rng_uniform(rng);
204 
205    /* sample only every lag numbers, reset counter */
206    ptest.x += gsl_rng_uniform(rng);
207 
208  }
209 
210  Xtest_eval(&ptest);
211  ks_pvalue[kspi] = ptest.pvalue;
212 
213  if(verbose == D_USER_TEMPLATE || verbose == D_ALL){
214    printf("# rgb_lmn(): ks_pvalue[%u] = %10.5f\n",kspi,ks_pvalue[kspi]);
215  }
216 
217  kspi++;
218 
219 }
220 
help_rgb_lmn()221 void help_rgb_lmn()
222 {
223 
224   printf("%s",dtest->description);
225 
226 }
227