1 /*
2 * $Id: diehard_craps.c 191 2006-07-13 08:23:50Z rgb $
3 *
4 * See copyright in copyright.h and the accompanying file COPYING
5 *
6 */
7
8 /*
9 *========================================================================
10 *
11 * This is a "supertest" of random number generators -- a test that should
12 * systematically subsume pretty much all tests and provide very precise
13 * information on how a RNG fails.
14 *
15 * The way it works is as follows:
16 *
17 * Take l bits (starting at an arbitrary offset) from an RNG
18 * Skip m bits
19 * Take n bits
20 *
21 * to form a random integer l+n bits long. These integers should be
22 * uniformly distributed across the range 0 to (2^(l+n)-1).
23 *
24 * I'm not PRECISELY certain how to best form the test statistic here.
25 * It should be the same as the future "sts_series" test or the current
26 * "rgb_bitdist" test, and indeed either of these should be the same
27 * as l=bits, m=0, n=0 of the lmn test.
28 *
29 * Let me explain how this test is a supertest of sorts. First of all,
30 * the l00 test (bitdist/series) is already a universal test, in principle.
31 * Any generator that fails fails because integers cease to be uniformly
32 * distributed at some bit level. For example, suppose l = 2, m=n=0.
33 * One can achieve non-random uniformity by counting:
34 *
35 * 00 01 10 11 00 01 10 11 00 01 10 11 00 01 10 11...
36 *
37 * but the sequence is then highly non-random, obviously. This lack of
38 * randomness is revealed most glaringly by looking at the distribution
39 * of l=8,m=0,n=0 patterns, where only 00011011 appears out of the 256
40 * possibilities! Or, if one examines l = 2, m = 6, n = 2 one forms:
41 * 0000 0000 0000 (an obvious failure).
42 *
43 * A bit of meditation on this will convince you that ANY possible failure
44 * in randomness -- be it fourier related or high dimensional striping or
45 * monkey tests or whatever -- is revealed by a lack of uniformity for some
46 * sufficiently high l,m=0,n=0. A "good" generator is one where that does
47 * not occur for l << period of the generator, and a GREAT generator is
48 * one where it does not occur for l << period and that period is very large
49 * indeed!
50 *
51 * However, practical considerations prevent us from testing uniformity
52 * for l > 32 or thereabouts. There are 4 billion-odd bins at l = 32 (m = 0,
53 * n = 0). To test uniformity on this, one needs to generate roughly
54 * 400 billion rands (to get an average of 100 hits per bin, with a
55 * reasonable sigma). Cumulating e.g. chisquare across this would be
56 * difficult, but conceivably could be done on a cluster if nothing else.
57 * At 64 bits, though, one cannot imagine holding the bins on any extant
58 * system, and it would take far longer than the lifetime of the universe
59 * to compute etc. 128 bits is just laughable.
60 *
61 * Hence the "lmn" part -- if we assume that (say) 24 bits is a practical
62 * upper bound on what we can bin, we can extend our test into a PARTIAL
63 * projection of randomness over longer sequences by testing:
64 * l+n = 24, m=0-104. This slides two windows across 128 bits and looks
65 * at the relative frequency of the integers thus generated. If the
66 * RNG generates numbers that are uniform at 24 bits but correlated across
67 * 128, this has some chance of revealing the correlation. Indeed,
68 * m can be quite large here -- one can potentially discover long range
69 * fourier correlations in this way.
70 *
71 * This approach has one more weakness, though. It is known that certain
72 * generators fail because e.g. triples of integers, viewed as coordinates,
73 * fall only on certain hyperplanes. The lmn test should reveal this
74 * profoundly for doublets of integers, but might have a hard time doing
75 * third order correlations for triplets. That is, for large enough l it
76 * can do so, but there is probably a projection of numbers that will pass
77 * second order at l+n one can afford while missing a glaring third order
78 * violation). And so on -- there is likely a four-at-a-time correlated
79 * sequency that passes three-at-a-time tests. So the MOST general test
80 * may need to be an lmnop... series.
81 *
82 * This isn't quite the only test one would ever need. One may well want
83 * to look at particular statistics for l=32 (for example), m = whatever,
84 * n = 32, where one cannot afford to actually bin the 64 bits. In that
85 * case one has to e.g. figure out some sort of asymptotic statistic that
86 * would deviate for at least a certain class of nonrandomness and pray.
87 * this is what e.g. monkey tests do today, but they are not arranged
88 * anywhere nearly this systematically and hence do not reveal specific
89 * details about the correlations they uncover.
90 *========================================================================
91 */
92
93 #include <dieharder/libdieharder.h>
94 #include <dieharder/rgb_lmn.h>
95
rgb_lmn()96 int rgb_lmn()
97 {
98
99 double pks;
100 uint ps_save=0,ts_save=0;
101
102 /*
103 * Do a standard test if -a(ll) is selected.
104 * ALSO use standard values if tsamples or psamples are 0
105 */
106 if(all == YES){
107 ts_save = tsamples;
108 tsamples = dtest->tsamples_std;
109 ps_save = psamples;
110 psamples = dtest->psamples_std;
111 }
112 if(tsamples == 0){
113 tsamples = dtest->tsamples_std;
114 }
115 if(psamples == 0){
116 psamples = dtest->psamples_std;
117 }
118
119 /*
120 * Allocate memory for THIS test's ks_pvalues, etc. Make sure that
121 * any missed prior allocations are freed.
122 */
123 if(ks_pvalue) nullfree(ks_pvalue);
124 ks_pvalue = (double *)malloc((size_t) psamples*sizeof(double));
125
126 /*
127 * Reseed FILE random number generators once per individual test.
128 * This correctly resets the rewind counter per test.
129 */
130 if(strncmp("file_input",gsl_rng_name(rng),10) == 0){
131 gsl_rng_set(rng,1);
132 }
133
134 /* show_test_header(dtest); */
135
136 /*
137 * Any custom test header output lines go here. They should be
138 * used VERY sparingly.
139 */
140
141 /*
142 * This is the standard test call.
143 */
144 kspi = 0; /* Always zero first */
145 /* pks = sample((void *)rgb_lmn_test); */
146
147 /*
148 * Test Results, standard form.
149 show_test_results(dtest,pks,ks_pvalue,"Lagged Sum Test");
150 */
151
152 /*
153 * Put back tsamples
154 */
155 if(all == YES){
156 tsamples = ts_save;
157 psamples = ps_save;
158 }
159
160 if(ks_pvalue) nullfree(ks_pvalue);
161
162 return(0);
163
164 }
165
rgb_lmn_test()166 void rgb_lmn_test()
167 {
168
169 uint t,i,lag;
170 Xtest ptest;
171
172 /*
173 * ptest.x = actual sum of tsamples lagged samples from rng
174 * ptest.y = tsamples*0.5 is the expected mean value of the sum
175 * ptest.sigma = sqrt(tsamples/12.0) is the standard deviation
176 */
177 ptest.x = 0.0; /* Initial value */
178 ptest.y = (double) tsamples*0.5;
179 ptest.sigma = sqrt(tsamples/12.0);
180
181 /*
182 * sample only every lag returns from the rng, discard the rest.
183 * We have to get the (float) value from the user input and set
184 * a uint
185 */
186 if(x_user){
187 lag = x_user;
188 } else {
189 lag = 2; /* Why not? Anything but 0, really... */
190 }
191
192 if(verbose == D_USER_TEMPLATE || verbose == D_ALL){
193 printf("# rgb_lmn(): Doing a test on lag %u\n",lag);
194 }
195
196 for(t=0;t<tsamples;t++){
197
198 /*
199 * A VERY SIMPLE test (probably not too sensitive)
200 */
201
202 /* Throw away lag-1 per sample */
203 for(i=0;i<(lag-1);i++) gsl_rng_uniform(rng);
204
205 /* sample only every lag numbers, reset counter */
206 ptest.x += gsl_rng_uniform(rng);
207
208 }
209
210 Xtest_eval(&ptest);
211 ks_pvalue[kspi] = ptest.pvalue;
212
213 if(verbose == D_USER_TEMPLATE || verbose == D_ALL){
214 printf("# rgb_lmn(): ks_pvalue[%u] = %10.5f\n",kspi,ks_pvalue[kspi]);
215 }
216
217 kspi++;
218
219 }
220
help_rgb_lmn()221 void help_rgb_lmn()
222 {
223
224 printf("%s",dtest->description);
225
226 }
227