1 /*
2 * ========================================================================
3 * $Id: diehard_opso.c 231 2006-08-22 16:18:05Z rgb $
4 *
5 * See copyright in copyright.h and the accompanying file COPYING
6 * ========================================================================
7 */
8
9 /*
10 * ========================================================================
11 * This is the Diehard OPSO test, rewritten from the description
12 * in tests.txt on George Marsaglia's diehard site.
13 *
14 * OPSO means Overlapping-Pairs-Sparse-Occupancy ::
15 * The OPSO test considers 2-letter words from an alphabet of ::
16 * 1024 letters. Each letter is determined by a specified ten ::
17 * bits from a 32-bit integer in the sequence to be tested. OPSO ::
18 * generates 2^21 (overlapping) 2-letter words (from 2^21+1 ::
19 * "keystrokes") and counts the number of missing words---that ::
20 * is 2-letter words which do not appear in the entire sequence. ::
21 * That count should be very close to normally distributed with ::
22 * mean 141,909, sigma 290. Thus (missingwrds-141909)/290 should ::
23 * be a standard normal variable. The OPSO test takes 32 bits at ::
24 * a time from the test file and uses a designated set of ten ::
25 * consecutive bits. It then restarts the file for the next de- ::
26 * signated 10 bits, and so on. ::
27 *
28 * Note: Overlapping samples must be used to get the right sigma.
29 * The tests BITSTREAM, OPSO, OQSO and DNA are all closely related.
30 *
31 * This test is now CORRECTED on the basis of a private communication
32 * from Paul Leopardi (MCQMC-2008 presentation) and Noonan and Zeilberger
33 * (1999), Rivals and Rahmann (2003), and Rahmann and Rivals (2003).
34 * The "exact" target statistic (to many places) is:
35 * \mu = 141909.3299550069, \sigma = 290.4622634038
36 *========================================================================
37 */
38
39
40 #include <dieharder/libdieharder.h>
41
diehard_opso(Test ** test,int irun)42 int diehard_opso(Test **test, int irun)
43 {
44
45 uint j0=0,k0=0,j,k,t;
46 Xtest ptest;
47 /*
48 * Fixed test size for speed and as per diehard.
49 */
50 char w[1024][1024];
51
52 /*
53 * for display only. 0 means "ignored".
54 */
55 test[0]->ntuple = 0;
56
57 /*
58 * p = 141909, with sigma 290, FOR test[0]->tsamples 2^21+1 2 letter words.
59 * These cannot be varied unless one figures out the actual
60 * expected "missing works" count as a function of sample size. SO:
61 *
62 * ptest.x = number of "missing words" given 2^21+1 trials
63 * Recalculation by David Bauer, from the original Monkey Tests paper:
64 * ptest.y = 141909.600361375512162724864.
65 * This shouldn't matter, I don't think, at least at any reasonable scale
66 * dieharder can yet reach (but we'll see!). If we start getting
67 * unreasonable failures we may have to try switching this number around,
68 * but given sigma and y, we'd need a LOT of rands to result the 0.3 diff.
69 *
70 * ptest.y = 141909.3299550069;
71 * ptest.sigma = 290.4622634038;
72 *
73 */
74 ptest.y = 141909.3299550069;
75 ptest.sigma = 290.4622634038;
76
77 /*
78 * We now make test[0]->tsamples measurements, as usual, to generate the
79 * missing statistic. The easiest way to proceed, I think, will
80 * be to generate a simple char matrix 1024x1024 in size and empty.
81 * Each pair of "letters" generated become indices, and a (char) 1
82 * is inserted there. At the end we just loop the matrix and count
83 * the zeros.
84 *
85 * Of course doing it THIS way it is pretty obvious that we could,
86 * say, display the 2-color 1024x1024 bitmap this represented graphically.
87 * Missing words are just pixels that are still in the background color.
88 * Hmmm, sounds a whole lot like Knuth's test looking for hyperplanes
89 * in 2 dimensions, hmmm. At the very least, any generator that produces
90 * hyperplanar banding at 2 dimensions should fail this test, but it is
91 * possible for it to find distributions that do NOT have banding but
92 * STILL fail the test, I suppose. Projectively speaking, though,
93 * I have some fairly serious doubts about this, though.
94 */
95
96 memset(w,0,sizeof(char)*1024*1024);
97
98 k = 0;
99 for(t=0;t<test[0]->tsamples;t++){
100 /*
101 * Let's do this the cheap/easy way first, sliding a 20 bit
102 * window along each int for the 32 possible starting
103 * positions a la birthdays, before trying to slide it all
104 * the way down the whole random bitstring implicit in a
105 * long sequence of random ints. That way we can exit
106 * the test[0]->tsamples loop at test[0]->tsamples = 2^15...
107 */
108 if(t%2 == 0) {
109 j0 = gsl_rng_get(rng);
110 k0 = gsl_rng_get(rng);
111 j = j0 & 0x03ff;
112 k = k0 & 0x03ff;
113 } else {
114 j = (j0 >> 10) & 0x03ff;
115 k = (k0 >> 10) & 0x03ff;
116 }
117 /*
118 * Get two "letters" (indices into w)
119 */
120 /* printf("%u: %u %u %u\n",t,j,k,boffset); */
121 w[j][k] = 1;
122 }
123
124 /*
125 * Now we count the holes, so to speak
126 */
127 ptest.x = 0;
128 for(j=0;j<1024;j++){
129 for(k=0;k<1024;k++){
130 if(w[j][k] == 0){
131 ptest.x += 1.0;
132 /* printf("ptest.x = %f Hole: w[%u][%u] = %u\n",ptest.x,j,k,w[j][k]); */
133 }
134 }
135 }
136 MYDEBUG(D_DIEHARD_OPSO) {
137 printf("%f %f %f\n",ptest.y,ptest.x,ptest.x-ptest.y);
138 }
139
140 Xtest_eval(&ptest);
141 test[0]->pvalues[irun] = ptest.pvalue;
142
143 MYDEBUG(D_DIEHARD_OPSO) {
144 printf("# diehard_opso(): ks_pvalue[%u] = %10.5f\n",irun,test[0]->pvalues[irun]);
145 }
146
147 return(0);
148
149 }
150
151