1 /*
2  * ========================================================================
3  * $Id: diehard_opso.c 231 2006-08-22 16:18:05Z rgb $
4  *
5  * See copyright in copyright.h and the accompanying file COPYING
6  * ========================================================================
7  */
8 
9 /*
10  * ========================================================================
11  * This is the Diehard OPSO test, rewritten from the description
12  * in tests.txt on George Marsaglia's diehard site.
13  *
14  *         OPSO means Overlapping-Pairs-Sparse-Occupancy         ::
15  * The OPSO test considers 2-letter words from an alphabet of    ::
16  * 1024 letters.  Each letter is determined by a specified ten   ::
17  * bits from a 32-bit integer in the sequence to be tested. OPSO ::
18  * generates  2^21 (overlapping) 2-letter words  (from 2^21+1    ::
19  * "keystrokes")  and counts the number of missing words---that  ::
20  * is 2-letter words which do not appear in the entire sequence. ::
21  * That count should be very close to normally distributed with  ::
22  * mean 141,909, sigma 290. Thus (missingwrds-141909)/290 should ::
23  * be a standard normal variable. The OPSO test takes 32 bits at ::
24  * a time from the test file and uses a designated set of ten    ::
25  * consecutive bits. It then restarts the file for the next de-  ::
26  * signated 10 bits, and so on.                                  ::
27  *
28  * Note: Overlapping samples must be used to get the right sigma.
29  * The tests BITSTREAM, OPSO, OQSO and DNA are all closely related.
30  *
31  * This test is now CORRECTED on the basis of a private communication
32  * from Paul Leopardi (MCQMC-2008 presentation) and Noonan and Zeilberger
33  * (1999), Rivals and Rahmann (2003), and Rahmann and Rivals (2003).
34  * The "exact" target statistic (to many places) is:
35  * \mu  = 141909.3299550069,  \sigma = 290.4622634038
36  *========================================================================
37  */
38 
39 
40 #include <dieharder/libdieharder.h>
41 
diehard_opso(Test ** test,int irun)42 int diehard_opso(Test **test, int irun)
43 {
44 
45  uint j0=0,k0=0,j,k,t;
46  Xtest ptest;
47  /*
48   * Fixed test size for speed and as per diehard.
49   */
50  char w[1024][1024];
51 
52  /*
53   * for display only.  0 means "ignored".
54   */
55  test[0]->ntuple = 0;
56 
57  /*
58   * p = 141909, with sigma 290, FOR test[0]->tsamples 2^21+1 2 letter words.
59   * These cannot be varied unless one figures out the actual
60   * expected "missing works" count as a function of sample size.  SO:
61   *
62   * ptest.x = number of "missing words" given 2^21+1 trials
63   * Recalculation by David Bauer, from the original Monkey Tests paper:
64   *    ptest.y = 141909.600361375512162724864.
65   * This shouldn't matter, I don't think, at least at any reasonable scale
66   * dieharder can yet reach (but we'll see!).  If we start getting
67   * unreasonable failures we may have to try switching this number around,
68   * but given sigma and y, we'd need a LOT of rands to result the 0.3 diff.
69   *
70   * ptest.y = 141909.3299550069;
71   * ptest.sigma = 290.4622634038;
72   *
73   */
74  ptest.y = 141909.3299550069;
75  ptest.sigma = 290.4622634038;
76 
77  /*
78   * We now make test[0]->tsamples measurements, as usual, to generate the
79   * missing statistic.  The easiest way to proceed, I think, will
80   * be to generate a simple char matrix 1024x1024 in size and empty.
81   * Each pair of "letters" generated become indices, and a (char) 1
82   * is inserted there.  At the end we just loop the matrix and count
83   * the zeros.
84   *
85   * Of course doing it THIS way it is pretty obvious that we could,
86   * say, display the 2-color 1024x1024 bitmap this represented graphically.
87   * Missing words are just pixels that are still in the background color.
88   * Hmmm, sounds a whole lot like Knuth's test looking for hyperplanes
89   * in 2 dimensions, hmmm.  At the very least, any generator that produces
90   * hyperplanar banding at 2 dimensions should fail this test, but it is
91   * possible for it to find distributions that do NOT have banding but
92   * STILL fail the test, I suppose.  Projectively speaking, though,
93   * I have some fairly serious doubts about this, though.
94   */
95 
96  memset(w,0,sizeof(char)*1024*1024);
97 
98  k = 0;
99  for(t=0;t<test[0]->tsamples;t++){
100    /*
101     * Let's do this the cheap/easy way first, sliding a 20 bit
102     * window along each int for the 32 possible starting
103     * positions a la birthdays, before trying to slide it all
104     * the way down the whole random bitstring implicit in a
105     * long sequence of random ints.  That way we can exit
106     * the test[0]->tsamples loop at test[0]->tsamples = 2^15...
107     */
108    if(t%2 == 0) {
109      j0 = gsl_rng_get(rng);
110      k0 = gsl_rng_get(rng);
111      j = j0 & 0x03ff;
112      k = k0 & 0x03ff;
113    } else {
114       j = (j0 >> 10) & 0x03ff;
115       k = (k0 >> 10) & 0x03ff;
116    }
117    /*
118     * Get two "letters" (indices into w)
119     */
120    /* printf("%u:   %u  %u  %u\n",t,j,k,boffset); */
121    w[j][k] = 1;
122  }
123 
124  /*
125   * Now we count the holes, so to speak
126   */
127  ptest.x = 0;
128  for(j=0;j<1024;j++){
129    for(k=0;k<1024;k++){
130      if(w[j][k] == 0){
131        ptest.x += 1.0;
132        /* printf("ptest.x = %f  Hole: w[%u][%u] = %u\n",ptest.x,j,k,w[j][k]); */
133      }
134    }
135  }
136  MYDEBUG(D_DIEHARD_OPSO) {
137    printf("%f %f %f\n",ptest.y,ptest.x,ptest.x-ptest.y);
138  }
139 
140  Xtest_eval(&ptest);
141  test[0]->pvalues[irun] = ptest.pvalue;
142 
143  MYDEBUG(D_DIEHARD_OPSO) {
144    printf("# diehard_opso(): ks_pvalue[%u] = %10.5f\n",irun,test[0]->pvalues[irun]);
145  }
146 
147  return(0);
148 
149 }
150 
151