1 /*
2  * ========================================================================
3  * $Id: diehard_2dsphere.c 231 2006-08-22 16:18:05Z rgb $
4  *
5  * See copyright in copyright.h and the accompanying file COPYING
6  * ========================================================================
7  */
8 
9 /*
10  * ========================================================================
11  * This is the Diehard minimum distance (2d sphere) test, rewritten from
12  * the description in tests.txt on George Marsaglia's diehard site.
13  *
14  *               THE MINIMUM DISTANCE TEST                       ::
15  * It does this 100 times::   choose n=8000 random points in a   ::
16  * square of side 10000.  Find d, the minimum distance between   ::
17  * the (n^2-n)/2 pairs of points.  If the points are truly inde- ::
18  * pendent uniform, then d^2, the square of the minimum distance ::
19  * should be (very close to) exponentially distributed with mean ::
20  * .995 .  Thus 1-exp(-d^2/.995) should be uniform on [0,1) and  ::
21  * a KSTEST on the resulting 100 values serves as a test of uni- ::
22  * formity for random points in the square. Test numbers=0 mod 5 ::
23  * are printed but the KSTEST is based on the full set of 100    ::
24  * random choices of 8000 points in the 10000x10000 square.      ::
25  *
26  *                      Comment
27  * Obviously the same test as 3d Spheres but in 2d, hence the
28  * name.  This test has a BUG in it -- the expression it uses to
29  * evaluate p is not accurate enough to withstand the demands of
30  * dieharder.  M. Fischler pointed this out and derived the
31  * required corrections (giving explicit forms useful out to d=5)
32  * and they are incorporated in rgb_minimum_distance().  This test
33  * is hence OBSOLETE and is left in so people can play with it and
34  * convince themselves that this is so.
35  *
36  * I did make one set of changes to this test to make it considerably more
37  * efficient at extracting the minimum distance.
38  * ========================================================================
39  */
40 
41 
42 #include <dieharder/libdieharder.h>
43 
44 #define POINTS_2D 8000
45 #define DIM_2D 2
46 
47 typedef struct {
48   double x[DIM_2D];
49 } C3_2D;
50 
51 int compare_points(const dTuple *a,const dTuple *b);
52 double distance(const dTuple a,const dTuple b,uint dim);
53 
diehard_2dsphere(Test ** test,int irun)54 int diehard_2dsphere(Test **test, int irun)
55 {
56 
57  int i,j,d,t;
58 
59  /*
60   * These are the vector of points and the current point being
61   * considered.  We may or may not need to restructure the vectors
62   * to be able to do the sort.  I'm going to TRY to implement
63   * Fischler's suggested algorithm here that is NlogN instead of doing
64   * the straightforward N^2 algorithm, but we'll see.
65   */
66  dTuple *points;
67  double dist,mindist;
68 
69  /*
70   * for display only.
71   */
72  test[0]->ntuple = ntuple;
73 
74  /*
75   * Generate d-tuples of tsamples random coordinates in the range 0-10000
76   * (which we may have to scale with dimension). Determine the shortest
77   * separation of any pair of points.  From this generate p from the
78   * Marsaglia form, and apply the usual KS test over psamples of
79   * independent tests, per dimension.
80   */
81  test[0]->ntuple = 2;      /* 2 dimensional test, of course */
82  points = (dTuple *)malloc(test[0]->tsamples*sizeof(dTuple));
83 
84 
85  if(verbose == D_DIEHARD_2DSPHERE || verbose == D_ALL){
86      printf("Generating a list of %u points in %d dimensions\n",test[0]->tsamples,test[0]->ntuple);
87  }
88  for(t=0;t<test[0]->tsamples;t++){
89    /*
90     * Generate a new d-dimensional point in the unit d-cube (with
91     * periodic boundary conditions).
92     */
93    if(verbose == D_DIEHARD_2DSPHERE || verbose == D_ALL){
94        printf("points[%u]: (",t);
95    }
96    for(d=0;d<2;d++) {
97      points[t].c[d] = gsl_rng_uniform_pos(rng)*10000;
98      if(verbose == D_DIEHARD_2DSPHERE || verbose == D_ALL){
99        printf("%6.4f",points[t].c[d]);
100        if(d == 1){
101          printf(")\n");
102        } else {
103          printf(",");
104        }
105      }
106    }
107  }
108 
109  /*
110   * Now we sort the points using gsl_heapsort and a comparator
111   * on the first coordinate only.  Don't know how to get rid
112   * of the gcc prototype warning.  Probably need a cast of some
113   * sort.
114   */
115  gsl_heapsort(points,test[0]->tsamples,sizeof(dTuple),
116                     (gsl_comparison_fn_t) compare_points);
117 
118  if(verbose == D_DIEHARD_2DSPHERE || verbose == D_ALL){
119    printf("List of points sorted by first coordinate:\n");
120    for(t=0;t<test[0]->tsamples;t++){
121      printf("points[%u]: (",t);
122      for(d=0;d<2;d++) {
123        printf("%6.4f",points[t].c[d]);
124        if(d == 1){
125          printf(")\n");
126        } else {
127          printf(",");
128        }
129      }
130    }
131  }
132 
133  /*
134   * Now we do the SINGLE PASS through to determine mindist
135   */
136  mindist = 10000.0;
137  for(i=0;i<test[0]->tsamples;i++){
138    /*
139     * One thing to experiment with here (very much) is
140     * whether or not we need periodic wraparound.  For
141     * the moment we omit it, although distributing
142     * the points on a euclidean d-torus seems more symmetric
143     * than not and checks to be sure that points are correct
144     * on or very near a boundary.
145     */
146    for(j=i+1;j<test[0]->tsamples;j++){
147      if(points[j].c[0] - points[i].c[0] > mindist) break;
148      dist = distance(points[j],points[i],2);
149      MYDEBUG(D_DIEHARD_2DSPHERE) {
150        printf("d(%d,%d) = %16.10e\n",i,j,dist);
151      }
152      if( dist < mindist) mindist = dist;
153    }
154  }
155  MYDEBUG(D_DIEHARD_2DSPHERE) {
156    printf("Found minimum distance = %16.10e\n",mindist);
157  }
158 
159  /*
160   * This form should be "bad", but I'm finding the badness
161   * difficult to demonstrate for presumed good rngs.  One
162   * does wonder how large an effect Fischler's corrections
163   * are for n = 8000.  I'm guessing that they should be
164   * more important for smaller n, but that will be difficult
165   * to try without converting this further into a scale-free
166   * form (that is, just like rgb_minimum_distance() but without
167   * the qarg correction piece.  Hmmm, I could do that by hacking
168   * its value to 1.0 in rgb_minimum_distance now, couldn't I?
169   */
170  test[0]->pvalues[irun] = 1.0 - exp(-mindist*mindist/0.995);
171 
172  free(points);
173 
174  MYDEBUG(D_DIEHARD_2DSPHERE) {
175    printf("# diehard_2dsphere(): test[0]->pvalues[%u] = %10.5f\n",irun,test[0]->pvalues[irun]);
176  }
177 
178  return(0);
179 
180 }
181 
182