1 /* Portable, threadsafe Mersenne Twister random number generator
2  *
3  *  1. The ESL_RANDOMNESS object.
4  *  2. The generators, esl_random().
5  *  3. Debugging/development tools.
6  *  4. Other fundamental sampling (including Gaussian, gamma).
7  *  5. Multinomial sampling from discrete probability n-vectors.
8  *  6. Random data generators (unit testing, etc.)
9  *  7. Benchmark driver
10  *  8. Unit tests.
11  *  9. Test driver.
12  * 10. Example.
13  *
14  * NIST test suite for validating random number generators:
15  * https://csrc.nist.gov/projects/random-bit-generation/documentation-and-software
16  *
17  * It'd be nice if we had a debugging/unit testing mode in which
18  * esl_random() deliberately generated extreme values, such as 0 for
19  * example. Routines that use esl_random() can be sensitive to whether
20  * the interval 0,1 is open or closed. We should be able to test for
21  * problems with interval endpoints without taking enormous numbers of
22  * samples.
23  *
24  * Competitive alternatives to MT exist, including PCG [http://www.pcg-random.org/].
25  */
26 #include "esl_config.h"
27 
28 #include <stdlib.h>
29 #include <stdint.h>
30 #include <stdio.h>
31 #include <string.h>
32 #include <ctype.h>
33 #include <math.h>
34 #include <time.h>
35 #ifdef HAVE_UNISTD_H
36 #include <unistd.h>
37 #endif
38 
39 #include "easel.h"
40 #include "esl_random.h"
41 
42 static uint32_t choose_arbitrary_seed(void);
43 static uint32_t knuth              (ESL_RANDOMNESS *r);
44 static uint32_t mersenne_twister   (ESL_RANDOMNESS *r);
45 static void     mersenne_seed_table(ESL_RANDOMNESS *r, uint32_t seed);
46 static void     mersenne_fill_table(ESL_RANDOMNESS *r);
47 
48 /*****************************************************************
49  *# 1. The <ESL_RANDOMNESS> object.
50  *****************************************************************/
51 
52 /* Function:  esl_randomness_Create()
53  * Synopsis:  Create the default strong random number generator.
54  *
55  * Purpose:   Create a random number generator using
56  *            a given random seed.
57  *
58  *            The default random number generator uses the Mersenne
59  *            Twister MT19937 algorithm \citep{Matsumoto98}.  It has a
60  *            period of $2^{19937}-1$, and equidistribution over
61  *            $2^{32}$ values.
62  *
63  *            If <seed> is $>0$, the random number generator is
64  *            reproducibly initialized with that seed.  Two RNGs
65  *            created with the same nonzero seed will give exactly the
66  *            same stream of pseudorandom numbers. This allows you to
67  *            make reproducible stochastic simulations, for example.
68  *
69  *            If <seed> is 0, an arbitrary seed is chosen.
70  *            Internally, this comes from hashing together the current
71  *            <time()>, <clock()>, and process id.  Two RNGs created
72  *            with <seed>=0 probably (but not assuredly) give
73  *            different streams of pseudorandom numbers. The true seed
74  *            can be retrieved from the <ESL_RANDOMNESS> object using
75  *            <esl_randomness_GetSeed()>.  The strategy used for
76  *            choosing the arbitrary seed is predictable; it should
77  *            not be considered cryptographically secure.
78  *
79  * Args:      seed $>= 0$.
80  *
81  * Returns:   an initialized <ESL_RANDOMNESS *> on success.
82  *            Caller free's with <esl_randomness_Destroy()>.
83  *
84  * Throws:    <NULL> on failure.
85  *
86  * Xref:      SRE:STL8/p57.
87  *            SRE:J5/21:    Mersenne Twister.
88  */
89 ESL_RANDOMNESS *
esl_randomness_Create(uint32_t seed)90 esl_randomness_Create(uint32_t seed)
91 {
92   ESL_RANDOMNESS *r      = NULL;
93   int             status;
94 
95   ESL_ALLOC(r, sizeof(ESL_RANDOMNESS));
96   r->type = eslRND_MERSENNE;
97   r->mti  = 0;
98   r->x    = 0;
99   r->seed = 0;
100   esl_randomness_Init(r, seed);
101   return r;
102 
103  ERROR:
104   return NULL;
105 }
106 
107 /* Function:  esl_randomness_CreateFast()
108  * Synopsis:  Create the alternative fast generator.
109  *
110  * THIS FUNCTION IS DEPRECATED. USE esl_randomness_Create() INSTEAD.
111  *
112  * Purpose:   Same as <esl_randomness_Create()>, except that a simple
113  *            linear congruential generator (LCG) will be used.
114  *            This is a $(a=69069, c=1)$ LCG, with a period of
115  *            $2^{32}$.
116  *
117  *            This is a low quality RNG. It fails several standard
118  *            NIST statistical tests. Successive samples from an LCG
119  *            are correlated, and it has a relatively short period. IT
120  *            SHOULD NOT BE USED (period). It is present in Easel for
121  *            legacy reasons. It used to be substantially faster than
122  *            our high quality RNG, but now our default Mersenne
123  *            Twister is plenty fast. We have regression and unit
124  *            tests that use the LCG and depend on a fixed seed and a
125  *            particular pseudorandom number sequence; they would have
126  *            to be re-studied carefully to upgrade them to a
127  *            different RNG.
128  *
129  *            Here's an example of how serial correlation arises in an
130  *            LCG, and how it can lead to serious (and difficult to
131  *            diagnose) failure in a Monte Carlo simulation.  An LCG
132  *            calculates $x_{i+1} = ax_i + c$. Suppose $x_i$ is small:
133  *            in the range 0..6000, say, as a specific example. Now
134  *            $x_{i+1}$ cannot be larger than 4.1e8, for an LCG with
135  *            $a=69069$,$c=1$. So if you take a sample and test
136  *            whether it is $< 1e-6$ (say), the next sample will be in
137  *            a range of about 0..0.1, rather than being uniform on
138  *            0..1.
139  *
140  * Args:      seed $>= 0$.
141  *
142  * Returns:   an initialized <ESL_RANDOMNESS *> on success.
143  *            Caller free's with <esl_randomness_Destroy()>.
144  *
145  * Throws:    <NULL> on failure.
146  *
147  * Xref:      SRE:J5/44: for accidental proof that period is indeed 2^32.
148  */
149 ESL_RANDOMNESS *
esl_randomness_CreateFast(uint32_t seed)150 esl_randomness_CreateFast(uint32_t seed)
151 {
152   ESL_RANDOMNESS *r      = NULL;
153   int             status;
154 
155   ESL_ALLOC(r, sizeof(ESL_RANDOMNESS));
156   r->type = eslRND_FAST;
157   r->mti  = 0;
158   r->x    = 0;
159   r->seed = 0;
160   esl_randomness_Init(r, seed);
161   return r;
162 
163  ERROR:
164   return NULL;
165 }
166 
167 
168 /* Function:  esl_randomness_CreateTimeseeded()
169  * Synopsis:  Create an RNG with a quasirandom seed.
170  *
171  * THIS FUNCTION IS DEPRECATED. USE esl_randomness_Create(0).
172  *
173  * Purpose:   Like <esl_randomness_Create()>, but it initializes the
174  *            the random number generator using a POSIX <time()> call
175  *            (number of seconds since the POSIX epoch).
176  *
177  *            This function is deprecated. Use
178  *            <esl_randomness_Create(0)> instead.
179  *
180  * Returns:   an initialized <ESL_RANDOMNESS *> on success.
181  *            Caller free's with <esl_randomness_Destroy()>.
182  *
183  * Throws:    <NULL> on failure.
184  *
185  * Xref:      SRE:STL8/p57.
186  */
187 ESL_RANDOMNESS *
esl_randomness_CreateTimeseeded(void)188 esl_randomness_CreateTimeseeded(void)
189 {
190   return esl_randomness_Create(0);
191 }
192 
193 
194 /* Function:  esl_randomness_Init()
195  * Synopsis:  Reinitialize a RNG.
196  *
197  * Purpose:   Reset and reinitialize an existing <ESL_RANDOMNESS>
198  *            object with a new seed.
199  *
200  *            Sometimes it's useful to reseed an RNG to guarantee a
201  *            particular reproducible series of pseudorandom numbers
202  *            at an arbitrary point in a program; HMMER does this, for
203  *            example, to guarantee the same results from the same
204  *            HMM/sequence comparison regardless of where in a search
205  *            the HMM or sequence occurs.
206  *
207  * Args:      r     - randomness object
208  *            seed  - new seed to use; >=0.
209  *
210  * Returns:   <eslOK> on success.
211  *
212  * Xref:      SRE:STL8/p57.
213  */
214 int
esl_randomness_Init(ESL_RANDOMNESS * r,uint32_t seed)215 esl_randomness_Init(ESL_RANDOMNESS *r, uint32_t seed)
216 {
217   if (seed == 0) seed = choose_arbitrary_seed();
218   if (r->type == eslRND_MERSENNE)
219     {
220       mersenne_seed_table(r, seed);
221       mersenne_fill_table(r);
222     }
223   else
224     {
225       r->seed = seed;
226       r->x    = esl_rnd_mix3(seed, 87654321, 12345678);	/* arbitrary dispersion of the seed */
227       if (r->x == 0) r->x = 42;                         /* make sure we don't have a zero */
228     }
229   return eslOK;
230 }
231 
232 /* Function:  esl_randomness_GetSeed()
233  * Synopsis:  Returns the value of RNG's seed.
234  *
235  * Purpose:   Return the value of the seed.
236  */
237 uint32_t
esl_randomness_GetSeed(const ESL_RANDOMNESS * r)238 esl_randomness_GetSeed(const ESL_RANDOMNESS *r)
239 {
240   return r->seed;
241 }
242 
243 
244 /* Function:  esl_randomness_Destroy()
245  * Synopsis:  Free an RNG.
246  *
247  * Purpose:   Frees an <ESL_RANDOMNESS> object.
248  */
249 void
esl_randomness_Destroy(ESL_RANDOMNESS * r)250 esl_randomness_Destroy(ESL_RANDOMNESS *r)
251 {
252   free(r);
253   return;
254 }
255 
256 /*----------- end of ESL_RANDOMNESS object functions --------------*/
257 
258 
259 
260 /*****************************************************************
261  *# 2. The generators and <esl_random()>
262  *****************************************************************/
263 
264 /* Function: esl_random()
265  * Synopsis: Generate a uniform random deviate on [0,1)
266  *
267  * Purpose:  Returns a uniform deviate x, $0.0 \leq x < 1.0$, given
268  *           RNG <r>.
269  *
270  *           If you cast the return value to float, the [0,1) interval
271  *           guarantee is lost because values close to 1 will round to
272  *           1.0.
273  *
274  * Returns:  a uniformly distribute random deviate on interval
275  *           $0.0 \leq x < 1.0$.
276  */
277 double
esl_random(ESL_RANDOMNESS * r)278 esl_random(ESL_RANDOMNESS *r)
279 {
280   uint32_t x = (r->type == eslRND_MERSENNE) ? mersenne_twister(r) : knuth(r);
281   return ((double) x / 4294967296.0);    // 2^32: [0,1).  Original MT code has * (1.0/ 4294967296.0), which I believe (and tested) to be identical.
282 }
283 
284 
285 /* Function:  esl_random_uint32()
286  * Synopsis:  Generate a uniform random deviate on 0..2^32-1
287  * Incept:    SRE, Wed Jan 13 10:59:26 2016
288  *
289  * Purpose:   Returns a uniform deviate x, a 32-bit unsigned
290  *            integer $0 \leq x < 2^{32}$, given RNG <r>.
291  */
292 uint32_t
esl_random_uint32(ESL_RANDOMNESS * r)293 esl_random_uint32(ESL_RANDOMNESS *r)
294 {
295   return (r->type == eslRND_MERSENNE) ? mersenne_twister(r) : knuth(r);
296 }
297 
298 
299 static uint32_t
knuth(ESL_RANDOMNESS * r)300 knuth(ESL_RANDOMNESS *r)
301 {
302   r->x *= 69069;
303   r->x += 1;
304   return r->x;
305 }
306 
307 /* mersenne_twister() and other mersenne_*() functions below:
308  * A simple serial implementation of the original Mersenne Twister
309  * algorithm [Matsumoto98].
310  *
311  * There are more sophisticated and faster implementations of MT, using
312  * vector instructions and/or directly generating IEEE754 doubles
313  * bitwise rather than doing an expensive normalization. We can
314  * improve the implementation later if necessary, but even the basic
315  * MT offers ~10x speed improvement over Easel's previous RNG.
316  * [SRE, 30 May 09, Stockholm]
317  */
318 static uint32_t
mersenne_twister(ESL_RANDOMNESS * r)319 mersenne_twister(ESL_RANDOMNESS *r)
320 {
321   uint32_t x;
322   if (r->mti >= 624) mersenne_fill_table(r);
323 
324   x = r->mt[r->mti++];
325   x ^= (x >> 11);
326   x ^= (x <<  7) & 0x9d2c5680;
327   x ^= (x << 15) & 0xefc60000;
328   x ^= (x >> 18);
329   return x;
330 }
331 
332 /* mersenne_seed_table()
333  * Initialize the state of the RNG from a seed, using a Knuth LCG.
334  *
335  * TODO: In January 2002, Nishimura and Matsumoto replaced this with a
336  * better initialization. (Why did I use their ~1999 initialization
337  * when I adapted MT in June 2009?) They say that the problem with
338  * this version is that the seed's most significant bit has little
339  * effect on the MT state vector. Upgrading this will require effort
340  * though: any change to the output of our RNG will break numerous
341  * unit tests and regressions that use fixed RNG seeds to get
342  * reproducible streams.
343  */
344 static void
mersenne_seed_table(ESL_RANDOMNESS * r,uint32_t seed)345 mersenne_seed_table(ESL_RANDOMNESS *r, uint32_t seed)
346 {
347   int z;
348 
349   r->seed  = seed;
350   r->mt[0] = seed;
351   for (z = 1; z < 624; z++)
352     r->mt[z] = 69069 * r->mt[z-1];
353   return;
354 }
355 
356 /* mersenne_fill_table()
357  * Refill the table with 624 new random numbers.
358  * We do this whenever we've reseeded, or when we
359  * run out of numbers.
360  */
361 static void
mersenne_fill_table(ESL_RANDOMNESS * r)362 mersenne_fill_table(ESL_RANDOMNESS *r)
363 {
364   uint32_t y;
365   int      z;
366   static uint32_t mag01[2] = { 0x0, 0x9908b0df };
367 
368   for (z = 0; z < 227; z++)	/* 227 = N-M = 624-397 */
369     {
370       y = (r->mt[z] & 0x80000000) | (r->mt[z+1] & 0x7fffffff);
371       r->mt[z] = r->mt[z+397] ^ (y>>1) ^ mag01[(int)(y & 0x1)]; /* yes, the (int) cast is necessary; xref bug #e7; some compilers may try to cast y to signed int otherwise, to use it in an array index */
372     }
373   for (; z < 623; z++)
374     {
375       y = (r->mt[z] & 0x80000000) | (r->mt[z+1] & 0x7fffffff);
376       r->mt[z] = r->mt[z-227] ^ (y>>1) ^ mag01[(int)(y & 0x1)];
377     }
378   y = (r->mt[623] & 0x80000000) | (r->mt[0] & 0x7fffffff);
379   r->mt[623] = r->mt[396] ^ (y>>1) ^ mag01[(int)(y & 0x1)];
380   r->mti = 0;
381 
382   return;
383 }
384 
385 
386 /* choose_arbitrary_seed()
387  * Return a 'quasirandom' seed > 0, concocted by hashing time(),
388  * clock(), and getpid() together.
389  * See RFC1750 for a discussion of securely seeding RNGs.
390  */
391 static uint32_t
choose_arbitrary_seed(void)392 choose_arbitrary_seed(void)
393 {
394   uint32_t a = (uint32_t) time ((time_t *) NULL);
395   uint32_t b = 87654321;	                    // we'll use getpid() below, if we can
396   uint32_t c = (uint32_t) clock();                  // clock() gives time since process invocation, in msec at least, if not usec
397   uint32_t seed;
398 #ifdef HAVE_GETPID
399   b  = (uint32_t) getpid();	                    // preferable b choice, if we have POSIX getpid()
400 #endif
401   seed = esl_rnd_mix3(a,b,c);	                    // try to decorrelate closely spaced choices of pid/times
402   return (seed == 0) ? 42 : seed;                   // 42 is arbitrary, just to avoid seed==0.
403 }
404 
405 /* Function:  esl_rnd_mix3()
406  * Synopsis:  Make a quasirandom number by mixing three inputs.
407  * Incept:    SRE, Tue 21 Aug 2018
408  *
409  * Purpose:   This is Bob Jenkin's <mix()>. Given <a,b,c>,
410  *            generate a number that's generated reasonably
411  *            uniformly on $[0,2^{32}-1]$ even for closely
412  *            spaced choices of $a,b,c$.
413  */
414 uint32_t
esl_rnd_mix3(uint32_t a,uint32_t b,uint32_t c)415 esl_rnd_mix3(uint32_t a, uint32_t b, uint32_t c)
416 {
417   a -= b; a -= c; a ^= (c>>13);
418   b -= c; b -= a; b ^= (a<<8);
419   c -= a; c -= b; c ^= (b>>13);
420   a -= b; a -= c; a ^= (c>>12);
421   b -= c; b -= a; b ^= (a<<16);
422   c -= a; c -= b; c ^= (b>>5);
423   a -= b; a -= c; a ^= (c>>3);
424   b -= c; b -= a; b ^= (a<<10);
425   c -= a; c -= b; c ^= (b>>15);
426   return c;
427 }
428 /*----------- end of esl_random() --------------*/
429 
430 
431 
432 /*****************************************************************
433  *# 3. Debugging and development tools
434  *****************************************************************/
435 
436 /* Function:  esl_randomness_Dump()
437  * Synopsis:  Dump ESL_RANDOMNESS object to stream, for debugging/examination.
438  */
439 int
esl_randomness_Dump(FILE * fp,ESL_RANDOMNESS * r)440 esl_randomness_Dump(FILE *fp, ESL_RANDOMNESS *r)
441 {
442   if (r->type == eslRND_FAST)
443     {
444       fputs      ("type  = knuth\n", fp );
445       fprintf(fp, "state = %" PRIu32 "\n", r->x);
446       fprintf(fp, "seed  = %" PRIu32 "\n", r->seed);
447     }
448   else if (r->type == eslRND_MERSENNE)
449     {
450       int i,j;
451 
452       fputs      ("type    = mersenne twister\n", fp );
453       fprintf(fp, "mti     = %d (0..623)\n", r->mti);
454       fprintf(fp, "mt[mti] = %" PRIu32 "\n", r->mt[r->mti]);
455 
456       fprintf(fp, "%6d: ", 0);
457       for (i = 0, j=0; i < 624; i++)
458 	{
459 	  fprintf(fp, "%11" PRIu32 " ", r->mt[i]);
460 	  if (++j == 20) { fprintf(fp, "\n%6d: ", i+1); j=0; }
461 	}
462       fputs("\n", fp);
463     }
464   return eslOK;
465 }
466 /*----------- end, debugging/development tools ------------------*/
467 
468 
469 /*****************************************************************
470  *# 4. Other fundamental sampling (including Gaussian, gamma)
471  *****************************************************************/
472 
473 /* Function: esl_rnd_UniformPositive()
474  * Synopsis: Generate a uniform positive random deviate on interval (0,1).
475  *
476  * Purpose:  Same as <esl_random()>, but assure $0 < x < 1$;
477  *           (positive uniform deviate).
478  */
479 double
esl_rnd_UniformPositive(ESL_RANDOMNESS * r)480 esl_rnd_UniformPositive(ESL_RANDOMNESS *r)
481 {
482   double x;
483   do { x = esl_random(r); } while (x == 0.0);
484   return x;
485 }
486 
487 
488 /* Function:  esl_rnd_Gaussian()
489  * Synopsis:  Generate a Gaussian-distributed sample.
490  *
491  * Purpose:   Pick a Gaussian-distributed random variable
492  *            with a given <mean> and standard deviation <stddev>, and
493  *            return it.
494  *
495  *            Implementation is derived from the public domain
496  *            RANLIB.c <gennor()> function, written by Barry W. Brown
497  *            and James Lovato (M.D. Anderson Cancer Center, Texas
498  *            USA) using the method described in
499  *            \citep{AhrensDieter73}.
500  *
501  *            Original algorithm said to use uniform deviates on [0,1)
502  *            interval (i.e. <esl_random()>), but this appears to be
503  *            wrong.  Use uniform deviates on (0,1) interval instead
504  *            (i.e., <esl_rnd_UniformPositive()>). RANLIB, GNU Octave
505  *            have made this alteration, possibly inadvertently.
506  *            [xref cryptogenomicon post, 13 Oct 2014].
507  *
508  * Method:    Impenetrability of the code is to be blamed on
509  *            FORTRAN/f2c lineage.
510  *
511  * Args:      r      - ESL_RANDOMNESS object
512  *            mean   - mean of the Gaussian we're sampling from
513  *            stddev - standard deviation of the Gaussian
514  */
515 double
esl_rnd_Gaussian(ESL_RANDOMNESS * r,double mean,double stddev)516 esl_rnd_Gaussian(ESL_RANDOMNESS *r, double mean, double stddev)
517 {
518   long   i;
519   double snorm,u,s,ustar,aa,w,y,tt;
520 
521   /* These static's are threadsafe: they are magic constants
522    * we will not touch.
523    */
524   static double a[32] = {
525     0.0,3.917609E-2,7.841241E-2,0.11777,0.1573107,0.1970991,0.2372021,0.2776904,
526     0.3186394,0.36013,0.4022501,0.4450965,0.4887764,0.5334097,0.5791322,
527     0.626099,0.6744898,0.7245144,0.7764218,0.8305109,0.8871466,0.9467818,
528     1.00999,1.077516,1.150349,1.229859,1.318011,1.417797,1.534121,1.67594,
529     1.862732,2.153875
530   };
531   static double d[31] = {
532     0.0,0.0,0.0,0.0,0.0,0.2636843,0.2425085,0.2255674,0.2116342,0.1999243,
533     0.1899108,0.1812252,0.1736014,0.1668419,0.1607967,0.1553497,0.1504094,
534     0.1459026,0.14177,0.1379632,0.1344418,0.1311722,0.128126,0.1252791,
535     0.1226109,0.1201036,0.1177417,0.1155119,0.1134023,0.1114027,0.1095039
536   };
537   static double t[31] = {
538     7.673828E-4,2.30687E-3,3.860618E-3,5.438454E-3,7.0507E-3,8.708396E-3,
539     1.042357E-2,1.220953E-2,1.408125E-2,1.605579E-2,1.81529E-2,2.039573E-2,
540     2.281177E-2,2.543407E-2,2.830296E-2,3.146822E-2,3.499233E-2,3.895483E-2,
541     4.345878E-2,4.864035E-2,5.468334E-2,6.184222E-2,7.047983E-2,8.113195E-2,
542     9.462444E-2,0.1123001,0.136498,0.1716886,0.2276241,0.330498,0.5847031
543   };
544   static double h[31] = {
545     3.920617E-2,3.932705E-2,3.951E-2,3.975703E-2,4.007093E-2,4.045533E-2,
546     4.091481E-2,4.145507E-2,4.208311E-2,4.280748E-2,4.363863E-2,4.458932E-2,
547     4.567523E-2,4.691571E-2,4.833487E-2,4.996298E-2,5.183859E-2,5.401138E-2,
548     5.654656E-2,5.95313E-2,6.308489E-2,6.737503E-2,7.264544E-2,7.926471E-2,
549     8.781922E-2,9.930398E-2,0.11556,0.1404344,0.1836142,0.2790016,0.7010474
550   };
551 
552   u = esl_rnd_UniformPositive(r);
553   s = 0.0;
554   if(u > 0.5) s = 1.0;
555   u += (u-s);
556   u = 32.0*u;
557   i = (long) (u);
558   if(i == 32) i = 31;
559   if(i == 0) goto S100;
560   /*
561    * START CENTER
562    */
563   ustar = u-(double)i;
564   aa = a[i-1];
565 S40:
566   if (ustar <= t[i-1]) goto S60;
567   w = (ustar - t[i-1]) * h[i-1];
568 S50:
569   /*
570    * EXIT   (BOTH CASES)
571    */
572   y = aa+w;
573   snorm = y;
574   if(s == 1.0) snorm = -y;
575   return (stddev*snorm + mean);
576 S60:
577   /*
578    * CENTER CONTINUED
579    */
580   u = esl_rnd_UniformPositive(r);
581   w = u*(a[i]-aa);
582   tt = (0.5*w+aa)*w;
583   goto S80;
584 S70:
585   tt = u;
586   ustar = esl_rnd_UniformPositive(r);
587 S80:
588   if(ustar > tt) goto S50;
589   u = esl_rnd_UniformPositive(r);
590   if(ustar >= u) goto S70;
591   ustar = esl_rnd_UniformPositive(r);
592   goto S40;
593 S100:
594   /*
595    * START TAIL
596    */
597   i = 6;
598   aa = a[31];
599   goto S120;
600 S110:
601   aa += d[i-1];
602   i += 1;
603   ESL_DASSERT1(( i <= 31 ));
604 S120:
605   u += u;
606   if(u < 1.0) goto S110;
607   u -= 1.0;
608 S140:
609   w = u*d[i-1];
610   tt = (0.5*w+aa)*w;
611   goto S160;
612 S150:
613   tt = u;
614 S160:
615   ustar = esl_rnd_UniformPositive(r);
616   if(ustar > tt) goto S50;
617   u = esl_rnd_UniformPositive(r);
618   if(ustar >= u) goto S150;
619   u = esl_rnd_UniformPositive(r);
620   goto S140;
621 }
622 
623 
624 
625 /* subfunctions that esl_rnd_Gamma() is going to call:
626  */
627 static double
gamma_ahrens(ESL_RANDOMNESS * r,double a)628 gamma_ahrens(ESL_RANDOMNESS *r, double a)	/* for a >= 3 */
629 {
630   double V;			/* uniform deviates */
631   double X,Y;
632   double test;
633 
634   do {
635     do {				/* generate candidate X */
636       Y = tan(eslCONST_PI * esl_random(r));
637       X = Y * sqrt(2.*a -1.) + a - 1.;
638     } while (X <= 0.);
639 				/* accept/reject X */
640     V    = esl_random(r);
641     test = (1+Y*Y) * exp( (a-1.)* log(X/(a-1.)) - Y*sqrt(2.*a-1.));
642   } while (V > test);
643   return X;
644 }
645 static double
gamma_integer(ESL_RANDOMNESS * r,unsigned int a)646 gamma_integer(ESL_RANDOMNESS *r, unsigned int a)	/* for small integer a, a < 12 */
647 {
648   int    i;
649   double U,X;
650 
651   U = 1.;
652   for (i = 0; i < a; i++)
653     U *= esl_rnd_UniformPositive(r);
654   X = -log(U);
655 
656   return X;
657 }
658 static double
gamma_fraction(ESL_RANDOMNESS * r,double a)659 gamma_fraction(ESL_RANDOMNESS *r, double a)	/* for fractional a, 0 < a < 1 */
660 {				/* Knuth 3.4.1, exercise 16, pp. 586-587 */
661   double p, U, V, X, q;
662 
663   p = eslCONST_E / (a + eslCONST_E);
664   do {
665     U = esl_random(r);
666     V = esl_rnd_UniformPositive(r);
667     if (U < p) {
668       X = pow(V, 1./a);
669       q = exp(-X);
670     } else {
671       X = 1. - log(V);
672       q = pow(X, a-1.);
673     }
674     U = esl_random(r);
675   } while (U >= q);
676   return X;
677 }
678 
679 
680 /* Function: esl_rnd_Gamma()
681  * Synopsis: Returns a random deviate from a Gamma(a, 1) distribution.
682  *
683  * Purpose:  Return a random deviate distributed as Gamma(a, 1.)
684  *           \citep[pp. 133--134]{Knu-81a}.
685  *
686  *           The implementation follows not only Knuth \citep{Knu-81a},
687  *           but also relied on examination of the implementation in
688  *           the GNU Scientific Library (libgsl) \citep{Galassi06}.
689  *
690  * Args:     r      - random number generation seed
691  *           a      - order of the gamma function; a > 0
692  */
693 double
esl_rnd_Gamma(ESL_RANDOMNESS * r,double a)694 esl_rnd_Gamma(ESL_RANDOMNESS *r, double a)
695 {
696   double aint;
697 
698   ESL_DASSERT1(( a > 0. ));
699 
700   aint = floor(a);
701   if (a == aint && a < 12.)
702     return gamma_integer(r, (unsigned int) a);
703   else if (a > 3.)
704     return gamma_ahrens(r, a);
705   else if (a < 1.)
706     return gamma_fraction(r, a);
707   else
708     return gamma_integer(r, aint) + gamma_fraction(r, a-aint);
709 }
710 
711 
712 /* Function:  esl_rnd_Dirichlet()
713  * Synopsis:  Sample a Dirichlet-distributed random probability vector
714  * Incept:    SRE, Wed Feb 17 12:20:53 2016 [H1/76]
715  *
716  * Purpose:   Using generator <rng>, sample a Dirichlet-distributed
717  *            probabilty vector <p> of <K> elements, using Dirichlet
718  *            parameters <alpha> (also of <K> elements).
719  *
720  *            Caller provides the allocated space for <p>.
721  *
722  *            <alpha> is optional. If it isn't provided (i.e. is
723  *            <NULL>), sample <p> uniformly. (That is, use <alpha[i] =
724  *            1.> for all i=0..K-1.)
725  *
726  *            This routine is redundant with <esl_dirichlet_DSample()>
727  *            and <esl_dirichlet_DSampleUniform()> in the dirichlet
728  *            module. Provided here because there's cases where we
729  *            just want to sample a probability vector without
730  *            introducing a dependency on all the stats/dirichlet code
731  *            in Easel.
732  *
733  * Args:      rng   : random number generator
734  *            alpha : OPTIONAL: Dirichlet parameters 0..K-1, or NULL to use alpha[i]=1 for all i
735  *            K     : number of elements in alpha, p
736  *            p     : RESULT: sampled probability vector
737  *
738  * Returns:   <eslOK> on success, and <p> is a sampled probability vector.
739  */
740 int
esl_rnd_Dirichlet(ESL_RANDOMNESS * rng,const double * alpha,int K,double * p)741 esl_rnd_Dirichlet(ESL_RANDOMNESS *rng, const double *alpha, int K, double *p)
742 {
743   int    i;
744   double norm = 0.;
745 
746   for (i = 0; i < K; i++)
747     {
748       p[i] = esl_rnd_Gamma(rng, (alpha ? alpha[i] : 1.0));
749       norm += p[i];
750     }
751   for (i = 0; i < K; i++)
752     p[i] /= norm;
753 
754   return eslOK;
755 }
756 
757 
758 /* Function:  esl_rnd_Deal()
759  * Synopsis:  Sequential random sample of <m> random integers in range <n>
760  * Incept:    SRE, Sun 17 Mar 2019
761  *
762  * Purpose:   Obtain a random sequential sample of <m> integers without
763  *            replacement from <n> possible ones, like dealing a hand
764  *            of m=5 cards from a deck of n=52 possible cards with
765  *            cards numbered <0..n-1>. Caller provides allocated space
766  *            <deal>, allocated for at least <m> elements. Return the
767  *            sample in <deal>, in sorted order (smallest to largest).
768  *
769  *            Uses the selection sampling algorithm [Knuth, 3.4.2],
770  *            which is O(n) time. For more impressive O(m)
771  *            performance, see <esl_rand64_Deal()>.
772  *
773  *            As a special case, if m = 0 (for whatever reason,
774  *            probably an edge-case-exercising unit test), do
775  *            nothing. <deal> is untouched, and can even be <NULL>.
776  *
777  * Args:      m    - number of integers to sample
778  *            n    - range: each sample is 0..n-1
779  *            deal - RESULT: allocated space for <m> sampled integers
780  *
781  * Returns:   <eslOK> on success, and the sample of <m> integers
782  *            is in <deal>.
783  *
784  * Throws:    (no abnormal error conditions)
785  */
786 int
esl_rnd_Deal(ESL_RANDOMNESS * rng,int m,int n,int * deal)787 esl_rnd_Deal(ESL_RANDOMNESS *rng, int m, int n, int *deal)
788 {
789   int i = 0;
790   int j = 0;
791 
792   ESL_DASSERT1(( m >= 0 ));  // m=0 is a special case where <deal> can be NULL.
793   ESL_DASSERT1(( n >= 1 ));
794   ESL_DASSERT1(( m <= n ));
795 
796   for (j = 0; j < n && i < m; j++)
797     if ( ((double) (n - j)) * esl_random(rng) < (double) (m - i)) deal[i++] = j;
798   return eslOK;
799 }
800 
801 
802 
803 /*****************************************************************
804  *# 5. Multinomial sampling from discrete probability n-vectors
805  *****************************************************************/
806 
807 /* Function:  esl_rnd_DChoose()
808  * Synopsis:  Return random choice from discrete multinomial distribution.
809  *
810  * Purpose:   Make a random choice from a normalized discrete
811  *            distribution <p> of <N> elements, where <p>
812  *            is double-precision. Returns the index of the
813  *            selected element, $0..N-1$.
814  *
815  *            <p> must be a normalized probability distribution
816  *            (i.e. must sum to one). Sampling distribution is
817  *            undefined otherwise: that is, a choice will always
818  *            be returned, but it might be an arbitrary one.
819  *
820  *            All $p_i$ must be $>$ <DBL_EPSILON> in order to
821  *            have a non-zero probability of being sampled.
822  *
823  *            <esl_rnd_FChoose()> is the same, but for floats in <p>,
824  *            and all $p_i$ must be $>$ <FLT_EPSILON>.
825  */
826 int
esl_rnd_DChoose(ESL_RANDOMNESS * r,const double * p,int N)827 esl_rnd_DChoose(ESL_RANDOMNESS *r, const double *p, int N)
828 {
829   double norm = 0.0;		/* ~ 1.0                  */
830   double sum  = 0.0;            /* integrated prob        */
831   double roll = esl_random(r);  /* random fraction        */
832   int    i;                     /* counter over the probs */
833 
834   /* we need to deal with finite roundoff error in p's sum */
835   for (i = 0; i < N; i++) norm += p[i];
836   ESL_DASSERT1((norm > 0.999 && norm < 1.001));
837 
838   for (i = 0; i < N; i++)
839     {
840       sum += p[i];
841       if (roll < (sum / norm) ) return i;
842     }
843   esl_fatal("unreached code was reached. universe collapses.");
844   return 0; /*notreached*/
845 }
846 int
esl_rnd_FChoose(ESL_RANDOMNESS * r,const float * p,int N)847 esl_rnd_FChoose(ESL_RANDOMNESS *r, const float *p, int N)
848 {
849   /* Computing in double precision is important:
850    * casting <roll> to (float) gives a [0,1] number instead of [0,1).
851    */
852   double norm = 0.0;		/* ~ 1.0                  */
853   double sum  = 0.0;            /* integrated prob        */
854   double roll = esl_random(r);  /* random fraction        */
855   int    i;                     /* counter over the probs */
856 
857   for (i = 0; i < N; i++) norm += p[i];
858   ESL_DASSERT1((norm > 0.99 && norm < 1.01));
859 
860   for (i = 0; i < N; i++)
861     {
862       sum += (double) p[i];
863       if (roll < (sum / norm) ) return i;
864     }
865   esl_fatal("unreached code was reached. universe collapses.");
866   return 0; /*notreached*/
867 }
868 
869 
870 /* Function:  esl_rnd_DChooseCDF()
871  * Synopsis:  Return random choice from cumulative multinomial distribution.
872  *
873  * Purpose:   Given a random number generator <r> and a cumulative
874  *            multinomial distribution <cdf[0..N-1]>, sample an element
875  *            <0..N-1> from that distribution. Return the index <0..N-1>.
876  *
877  *            Caller should be sure that <cdf[0..N-1]> is indeed a
878  *            cumulative multinomial distribution -- in particular, that
879  *            <cdf[N-1]> is tolerably close to 1.0 (within roundoff error).
880  *
881  *            When sampling many times from the same multinomial
882  *            distribution <p>, it will generally be faster to
883  *            calculate the CDF once using <esl_vec_DCDF(p, N, cdf)>,
884  *            then sampling many times from the CDF with
885  *            <esl_rnd_DChooseCDF(r, cdf, N)>, as opposed to calling
886  *            <esl_rnd_DChoose(r, p, N)> many times, because
887  *            <esl_rnd_DChoose()> has to calculated the CDF before
888  *            sampling. This also gives you a bit more control over
889  *            error detection: you can make sure that the CDF is ok (p
890  *            does sum to ~1.0) before doing a lot of sampling from
891  *            it.
892  *
893  *            <esl_rnd_FChooseCDF()> is the same, but for
894  *            a single-precision float <cdf>.
895  *
896  * Args:      r    - random number generator
897  *            cdf  - cumulative multinomial distribution, cdf[0..N-1]
898  *            N    - number of elements in <cdf>
899  *
900  * Returns:   index 0..N-1 of the randomly sampled choice from <cdf>.
901  *
902  * Note:      For large N, it might be advantageous to bisection search the
903  *            cdf. For typical N in Easel (up to 20, for amino acid
904  *            prob vectors, for example), the naive code below is
905  *            faster. We could revisit this if we start sampling
906  *            larger vectors.
907  */
908 int
esl_rnd_DChooseCDF(ESL_RANDOMNESS * r,const double * cdf,int N)909 esl_rnd_DChooseCDF(ESL_RANDOMNESS *r, const double *cdf, int N)
910 {
911   double roll = esl_random(r);	/* uniform 0.0 <= x < 1.0 */
912   int    i;
913 
914   ESL_DASSERT1((cdf[0] >= 0.0));
915   ESL_DASSERT1((cdf[N-1] > 0.999 && cdf[N-1] < 1.001));
916 
917   for (i = 0; i < N; i++)
918     if (roll < cdf[i] / cdf[N-1]) return i;
919   esl_fatal("unreached code is reached. universe goes foom");
920   return 0; /*notreached*/
921 }
922 int
esl_rnd_FChooseCDF(ESL_RANDOMNESS * r,const float * cdf,int N)923 esl_rnd_FChooseCDF(ESL_RANDOMNESS *r, const float *cdf, int N)
924 {
925   double roll = esl_random(r);	/* uniform 0.0 <= x < 1.0. must be double, not float, to guarantee x <1 */
926   int    i;
927 
928   ESL_DASSERT1((cdf[0] >= 0.0));
929   ESL_DASSERT1((cdf[N-1] > 0.99 && cdf[N-1] < 1.01));
930 
931   for (i = 0; i < N; i++)
932     if (roll < (double) cdf[i] / (double) cdf[N-1]) return i;  // yes, the casts are NECESSARY. Without them, you get a heisenbug on icc/linux.
933   esl_fatal("unreached code is reached. universe goes foom");
934   return 0; /*notreached*/
935 }
936 
937 
938 /*****************************************************************
939  * 6. Random data generators (unit testing, etc.)
940  *****************************************************************/
941 
942 
943 
944 /* Function:  esl_rnd_mem()
945  * Synopsis:  Overwrite a buffer with random garbage.
946  * Incept:    SRE, Fri Feb 19 08:53:07 2016
947  *
948  * Purpose:   Write <n> bytes of random garbage into buffer
949  *            <buf>, by uniform sampling of values 0..255,
950  *            using generator <rng>.
951  *
952  *            Used in unit tests that are reusing memory, and that
953  *            want to make sure that there's no favorable side effects
954  *            from that reuse.
955  */
956 int
esl_rnd_mem(ESL_RANDOMNESS * rng,void * buf,int n)957 esl_rnd_mem(ESL_RANDOMNESS *rng, void *buf, int n)
958 {
959   unsigned char *p = (unsigned char *) buf;
960   int            i;
961 
962   for (i = 0; i < n; i++)
963     p[i] = (unsigned char) esl_rnd_Roll(rng, 256);
964   return eslOK;
965 }
966 
967 
968 /* Function:  esl_rnd_floatstring()
969  * Synopsis:  Generate a string representation of a floating point number.
970  * Incept:    SRE, Wed 15 Aug 2018 [Hamilton, It's Quiet Uptown]
971  *
972  * Purpose:   Generate a string decimal representation of a random
973  *            floating point number in <s>, which is space provided
974  *            by the caller, allocated for at least 20 characters.
975  *
976  *            The string representation of the number consists of:
977  *              * an optional - sign  (50%)
978  *              * a 1-6 digit integer part; (1/7 '0'; else 1/7 for each len, [1-9][0-9]* uniformly)
979  *              * an optional fractional part (50%), consisting of:
980  *                 - '.'
981  *                 - a 1-7 digit fractional part; 1/7 for each len, [0-9]* uniformly
982  *              * an optional exponent (50%), consisting of:
983  *                 - 'e'
984  *                 -  -20..e20, 1/41 uniformly.
985  *              * '\0' terminator.
986  *
987  *            Maximum length is 1+6+1+7+4+1 = 20.
988  *
989  *            The string representation is an intersection of what's
990  *            valid for <strtod()>, JSON number values, and C. JSON
991  *            number values do not permit numbers like "1."  (a
992  *            decimal part, decimal point, no fractional part), nor
993  *            hexadecimal representations, nor leading or trailing
994  *            whitespace.
995  *
996  *            The generated number does not attempt to exercise
997  *            extreme values. The absolute magnitude of non-zero
998  *            numbers ranges from 0.0000001e-20..999999e20 (1e-27 to
999  *            ~1e27), well within IEEE754 FLT_MIN..FLT_MAX
1000  *            range. Special values "infinity" and "nan" are not
1001  *            generated.  Zero can be represented by many different
1002  *            patterns <-?0.0+(e-?..)?>.
1003  *
1004  * Returns:   <eslOK> on success.
1005  */
1006 int
esl_rnd_floatstring(ESL_RANDOMNESS * rng,char * s)1007 esl_rnd_floatstring(ESL_RANDOMNESS *rng, char *s)
1008 {
1009   int i = 0;
1010   int n;
1011   int exponent;
1012 
1013   if (esl_rnd_Roll(rng, 2)) s[i++] = '-';
1014   n = esl_rnd_Roll(rng, 7);                                      // length of integer part.
1015   s[i++] = (n-- == 0) ? '0' : '0' + (1 + esl_rnd_Roll(rng, 9));  // No 0 prefix except "0."
1016   while (n-- > 0) s[i++] = '0' + esl_rnd_Roll(rng, 10);
1017   if (esl_rnd_Roll(rng, 2))                                      // optional fractional part
1018     {
1019       n = 1 + esl_rnd_Roll(rng, 7);
1020       s[i++] = '.';
1021       while (n--) s[i++] = '0' + esl_rnd_Roll(rng, 10);
1022     }
1023   if (esl_rnd_Roll(rng, 2))                                      // optional exponent
1024     {
1025       s[i++] = 'e';
1026       exponent = -20 + esl_rnd_Roll(rng, 41);
1027       i += sprintf(s+i, "%d", exponent);
1028     }
1029   s[i++] = '\0';
1030   ESL_DASSERT1(( i <= 20 ));
1031   return eslOK;
1032 }
1033 
1034 
1035 
1036 
1037 /*****************************************************************
1038  * 7. Benchmark driver
1039  *****************************************************************/
1040 #ifdef eslRANDOM_BENCHMARK
1041 /*
1042    gcc -O3 -malign-double -o esl_random_benchmark -I. -L. -DeslRANDOM_BENCHMARK esl_random.c -leasel -lm
1043    ./esl_random_benchmark -N 1000000000
1044    ./esl_random_benchmark -f -N 1000000000
1045    ./esl_random_benchmark -r -N1000000
1046    ./esl_random_benchmark -fr -N 1000000000
1047                                esl_random()            esl_randomness_Init()
1048                            iter  cpu time  per call   iter  cpu time  per call
1049                            ----  --------  --------   ---- ---------- ---------
1050    27 Dec 08 on wanderoo:  1e7    0.78s    78 nsec     1e6   2.08s     2.1 usec   ran2() from NR
1051    30 May 09 on wanderoo:  1e9    8.39s     8 nsec     1e6   5.98s     6.0 usec   Mersenne Twister
1052                            1e9    5.73s     6 nsec     1e8   2.51s     0.03 usec  Knuth
1053    22 Aug 18 on wyvern:    1e9    3.31s     3 nsec     1e7   8.71s     0.8 usec   Mersenne Twister
1054 
1055  */
1056 #include "easel.h"
1057 #include "esl_composition.h"
1058 #include "esl_getopts.h"
1059 #include "esl_random.h"
1060 #include "esl_stopwatch.h"
1061 #include "esl_vectorops.h"
1062 
1063 static ESL_OPTIONS options[] = {
1064   /* name     type      default  env  range toggles reqs incomp  help                                       docgroup*/
1065   { "-h",  eslARG_NONE,   FALSE,  NULL, NULL,  NULL,  NULL, NULL, "show brief help on version and usage",             0 },
1066   { "-c",  eslARG_NONE,   FALSE,  NULL, NULL,  NULL,  NULL, NULL, "benchmark DChooseCDF()",                           0 },
1067   { "-d",  eslARG_NONE,   FALSE,  NULL, NULL,  NULL,  NULL, NULL, "benchmark DChoose()",                              0 },
1068   { "-f",  eslARG_NONE,   FALSE,  NULL, NULL,  NULL,  NULL, NULL, "run fast version instead of MT19937",              0 },
1069   { "-r",  eslARG_NONE,   FALSE,  NULL, NULL,  NULL,  NULL, NULL, "benchmark _Init(), not just random()",             0 },
1070   { "-N",  eslARG_INT, "10000000",NULL, NULL,  NULL,  NULL, NULL, "number of trials",                                 0 },
1071   {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
1072 };
1073 static char usage[]  = "[-options]";
1074 static char banner[] = "benchmarking speed of random number generator";
1075 
1076 int
main(int argc,char ** argv)1077 main(int argc, char **argv)
1078 {
1079   ESL_GETOPTS    *go      = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
1080   ESL_RANDOMNESS *r       = (esl_opt_GetBoolean(go, "-f") == TRUE ? esl_randomness_CreateFast(42) : esl_randomness_Create(42));
1081   ESL_STOPWATCH  *w       = esl_stopwatch_Create();
1082   int             N       = esl_opt_GetInteger(go, "-N");
1083   double          p[20];
1084   double          cdf[20];
1085 
1086   esl_composition_BL62(p);
1087   esl_vec_DCDF(p, 20, cdf);
1088 
1089   esl_stopwatch_Start(w);
1090   if      (esl_opt_GetBoolean(go, "-c")) { while (N--) esl_rnd_DChoose(r, p, 20);      }
1091   else if (esl_opt_GetBoolean(go, "-d")) { while (N--) esl_rnd_DChooseCDF(r, cdf, 20); }
1092   else if (esl_opt_GetBoolean(go, "-r")) { while (N--) esl_randomness_Init(r, 42);     }
1093   else                                   { while (N--) esl_random(r);                  }
1094 
1095   esl_stopwatch_Stop(w);
1096   esl_stopwatch_Display(stdout, w, "# CPU Time: ");
1097 
1098   esl_stopwatch_Destroy(w);
1099   esl_randomness_Destroy(r);
1100   esl_getopts_Destroy(go);
1101   return 0;
1102 }
1103 #endif /*eslRANDOM_BENCHMARK*/
1104 /*----------------- end, benchmark driver -----------------------*/
1105 
1106 
1107 
1108 
1109 /*****************************************************************
1110  * 8. Unit tests.
1111  *****************************************************************/
1112 
1113 #ifdef eslRANDOM_TESTDRIVE
1114 #include "esl_vectorops.h"
1115 #include "esl_stats.h"
1116 #include "esl_dirichlet.h"
1117 
1118 
1119 /* The esl_random() unit test:
1120  * a binned frequency test.
1121  */
1122 static void
utest_random(ESL_RANDOMNESS * r,int n,int nbins,int be_verbose)1123 utest_random(ESL_RANDOMNESS *r, int n, int nbins, int be_verbose)
1124 {
1125   char            msg[]  = "esl_random() unit test failed";
1126   int            *counts = NULL;
1127   double          X2p    = 0.;
1128   int             i;
1129   int             sample;
1130   double          X2, exp, diff;
1131 
1132   if ((counts = malloc(sizeof(int) * nbins)) == NULL) esl_fatal(msg);
1133   esl_vec_ISet(counts, nbins, 0);
1134 
1135   for (i = 0; i < n; i++)
1136     {
1137       sample = esl_rnd_Roll(r, nbins);
1138       if (sample < 0 || sample >= nbins) esl_fatal(msg);
1139       counts[sample]++;
1140     }
1141 
1142   /* X^2 value: \sum (o_i - e_i)^2 / e_i */
1143   for (X2 = 0., i = 0; i < nbins; i++) {
1144     exp  = (double) n / (double) nbins;
1145     diff = (double) counts[i] - exp;
1146     X2 +=  diff*diff/exp;
1147   }
1148   if (esl_stats_ChiSquaredTest(nbins, X2, &X2p) != eslOK) esl_fatal(msg);
1149   if (be_verbose) printf("random():  \t%g\n", X2p);
1150   if (X2p < 0.01) esl_fatal(msg);
1151 
1152   free(counts);
1153   return;
1154 }
1155 
1156 /* The DChoose() and FChoose() unit tests.
1157  */
1158 static void
utest_choose(ESL_RANDOMNESS * r,int n,int nbins,int be_verbose)1159 utest_choose(ESL_RANDOMNESS *r, int n, int nbins, int be_verbose)
1160 {
1161   double *pd  = NULL;		/* probability vector, double */
1162   double *pdc = NULL;		/* CDF, double                */
1163   float  *pf  = NULL;		/* probability vector, float  */
1164   float  *pfc = NULL;		/* CDF, float                 */
1165   int    *ct  = NULL;
1166   int     i;
1167   double  X2, diff, exp, X2p;
1168 
1169   if ((pd  = malloc(sizeof(double) * nbins)) == NULL) esl_fatal("malloc failed");
1170   if ((pdc = malloc(sizeof(double) * nbins)) == NULL) esl_fatal("malloc failed");
1171   if ((pf  = malloc(sizeof(float)  * nbins)) == NULL) esl_fatal("malloc failed");
1172   if ((pfc = malloc(sizeof(float)  * nbins)) == NULL) esl_fatal("malloc failed");
1173   if ((ct  = malloc(sizeof(int)    * nbins)) == NULL) esl_fatal("malloc failed");
1174 
1175   /* Sample a random multinomial probability vector.  */
1176   if (esl_dirichlet_DSampleUniform(r, nbins, pd) != eslOK) esl_fatal("dirichlet sample failed");
1177   esl_vec_D2F(pd, nbins, pf);
1178 
1179   /* Test esl_rnd_DChoose():
1180    * sample observed counts, chi-squared test against expected
1181    */
1182   esl_vec_ISet(ct, nbins, 0);
1183   for (i = 0; i < n; i++)
1184     ct[esl_rnd_DChoose(r, pd, nbins)]++;
1185   for (X2 = 0., i=0; i < nbins; i++) {
1186     exp = (double) n * pd[i];
1187     diff = (double) ct[i] - exp;
1188     X2 += diff*diff/exp;
1189   }
1190   if (esl_stats_ChiSquaredTest(nbins, X2, &X2p) != eslOK) esl_fatal("chi square eval failed");
1191   if (be_verbose) printf("DChoose():  \t%g\n", X2p);
1192   if (X2p < 0.01) esl_fatal("chi squared test failed");
1193 
1194   /* Repeat above for FChoose(). */
1195   esl_vec_ISet(ct, nbins, 0);
1196   for (i = 0; i < n; i++)
1197     ct[esl_rnd_FChoose(r, pf, nbins)]++;
1198   for (X2 = 0., i=0; i < nbins; i++) {
1199     exp = (double) n * pd[i];
1200     diff = (double) ct[i] - exp;
1201     X2 += diff*diff/exp;
1202   }
1203   if (esl_stats_ChiSquaredTest(nbins, X2, &X2p) != eslOK) esl_fatal("chi square eval failed");
1204   if (be_verbose) printf("FChoose():  \t%g\n", X2p);
1205   if (X2p < 0.01) esl_fatal("chi squared test failed");
1206 
1207   /* esl_rnd_DChooseCDF(). */
1208   esl_vec_ISet(ct, nbins, 0);
1209   esl_vec_DCDF(pd, nbins, pdc);
1210   for (i = 0; i < n; i++)
1211     ct[esl_rnd_DChooseCDF(r, pdc, nbins)]++;
1212   for (X2 = 0., i=0; i < nbins; i++) {
1213     exp  = (double) n * pd[i];
1214     diff = (double) ct[i] - exp;
1215     X2 += diff*diff/exp;
1216   }
1217   if (esl_stats_ChiSquaredTest(nbins, X2, &X2p) != eslOK) esl_fatal("chi square eval failed");
1218   if (be_verbose) printf("DChoose():  \t%g\n", X2p);
1219   if (X2p < 0.01) esl_fatal("chi squared test failed");
1220 
1221   /* esl_rnd_FChooseCDF() */
1222   esl_vec_ISet(ct, nbins, 0);
1223   esl_vec_FCDF(pf, nbins, pfc);
1224   for (i = 0; i < n; i++)
1225     ct[esl_rnd_FChooseCDF(r, pfc, nbins)]++;
1226   for (X2 = 0., i=0; i < nbins; i++) {
1227     exp  = (double) n * pf[i];
1228     diff = (double) ct[i] - exp;
1229     X2 += diff*diff/exp;
1230   }
1231   if (esl_stats_ChiSquaredTest(nbins, X2, &X2p) != eslOK) esl_fatal("chi square eval failed");
1232   if (be_verbose) printf("DChoose():  \t%g\n", X2p);
1233   if (X2p < 0.01) esl_fatal("chi squared test failed");
1234 
1235   free(pd);
1236   free(pdc);
1237   free(pf);
1238   free(pfc);
1239   free(ct);
1240   return;
1241 }
1242 
1243 
1244 /* utest_Deal()
1245  * tests esl_rnd_Deal()
1246  *
1247  * If deals are random, each possible integer is sampled uniformly.
1248  * Take <nsamp> deals of <m> integers from possible <n>.
1249  * Expected count of each integer = nsamp * (m/n), +/- s.d. \sqrt(u)
1250  * Test that min, max are within +/- 6 sd.
1251  *
1252  * Can fail stochastically, so caller should default to an <rng>
1253  * with a fixed RNG seed.
1254  */
1255 static void
utest_Deal(ESL_RANDOMNESS * rng)1256 utest_Deal(ESL_RANDOMNESS *rng)
1257 {
1258   char msg[]           = "esl_random deal unit test failed";
1259   int    m             = 100;
1260   int    n             = 1000;
1261   int    nsamp         = 10000;
1262   int   *deal          = malloc(sizeof(int) * m);
1263   int   *ct            = malloc(sizeof(int) * n);
1264   double expected_mean = ((double) m / (double) n) * (double) nsamp;
1265   double expected_sd   = sqrt(expected_mean);
1266   int    max_allowed   = (int) round( expected_mean + 6. * expected_sd);
1267   int    min_allowed   = (int) round( expected_mean - 6. * expected_sd);
1268   int    i;
1269 
1270   if (deal == NULL || ct == NULL) esl_fatal(msg);
1271   esl_vec_ISet(ct, n, 0);
1272 
1273   while (nsamp--)
1274     {
1275       esl_rnd_Deal(rng, m, n, deal);
1276       for (i = 0; i < m; i++) ct[deal[i]]++;
1277     }
1278   if (esl_vec_IMax(ct, n) > max_allowed) esl_fatal(msg);
1279   if (esl_vec_IMin(ct, n) < min_allowed) esl_fatal(msg);
1280 
1281   free(deal);
1282   free(ct);
1283 }
1284 #endif /*eslRANDOM_TESTDRIVE*/
1285 /*-------------------- end, unit tests --------------------------*/
1286 
1287 
1288 /*****************************************************************
1289  * 9. Test driver.
1290  *****************************************************************/
1291 #ifdef eslRANDOM_TESTDRIVE
1292 #include "esl_config.h"
1293 
1294 #include <stdio.h>
1295 
1296 #include "easel.h"
1297 #include "esl_dirichlet.h"
1298 #include "esl_getopts.h"
1299 #include "esl_random.h"
1300 #include "esl_randomseq.h"
1301 #include "esl_vectorops.h"
1302 
1303 static ESL_OPTIONS options[] = {
1304   /* name  type         default  env   range togs  reqs  incomp  help                docgrp */
1305   {"-h",  eslARG_NONE,    FALSE, NULL, NULL, NULL, NULL, NULL, "show help and usage",               0},
1306   {"-b",  eslARG_INT,      "20", NULL, "n>0",NULL, NULL, NULL, "number of test bins",               0},
1307   {"-n",  eslARG_INT, "1000000", NULL, "n>0",NULL, NULL, NULL, "number of samples",                 0},
1308   {"-s",  eslARG_INT,      "42", NULL, NULL, NULL, NULL, NULL, "set random number seed to <n>",     0},
1309   {"-v",  eslARG_NONE,    FALSE, NULL, NULL, NULL, NULL, NULL, "show verbose output",               0},
1310   {"--mtbits",eslARG_STRING,NULL,NULL, NULL, NULL, NULL, NULL, "save MT bit file for NIST benchmark",0},
1311   {"--kbits", eslARG_STRING,NULL,NULL, NULL, NULL, NULL, NULL, "save Knuth bit file for NIST benchmark",0},
1312   { 0,0,0,0,0,0,0,0,0,0},
1313 };
1314 static char usage[]  = "[-options]";
1315 static char banner[] = "test driver for random module";
1316 
1317 static int save_bitfile(char *bitfile, ESL_RANDOMNESS *r, int n);
1318 
1319 int
main(int argc,char ** argv)1320 main(int argc, char **argv)
1321 {
1322   ESL_GETOPTS    *go         = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
1323   ESL_RANDOMNESS *r1         = esl_randomness_Create(esl_opt_GetInteger(go, "-s"));
1324   ESL_RANDOMNESS *r2         = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
1325   char           *mtbitfile  = esl_opt_GetString (go, "--mtbits");
1326   char           *kbitfile   = esl_opt_GetString (go, "--kbits");
1327   int             nbins      = esl_opt_GetInteger(go, "-b");
1328   int             n          = esl_opt_GetInteger(go, "-n");
1329   int             be_verbose = esl_opt_GetBoolean(go, "-v");
1330 
1331   fprintf(stderr, "## %s\n", argv[0]);
1332   fprintf(stderr, "#  rng seed 1 (slow) = %" PRIu32 "\n", esl_randomness_GetSeed(r1));
1333   fprintf(stderr, "#  rng seed 2 (fast) = %" PRIu32 "\n", esl_randomness_GetSeed(r2));
1334 
1335   utest_random(r1, n, nbins, be_verbose);
1336   utest_choose(r1, n, nbins, be_verbose);
1337   utest_random(r2, n, nbins, be_verbose);
1338   utest_choose(r2, n, nbins, be_verbose);
1339 
1340   utest_Deal(r1);
1341 
1342   if (mtbitfile) save_bitfile(mtbitfile, r1, n);
1343   if (kbitfile)  save_bitfile(kbitfile,  r2, n);
1344 
1345   fprintf(stderr, "#  status = ok\n");
1346 
1347   esl_randomness_Destroy(r1);
1348   esl_randomness_Destroy(r2);
1349   esl_getopts_Destroy(go);
1350   return 0;
1351 }
1352 
1353 static int
save_bitfile(char * bitfile,ESL_RANDOMNESS * r,int n)1354 save_bitfile(char *bitfile, ESL_RANDOMNESS *r, int n)
1355 {
1356   FILE *fp = NULL;
1357   int b,i;
1358   long x;
1359 
1360   /* Open the file.
1361    */
1362   if ((fp = fopen(bitfile, "w")) == NULL)
1363     esl_fatal("failed to open %s for writing", bitfile);
1364 
1365   /* Sample <n> random numbers, output 32n random bits to the file.
1366    */
1367   for (i = 0; i < n; i++)
1368     {
1369       x = (r->type == eslRND_FAST ? knuth(r) : mersenne_twister(r)); /* generate a 32 bit random variate by MT19937 */
1370       for (b = 0; b < 32; b++)
1371 	{
1372 	  if (x & 01) fprintf(fp, "1");
1373 	  else        fprintf(fp, "0");
1374 	  x >>= 1;
1375 	}
1376       fprintf(fp, "\n");
1377     }
1378   fclose(fp);
1379   return eslOK;
1380 }
1381 #endif /*eslRANDOM_TESTDRIVE*/
1382 
1383 
1384 
1385 /*****************************************************************
1386  * 10. Example.
1387  *****************************************************************/
1388 #ifdef eslRANDOM_EXAMPLE
1389 /*::cexcerpt::random_example::begin::*/
1390 /* compile: cc -I. -o esl_random_example -DeslRANDOM_EXAMPLE esl_random.c esl_getopts.c easel.c -lm
1391  * run:     ./random_example 42
1392  */
1393 #include <stdio.h>
1394 #include "easel.h"
1395 #include "esl_getopts.h"
1396 #include "esl_random.h"
1397 
1398 /* In Easel and HMMER, the option for setting the seed is typically -s, sometimes --seed.
1399  * Default is usually 0 because we want a "random" seed. Less commonly, "42" for a fixed seed;
1400  * rarely, a different fixed seed.
1401  *
1402  * Generally you want to use esl_randomness_Create(), to get a Mersenne Twister. The "fast"
1403  * RNG you get from esl_randomness_CreateFast() isn't all that much faster (~25% per sample)
1404  * but has much worse quality in its randomness.
1405  */
1406 static ESL_OPTIONS options[] = {
1407   /* name           type      default  env  range toggles reqs incomp  help                                       docgroup*/
1408   { "-h",        eslARG_NONE,   FALSE,  NULL, NULL,  NULL,  NULL, NULL, "show brief help on version and usage",  0 },
1409   { "-i",        eslARG_NONE,   FALSE,  NULL, NULL,  NULL,  NULL, NULL, "sample uint32's instead of doubles",    0 },
1410   { "-n",        eslARG_INT,     "20",  NULL, NULL,  NULL,  NULL, NULL, "number of random samples to show",      0 },
1411   { "-s",        eslARG_INT,      "0",  NULL, NULL,  NULL,  NULL, NULL, "set random number seed to <n>",         0 },
1412   {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
1413 };
1414 static char usage[]  = "[-options]";
1415 static char banner[] = "example of using random module";
1416 
1417 int
main(int argc,char ** argv)1418 main(int argc, char **argv)
1419 {
1420   ESL_GETOPTS    *go        = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
1421   ESL_RANDOMNESS *rng       = esl_randomness_Create( esl_opt_GetInteger(go, "-s") );
1422   int             do_uint32 = esl_opt_GetBoolean(go, "-i");
1423   int             n         = esl_opt_GetInteger(go, "-n");
1424 
1425   printf("RNG seed: %" PRIu32 "\n", esl_randomness_GetSeed(rng));
1426   printf("\nA sequence of %d pseudorandom numbers:\n", n);
1427   if (do_uint32)  while (n--)  printf("%" PRIu32 "\n", esl_random_uint32(rng));
1428   else            while (n--)  printf("%f\n",          esl_random(rng));
1429 
1430   printf("\nInternal dump of RNG state:\n");
1431   esl_randomness_Dump(stdout, rng);
1432 
1433   esl_randomness_Destroy(rng);
1434   esl_getopts_Destroy(go);
1435   return 0;
1436 }
1437 /*::cexcerpt::random_example::end::*/
1438 #endif /*eslRANDOM_EXAMPLE*/
1439 
1440 
1441 
1442