1 /* Portable, threadsafe Mersenne Twister random number generator
2 *
3 * 1. The ESL_RANDOMNESS object.
4 * 2. The generators, esl_random().
5 * 3. Debugging/development tools.
6 * 4. Other fundamental sampling (including Gaussian, gamma).
7 * 5. Multinomial sampling from discrete probability n-vectors.
8 * 6. Random data generators (unit testing, etc.)
9 * 7. Benchmark driver
10 * 8. Unit tests.
11 * 9. Test driver.
12 * 10. Example.
13 *
14 * NIST test suite for validating random number generators:
15 * https://csrc.nist.gov/projects/random-bit-generation/documentation-and-software
16 *
17 * It'd be nice if we had a debugging/unit testing mode in which
18 * esl_random() deliberately generated extreme values, such as 0 for
19 * example. Routines that use esl_random() can be sensitive to whether
20 * the interval 0,1 is open or closed. We should be able to test for
21 * problems with interval endpoints without taking enormous numbers of
22 * samples.
23 *
24 * Competitive alternatives to MT exist, including PCG [http://www.pcg-random.org/].
25 */
26 #include "esl_config.h"
27
28 #include <stdlib.h>
29 #include <stdint.h>
30 #include <stdio.h>
31 #include <string.h>
32 #include <ctype.h>
33 #include <math.h>
34 #include <time.h>
35 #ifdef HAVE_UNISTD_H
36 #include <unistd.h>
37 #endif
38
39 #include "easel.h"
40 #include "esl_random.h"
41
42 static uint32_t choose_arbitrary_seed(void);
43 static uint32_t knuth (ESL_RANDOMNESS *r);
44 static uint32_t mersenne_twister (ESL_RANDOMNESS *r);
45 static void mersenne_seed_table(ESL_RANDOMNESS *r, uint32_t seed);
46 static void mersenne_fill_table(ESL_RANDOMNESS *r);
47
48 /*****************************************************************
49 *# 1. The <ESL_RANDOMNESS> object.
50 *****************************************************************/
51
52 /* Function: esl_randomness_Create()
53 * Synopsis: Create the default strong random number generator.
54 *
55 * Purpose: Create a random number generator using
56 * a given random seed.
57 *
58 * The default random number generator uses the Mersenne
59 * Twister MT19937 algorithm \citep{Matsumoto98}. It has a
60 * period of $2^{19937}-1$, and equidistribution over
61 * $2^{32}$ values.
62 *
63 * If <seed> is $>0$, the random number generator is
64 * reproducibly initialized with that seed. Two RNGs
65 * created with the same nonzero seed will give exactly the
66 * same stream of pseudorandom numbers. This allows you to
67 * make reproducible stochastic simulations, for example.
68 *
69 * If <seed> is 0, an arbitrary seed is chosen.
70 * Internally, this comes from hashing together the current
71 * <time()>, <clock()>, and process id. Two RNGs created
72 * with <seed>=0 probably (but not assuredly) give
73 * different streams of pseudorandom numbers. The true seed
74 * can be retrieved from the <ESL_RANDOMNESS> object using
75 * <esl_randomness_GetSeed()>. The strategy used for
76 * choosing the arbitrary seed is predictable; it should
77 * not be considered cryptographically secure.
78 *
79 * Args: seed $>= 0$.
80 *
81 * Returns: an initialized <ESL_RANDOMNESS *> on success.
82 * Caller free's with <esl_randomness_Destroy()>.
83 *
84 * Throws: <NULL> on failure.
85 *
86 * Xref: SRE:STL8/p57.
87 * SRE:J5/21: Mersenne Twister.
88 */
89 ESL_RANDOMNESS *
esl_randomness_Create(uint32_t seed)90 esl_randomness_Create(uint32_t seed)
91 {
92 ESL_RANDOMNESS *r = NULL;
93 int status;
94
95 ESL_ALLOC(r, sizeof(ESL_RANDOMNESS));
96 r->type = eslRND_MERSENNE;
97 r->mti = 0;
98 r->x = 0;
99 r->seed = 0;
100 esl_randomness_Init(r, seed);
101 return r;
102
103 ERROR:
104 return NULL;
105 }
106
107 /* Function: esl_randomness_CreateFast()
108 * Synopsis: Create the alternative fast generator.
109 *
110 * THIS FUNCTION IS DEPRECATED. USE esl_randomness_Create() INSTEAD.
111 *
112 * Purpose: Same as <esl_randomness_Create()>, except that a simple
113 * linear congruential generator (LCG) will be used.
114 * This is a $(a=69069, c=1)$ LCG, with a period of
115 * $2^{32}$.
116 *
117 * This is a low quality RNG. It fails several standard
118 * NIST statistical tests. Successive samples from an LCG
119 * are correlated, and it has a relatively short period. IT
120 * SHOULD NOT BE USED (period). It is present in Easel for
121 * legacy reasons. It used to be substantially faster than
122 * our high quality RNG, but now our default Mersenne
123 * Twister is plenty fast. We have regression and unit
124 * tests that use the LCG and depend on a fixed seed and a
125 * particular pseudorandom number sequence; they would have
126 * to be re-studied carefully to upgrade them to a
127 * different RNG.
128 *
129 * Here's an example of how serial correlation arises in an
130 * LCG, and how it can lead to serious (and difficult to
131 * diagnose) failure in a Monte Carlo simulation. An LCG
132 * calculates $x_{i+1} = ax_i + c$. Suppose $x_i$ is small:
133 * in the range 0..6000, say, as a specific example. Now
134 * $x_{i+1}$ cannot be larger than 4.1e8, for an LCG with
135 * $a=69069$,$c=1$. So if you take a sample and test
136 * whether it is $< 1e-6$ (say), the next sample will be in
137 * a range of about 0..0.1, rather than being uniform on
138 * 0..1.
139 *
140 * Args: seed $>= 0$.
141 *
142 * Returns: an initialized <ESL_RANDOMNESS *> on success.
143 * Caller free's with <esl_randomness_Destroy()>.
144 *
145 * Throws: <NULL> on failure.
146 *
147 * Xref: SRE:J5/44: for accidental proof that period is indeed 2^32.
148 */
149 ESL_RANDOMNESS *
esl_randomness_CreateFast(uint32_t seed)150 esl_randomness_CreateFast(uint32_t seed)
151 {
152 ESL_RANDOMNESS *r = NULL;
153 int status;
154
155 ESL_ALLOC(r, sizeof(ESL_RANDOMNESS));
156 r->type = eslRND_FAST;
157 r->mti = 0;
158 r->x = 0;
159 r->seed = 0;
160 esl_randomness_Init(r, seed);
161 return r;
162
163 ERROR:
164 return NULL;
165 }
166
167
168 /* Function: esl_randomness_CreateTimeseeded()
169 * Synopsis: Create an RNG with a quasirandom seed.
170 *
171 * THIS FUNCTION IS DEPRECATED. USE esl_randomness_Create(0).
172 *
173 * Purpose: Like <esl_randomness_Create()>, but it initializes the
174 * the random number generator using a POSIX <time()> call
175 * (number of seconds since the POSIX epoch).
176 *
177 * This function is deprecated. Use
178 * <esl_randomness_Create(0)> instead.
179 *
180 * Returns: an initialized <ESL_RANDOMNESS *> on success.
181 * Caller free's with <esl_randomness_Destroy()>.
182 *
183 * Throws: <NULL> on failure.
184 *
185 * Xref: SRE:STL8/p57.
186 */
187 ESL_RANDOMNESS *
esl_randomness_CreateTimeseeded(void)188 esl_randomness_CreateTimeseeded(void)
189 {
190 return esl_randomness_Create(0);
191 }
192
193
194 /* Function: esl_randomness_Init()
195 * Synopsis: Reinitialize a RNG.
196 *
197 * Purpose: Reset and reinitialize an existing <ESL_RANDOMNESS>
198 * object with a new seed.
199 *
200 * Sometimes it's useful to reseed an RNG to guarantee a
201 * particular reproducible series of pseudorandom numbers
202 * at an arbitrary point in a program; HMMER does this, for
203 * example, to guarantee the same results from the same
204 * HMM/sequence comparison regardless of where in a search
205 * the HMM or sequence occurs.
206 *
207 * Args: r - randomness object
208 * seed - new seed to use; >=0.
209 *
210 * Returns: <eslOK> on success.
211 *
212 * Xref: SRE:STL8/p57.
213 */
214 int
esl_randomness_Init(ESL_RANDOMNESS * r,uint32_t seed)215 esl_randomness_Init(ESL_RANDOMNESS *r, uint32_t seed)
216 {
217 if (seed == 0) seed = choose_arbitrary_seed();
218 if (r->type == eslRND_MERSENNE)
219 {
220 mersenne_seed_table(r, seed);
221 mersenne_fill_table(r);
222 }
223 else
224 {
225 r->seed = seed;
226 r->x = esl_rnd_mix3(seed, 87654321, 12345678); /* arbitrary dispersion of the seed */
227 if (r->x == 0) r->x = 42; /* make sure we don't have a zero */
228 }
229 return eslOK;
230 }
231
232 /* Function: esl_randomness_GetSeed()
233 * Synopsis: Returns the value of RNG's seed.
234 *
235 * Purpose: Return the value of the seed.
236 */
237 uint32_t
esl_randomness_GetSeed(const ESL_RANDOMNESS * r)238 esl_randomness_GetSeed(const ESL_RANDOMNESS *r)
239 {
240 return r->seed;
241 }
242
243
244 /* Function: esl_randomness_Destroy()
245 * Synopsis: Free an RNG.
246 *
247 * Purpose: Frees an <ESL_RANDOMNESS> object.
248 */
249 void
esl_randomness_Destroy(ESL_RANDOMNESS * r)250 esl_randomness_Destroy(ESL_RANDOMNESS *r)
251 {
252 free(r);
253 return;
254 }
255
256 /*----------- end of ESL_RANDOMNESS object functions --------------*/
257
258
259
260 /*****************************************************************
261 *# 2. The generators and <esl_random()>
262 *****************************************************************/
263
264 /* Function: esl_random()
265 * Synopsis: Generate a uniform random deviate on [0,1)
266 *
267 * Purpose: Returns a uniform deviate x, $0.0 \leq x < 1.0$, given
268 * RNG <r>.
269 *
270 * If you cast the return value to float, the [0,1) interval
271 * guarantee is lost because values close to 1 will round to
272 * 1.0.
273 *
274 * Returns: a uniformly distribute random deviate on interval
275 * $0.0 \leq x < 1.0$.
276 */
277 double
esl_random(ESL_RANDOMNESS * r)278 esl_random(ESL_RANDOMNESS *r)
279 {
280 uint32_t x = (r->type == eslRND_MERSENNE) ? mersenne_twister(r) : knuth(r);
281 return ((double) x / 4294967296.0); // 2^32: [0,1). Original MT code has * (1.0/ 4294967296.0), which I believe (and tested) to be identical.
282 }
283
284
285 /* Function: esl_random_uint32()
286 * Synopsis: Generate a uniform random deviate on 0..2^32-1
287 * Incept: SRE, Wed Jan 13 10:59:26 2016
288 *
289 * Purpose: Returns a uniform deviate x, a 32-bit unsigned
290 * integer $0 \leq x < 2^{32}$, given RNG <r>.
291 */
292 uint32_t
esl_random_uint32(ESL_RANDOMNESS * r)293 esl_random_uint32(ESL_RANDOMNESS *r)
294 {
295 return (r->type == eslRND_MERSENNE) ? mersenne_twister(r) : knuth(r);
296 }
297
298
299 static uint32_t
knuth(ESL_RANDOMNESS * r)300 knuth(ESL_RANDOMNESS *r)
301 {
302 r->x *= 69069;
303 r->x += 1;
304 return r->x;
305 }
306
307 /* mersenne_twister() and other mersenne_*() functions below:
308 * A simple serial implementation of the original Mersenne Twister
309 * algorithm [Matsumoto98].
310 *
311 * There are more sophisticated and faster implementations of MT, using
312 * vector instructions and/or directly generating IEEE754 doubles
313 * bitwise rather than doing an expensive normalization. We can
314 * improve the implementation later if necessary, but even the basic
315 * MT offers ~10x speed improvement over Easel's previous RNG.
316 * [SRE, 30 May 09, Stockholm]
317 */
318 static uint32_t
mersenne_twister(ESL_RANDOMNESS * r)319 mersenne_twister(ESL_RANDOMNESS *r)
320 {
321 uint32_t x;
322 if (r->mti >= 624) mersenne_fill_table(r);
323
324 x = r->mt[r->mti++];
325 x ^= (x >> 11);
326 x ^= (x << 7) & 0x9d2c5680;
327 x ^= (x << 15) & 0xefc60000;
328 x ^= (x >> 18);
329 return x;
330 }
331
332 /* mersenne_seed_table()
333 * Initialize the state of the RNG from a seed, using a Knuth LCG.
334 *
335 * TODO: In January 2002, Nishimura and Matsumoto replaced this with a
336 * better initialization. (Why did I use their ~1999 initialization
337 * when I adapted MT in June 2009?) They say that the problem with
338 * this version is that the seed's most significant bit has little
339 * effect on the MT state vector. Upgrading this will require effort
340 * though: any change to the output of our RNG will break numerous
341 * unit tests and regressions that use fixed RNG seeds to get
342 * reproducible streams.
343 */
344 static void
mersenne_seed_table(ESL_RANDOMNESS * r,uint32_t seed)345 mersenne_seed_table(ESL_RANDOMNESS *r, uint32_t seed)
346 {
347 int z;
348
349 r->seed = seed;
350 r->mt[0] = seed;
351 for (z = 1; z < 624; z++)
352 r->mt[z] = 69069 * r->mt[z-1];
353 return;
354 }
355
356 /* mersenne_fill_table()
357 * Refill the table with 624 new random numbers.
358 * We do this whenever we've reseeded, or when we
359 * run out of numbers.
360 */
361 static void
mersenne_fill_table(ESL_RANDOMNESS * r)362 mersenne_fill_table(ESL_RANDOMNESS *r)
363 {
364 uint32_t y;
365 int z;
366 static uint32_t mag01[2] = { 0x0, 0x9908b0df };
367
368 for (z = 0; z < 227; z++) /* 227 = N-M = 624-397 */
369 {
370 y = (r->mt[z] & 0x80000000) | (r->mt[z+1] & 0x7fffffff);
371 r->mt[z] = r->mt[z+397] ^ (y>>1) ^ mag01[(int)(y & 0x1)]; /* yes, the (int) cast is necessary; xref bug #e7; some compilers may try to cast y to signed int otherwise, to use it in an array index */
372 }
373 for (; z < 623; z++)
374 {
375 y = (r->mt[z] & 0x80000000) | (r->mt[z+1] & 0x7fffffff);
376 r->mt[z] = r->mt[z-227] ^ (y>>1) ^ mag01[(int)(y & 0x1)];
377 }
378 y = (r->mt[623] & 0x80000000) | (r->mt[0] & 0x7fffffff);
379 r->mt[623] = r->mt[396] ^ (y>>1) ^ mag01[(int)(y & 0x1)];
380 r->mti = 0;
381
382 return;
383 }
384
385
386 /* choose_arbitrary_seed()
387 * Return a 'quasirandom' seed > 0, concocted by hashing time(),
388 * clock(), and getpid() together.
389 * See RFC1750 for a discussion of securely seeding RNGs.
390 */
391 static uint32_t
choose_arbitrary_seed(void)392 choose_arbitrary_seed(void)
393 {
394 uint32_t a = (uint32_t) time ((time_t *) NULL);
395 uint32_t b = 87654321; // we'll use getpid() below, if we can
396 uint32_t c = (uint32_t) clock(); // clock() gives time since process invocation, in msec at least, if not usec
397 uint32_t seed;
398 #ifdef HAVE_GETPID
399 b = (uint32_t) getpid(); // preferable b choice, if we have POSIX getpid()
400 #endif
401 seed = esl_rnd_mix3(a,b,c); // try to decorrelate closely spaced choices of pid/times
402 return (seed == 0) ? 42 : seed; // 42 is arbitrary, just to avoid seed==0.
403 }
404
405 /* Function: esl_rnd_mix3()
406 * Synopsis: Make a quasirandom number by mixing three inputs.
407 * Incept: SRE, Tue 21 Aug 2018
408 *
409 * Purpose: This is Bob Jenkin's <mix()>. Given <a,b,c>,
410 * generate a number that's generated reasonably
411 * uniformly on $[0,2^{32}-1]$ even for closely
412 * spaced choices of $a,b,c$.
413 */
414 uint32_t
esl_rnd_mix3(uint32_t a,uint32_t b,uint32_t c)415 esl_rnd_mix3(uint32_t a, uint32_t b, uint32_t c)
416 {
417 a -= b; a -= c; a ^= (c>>13);
418 b -= c; b -= a; b ^= (a<<8);
419 c -= a; c -= b; c ^= (b>>13);
420 a -= b; a -= c; a ^= (c>>12);
421 b -= c; b -= a; b ^= (a<<16);
422 c -= a; c -= b; c ^= (b>>5);
423 a -= b; a -= c; a ^= (c>>3);
424 b -= c; b -= a; b ^= (a<<10);
425 c -= a; c -= b; c ^= (b>>15);
426 return c;
427 }
428 /*----------- end of esl_random() --------------*/
429
430
431
432 /*****************************************************************
433 *# 3. Debugging and development tools
434 *****************************************************************/
435
436 /* Function: esl_randomness_Dump()
437 * Synopsis: Dump ESL_RANDOMNESS object to stream, for debugging/examination.
438 */
439 int
esl_randomness_Dump(FILE * fp,ESL_RANDOMNESS * r)440 esl_randomness_Dump(FILE *fp, ESL_RANDOMNESS *r)
441 {
442 if (r->type == eslRND_FAST)
443 {
444 fputs ("type = knuth\n", fp );
445 fprintf(fp, "state = %" PRIu32 "\n", r->x);
446 fprintf(fp, "seed = %" PRIu32 "\n", r->seed);
447 }
448 else if (r->type == eslRND_MERSENNE)
449 {
450 int i,j;
451
452 fputs ("type = mersenne twister\n", fp );
453 fprintf(fp, "mti = %d (0..623)\n", r->mti);
454 fprintf(fp, "mt[mti] = %" PRIu32 "\n", r->mt[r->mti]);
455
456 fprintf(fp, "%6d: ", 0);
457 for (i = 0, j=0; i < 624; i++)
458 {
459 fprintf(fp, "%11" PRIu32 " ", r->mt[i]);
460 if (++j == 20) { fprintf(fp, "\n%6d: ", i+1); j=0; }
461 }
462 fputs("\n", fp);
463 }
464 return eslOK;
465 }
466 /*----------- end, debugging/development tools ------------------*/
467
468
469 /*****************************************************************
470 *# 4. Other fundamental sampling (including Gaussian, gamma)
471 *****************************************************************/
472
473 /* Function: esl_rnd_UniformPositive()
474 * Synopsis: Generate a uniform positive random deviate on interval (0,1).
475 *
476 * Purpose: Same as <esl_random()>, but assure $0 < x < 1$;
477 * (positive uniform deviate).
478 */
479 double
esl_rnd_UniformPositive(ESL_RANDOMNESS * r)480 esl_rnd_UniformPositive(ESL_RANDOMNESS *r)
481 {
482 double x;
483 do { x = esl_random(r); } while (x == 0.0);
484 return x;
485 }
486
487
488 /* Function: esl_rnd_Gaussian()
489 * Synopsis: Generate a Gaussian-distributed sample.
490 *
491 * Purpose: Pick a Gaussian-distributed random variable
492 * with a given <mean> and standard deviation <stddev>, and
493 * return it.
494 *
495 * Implementation is derived from the public domain
496 * RANLIB.c <gennor()> function, written by Barry W. Brown
497 * and James Lovato (M.D. Anderson Cancer Center, Texas
498 * USA) using the method described in
499 * \citep{AhrensDieter73}.
500 *
501 * Original algorithm said to use uniform deviates on [0,1)
502 * interval (i.e. <esl_random()>), but this appears to be
503 * wrong. Use uniform deviates on (0,1) interval instead
504 * (i.e., <esl_rnd_UniformPositive()>). RANLIB, GNU Octave
505 * have made this alteration, possibly inadvertently.
506 * [xref cryptogenomicon post, 13 Oct 2014].
507 *
508 * Method: Impenetrability of the code is to be blamed on
509 * FORTRAN/f2c lineage.
510 *
511 * Args: r - ESL_RANDOMNESS object
512 * mean - mean of the Gaussian we're sampling from
513 * stddev - standard deviation of the Gaussian
514 */
515 double
esl_rnd_Gaussian(ESL_RANDOMNESS * r,double mean,double stddev)516 esl_rnd_Gaussian(ESL_RANDOMNESS *r, double mean, double stddev)
517 {
518 long i;
519 double snorm,u,s,ustar,aa,w,y,tt;
520
521 /* These static's are threadsafe: they are magic constants
522 * we will not touch.
523 */
524 static double a[32] = {
525 0.0,3.917609E-2,7.841241E-2,0.11777,0.1573107,0.1970991,0.2372021,0.2776904,
526 0.3186394,0.36013,0.4022501,0.4450965,0.4887764,0.5334097,0.5791322,
527 0.626099,0.6744898,0.7245144,0.7764218,0.8305109,0.8871466,0.9467818,
528 1.00999,1.077516,1.150349,1.229859,1.318011,1.417797,1.534121,1.67594,
529 1.862732,2.153875
530 };
531 static double d[31] = {
532 0.0,0.0,0.0,0.0,0.0,0.2636843,0.2425085,0.2255674,0.2116342,0.1999243,
533 0.1899108,0.1812252,0.1736014,0.1668419,0.1607967,0.1553497,0.1504094,
534 0.1459026,0.14177,0.1379632,0.1344418,0.1311722,0.128126,0.1252791,
535 0.1226109,0.1201036,0.1177417,0.1155119,0.1134023,0.1114027,0.1095039
536 };
537 static double t[31] = {
538 7.673828E-4,2.30687E-3,3.860618E-3,5.438454E-3,7.0507E-3,8.708396E-3,
539 1.042357E-2,1.220953E-2,1.408125E-2,1.605579E-2,1.81529E-2,2.039573E-2,
540 2.281177E-2,2.543407E-2,2.830296E-2,3.146822E-2,3.499233E-2,3.895483E-2,
541 4.345878E-2,4.864035E-2,5.468334E-2,6.184222E-2,7.047983E-2,8.113195E-2,
542 9.462444E-2,0.1123001,0.136498,0.1716886,0.2276241,0.330498,0.5847031
543 };
544 static double h[31] = {
545 3.920617E-2,3.932705E-2,3.951E-2,3.975703E-2,4.007093E-2,4.045533E-2,
546 4.091481E-2,4.145507E-2,4.208311E-2,4.280748E-2,4.363863E-2,4.458932E-2,
547 4.567523E-2,4.691571E-2,4.833487E-2,4.996298E-2,5.183859E-2,5.401138E-2,
548 5.654656E-2,5.95313E-2,6.308489E-2,6.737503E-2,7.264544E-2,7.926471E-2,
549 8.781922E-2,9.930398E-2,0.11556,0.1404344,0.1836142,0.2790016,0.7010474
550 };
551
552 u = esl_rnd_UniformPositive(r);
553 s = 0.0;
554 if(u > 0.5) s = 1.0;
555 u += (u-s);
556 u = 32.0*u;
557 i = (long) (u);
558 if(i == 32) i = 31;
559 if(i == 0) goto S100;
560 /*
561 * START CENTER
562 */
563 ustar = u-(double)i;
564 aa = a[i-1];
565 S40:
566 if (ustar <= t[i-1]) goto S60;
567 w = (ustar - t[i-1]) * h[i-1];
568 S50:
569 /*
570 * EXIT (BOTH CASES)
571 */
572 y = aa+w;
573 snorm = y;
574 if(s == 1.0) snorm = -y;
575 return (stddev*snorm + mean);
576 S60:
577 /*
578 * CENTER CONTINUED
579 */
580 u = esl_rnd_UniformPositive(r);
581 w = u*(a[i]-aa);
582 tt = (0.5*w+aa)*w;
583 goto S80;
584 S70:
585 tt = u;
586 ustar = esl_rnd_UniformPositive(r);
587 S80:
588 if(ustar > tt) goto S50;
589 u = esl_rnd_UniformPositive(r);
590 if(ustar >= u) goto S70;
591 ustar = esl_rnd_UniformPositive(r);
592 goto S40;
593 S100:
594 /*
595 * START TAIL
596 */
597 i = 6;
598 aa = a[31];
599 goto S120;
600 S110:
601 aa += d[i-1];
602 i += 1;
603 ESL_DASSERT1(( i <= 31 ));
604 S120:
605 u += u;
606 if(u < 1.0) goto S110;
607 u -= 1.0;
608 S140:
609 w = u*d[i-1];
610 tt = (0.5*w+aa)*w;
611 goto S160;
612 S150:
613 tt = u;
614 S160:
615 ustar = esl_rnd_UniformPositive(r);
616 if(ustar > tt) goto S50;
617 u = esl_rnd_UniformPositive(r);
618 if(ustar >= u) goto S150;
619 u = esl_rnd_UniformPositive(r);
620 goto S140;
621 }
622
623
624
625 /* subfunctions that esl_rnd_Gamma() is going to call:
626 */
627 static double
gamma_ahrens(ESL_RANDOMNESS * r,double a)628 gamma_ahrens(ESL_RANDOMNESS *r, double a) /* for a >= 3 */
629 {
630 double V; /* uniform deviates */
631 double X,Y;
632 double test;
633
634 do {
635 do { /* generate candidate X */
636 Y = tan(eslCONST_PI * esl_random(r));
637 X = Y * sqrt(2.*a -1.) + a - 1.;
638 } while (X <= 0.);
639 /* accept/reject X */
640 V = esl_random(r);
641 test = (1+Y*Y) * exp( (a-1.)* log(X/(a-1.)) - Y*sqrt(2.*a-1.));
642 } while (V > test);
643 return X;
644 }
645 static double
gamma_integer(ESL_RANDOMNESS * r,unsigned int a)646 gamma_integer(ESL_RANDOMNESS *r, unsigned int a) /* for small integer a, a < 12 */
647 {
648 int i;
649 double U,X;
650
651 U = 1.;
652 for (i = 0; i < a; i++)
653 U *= esl_rnd_UniformPositive(r);
654 X = -log(U);
655
656 return X;
657 }
658 static double
gamma_fraction(ESL_RANDOMNESS * r,double a)659 gamma_fraction(ESL_RANDOMNESS *r, double a) /* for fractional a, 0 < a < 1 */
660 { /* Knuth 3.4.1, exercise 16, pp. 586-587 */
661 double p, U, V, X, q;
662
663 p = eslCONST_E / (a + eslCONST_E);
664 do {
665 U = esl_random(r);
666 V = esl_rnd_UniformPositive(r);
667 if (U < p) {
668 X = pow(V, 1./a);
669 q = exp(-X);
670 } else {
671 X = 1. - log(V);
672 q = pow(X, a-1.);
673 }
674 U = esl_random(r);
675 } while (U >= q);
676 return X;
677 }
678
679
680 /* Function: esl_rnd_Gamma()
681 * Synopsis: Returns a random deviate from a Gamma(a, 1) distribution.
682 *
683 * Purpose: Return a random deviate distributed as Gamma(a, 1.)
684 * \citep[pp. 133--134]{Knu-81a}.
685 *
686 * The implementation follows not only Knuth \citep{Knu-81a},
687 * but also relied on examination of the implementation in
688 * the GNU Scientific Library (libgsl) \citep{Galassi06}.
689 *
690 * Args: r - random number generation seed
691 * a - order of the gamma function; a > 0
692 */
693 double
esl_rnd_Gamma(ESL_RANDOMNESS * r,double a)694 esl_rnd_Gamma(ESL_RANDOMNESS *r, double a)
695 {
696 double aint;
697
698 ESL_DASSERT1(( a > 0. ));
699
700 aint = floor(a);
701 if (a == aint && a < 12.)
702 return gamma_integer(r, (unsigned int) a);
703 else if (a > 3.)
704 return gamma_ahrens(r, a);
705 else if (a < 1.)
706 return gamma_fraction(r, a);
707 else
708 return gamma_integer(r, aint) + gamma_fraction(r, a-aint);
709 }
710
711
712 /* Function: esl_rnd_Dirichlet()
713 * Synopsis: Sample a Dirichlet-distributed random probability vector
714 * Incept: SRE, Wed Feb 17 12:20:53 2016 [H1/76]
715 *
716 * Purpose: Using generator <rng>, sample a Dirichlet-distributed
717 * probabilty vector <p> of <K> elements, using Dirichlet
718 * parameters <alpha> (also of <K> elements).
719 *
720 * Caller provides the allocated space for <p>.
721 *
722 * <alpha> is optional. If it isn't provided (i.e. is
723 * <NULL>), sample <p> uniformly. (That is, use <alpha[i] =
724 * 1.> for all i=0..K-1.)
725 *
726 * This routine is redundant with <esl_dirichlet_DSample()>
727 * and <esl_dirichlet_DSampleUniform()> in the dirichlet
728 * module. Provided here because there's cases where we
729 * just want to sample a probability vector without
730 * introducing a dependency on all the stats/dirichlet code
731 * in Easel.
732 *
733 * Args: rng : random number generator
734 * alpha : OPTIONAL: Dirichlet parameters 0..K-1, or NULL to use alpha[i]=1 for all i
735 * K : number of elements in alpha, p
736 * p : RESULT: sampled probability vector
737 *
738 * Returns: <eslOK> on success, and <p> is a sampled probability vector.
739 */
740 int
esl_rnd_Dirichlet(ESL_RANDOMNESS * rng,const double * alpha,int K,double * p)741 esl_rnd_Dirichlet(ESL_RANDOMNESS *rng, const double *alpha, int K, double *p)
742 {
743 int i;
744 double norm = 0.;
745
746 for (i = 0; i < K; i++)
747 {
748 p[i] = esl_rnd_Gamma(rng, (alpha ? alpha[i] : 1.0));
749 norm += p[i];
750 }
751 for (i = 0; i < K; i++)
752 p[i] /= norm;
753
754 return eslOK;
755 }
756
757
758 /* Function: esl_rnd_Deal()
759 * Synopsis: Sequential random sample of <m> random integers in range <n>
760 * Incept: SRE, Sun 17 Mar 2019
761 *
762 * Purpose: Obtain a random sequential sample of <m> integers without
763 * replacement from <n> possible ones, like dealing a hand
764 * of m=5 cards from a deck of n=52 possible cards with
765 * cards numbered <0..n-1>. Caller provides allocated space
766 * <deal>, allocated for at least <m> elements. Return the
767 * sample in <deal>, in sorted order (smallest to largest).
768 *
769 * Uses the selection sampling algorithm [Knuth, 3.4.2],
770 * which is O(n) time. For more impressive O(m)
771 * performance, see <esl_rand64_Deal()>.
772 *
773 * As a special case, if m = 0 (for whatever reason,
774 * probably an edge-case-exercising unit test), do
775 * nothing. <deal> is untouched, and can even be <NULL>.
776 *
777 * Args: m - number of integers to sample
778 * n - range: each sample is 0..n-1
779 * deal - RESULT: allocated space for <m> sampled integers
780 *
781 * Returns: <eslOK> on success, and the sample of <m> integers
782 * is in <deal>.
783 *
784 * Throws: (no abnormal error conditions)
785 */
786 int
esl_rnd_Deal(ESL_RANDOMNESS * rng,int m,int n,int * deal)787 esl_rnd_Deal(ESL_RANDOMNESS *rng, int m, int n, int *deal)
788 {
789 int i = 0;
790 int j = 0;
791
792 ESL_DASSERT1(( m >= 0 )); // m=0 is a special case where <deal> can be NULL.
793 ESL_DASSERT1(( n >= 1 ));
794 ESL_DASSERT1(( m <= n ));
795
796 for (j = 0; j < n && i < m; j++)
797 if ( ((double) (n - j)) * esl_random(rng) < (double) (m - i)) deal[i++] = j;
798 return eslOK;
799 }
800
801
802
803 /*****************************************************************
804 *# 5. Multinomial sampling from discrete probability n-vectors
805 *****************************************************************/
806
807 /* Function: esl_rnd_DChoose()
808 * Synopsis: Return random choice from discrete multinomial distribution.
809 *
810 * Purpose: Make a random choice from a normalized discrete
811 * distribution <p> of <N> elements, where <p>
812 * is double-precision. Returns the index of the
813 * selected element, $0..N-1$.
814 *
815 * <p> must be a normalized probability distribution
816 * (i.e. must sum to one). Sampling distribution is
817 * undefined otherwise: that is, a choice will always
818 * be returned, but it might be an arbitrary one.
819 *
820 * All $p_i$ must be $>$ <DBL_EPSILON> in order to
821 * have a non-zero probability of being sampled.
822 *
823 * <esl_rnd_FChoose()> is the same, but for floats in <p>,
824 * and all $p_i$ must be $>$ <FLT_EPSILON>.
825 */
826 int
esl_rnd_DChoose(ESL_RANDOMNESS * r,const double * p,int N)827 esl_rnd_DChoose(ESL_RANDOMNESS *r, const double *p, int N)
828 {
829 double norm = 0.0; /* ~ 1.0 */
830 double sum = 0.0; /* integrated prob */
831 double roll = esl_random(r); /* random fraction */
832 int i; /* counter over the probs */
833
834 /* we need to deal with finite roundoff error in p's sum */
835 for (i = 0; i < N; i++) norm += p[i];
836 ESL_DASSERT1((norm > 0.999 && norm < 1.001));
837
838 for (i = 0; i < N; i++)
839 {
840 sum += p[i];
841 if (roll < (sum / norm) ) return i;
842 }
843 esl_fatal("unreached code was reached. universe collapses.");
844 return 0; /*notreached*/
845 }
846 int
esl_rnd_FChoose(ESL_RANDOMNESS * r,const float * p,int N)847 esl_rnd_FChoose(ESL_RANDOMNESS *r, const float *p, int N)
848 {
849 /* Computing in double precision is important:
850 * casting <roll> to (float) gives a [0,1] number instead of [0,1).
851 */
852 double norm = 0.0; /* ~ 1.0 */
853 double sum = 0.0; /* integrated prob */
854 double roll = esl_random(r); /* random fraction */
855 int i; /* counter over the probs */
856
857 for (i = 0; i < N; i++) norm += p[i];
858 ESL_DASSERT1((norm > 0.99 && norm < 1.01));
859
860 for (i = 0; i < N; i++)
861 {
862 sum += (double) p[i];
863 if (roll < (sum / norm) ) return i;
864 }
865 esl_fatal("unreached code was reached. universe collapses.");
866 return 0; /*notreached*/
867 }
868
869
870 /* Function: esl_rnd_DChooseCDF()
871 * Synopsis: Return random choice from cumulative multinomial distribution.
872 *
873 * Purpose: Given a random number generator <r> and a cumulative
874 * multinomial distribution <cdf[0..N-1]>, sample an element
875 * <0..N-1> from that distribution. Return the index <0..N-1>.
876 *
877 * Caller should be sure that <cdf[0..N-1]> is indeed a
878 * cumulative multinomial distribution -- in particular, that
879 * <cdf[N-1]> is tolerably close to 1.0 (within roundoff error).
880 *
881 * When sampling many times from the same multinomial
882 * distribution <p>, it will generally be faster to
883 * calculate the CDF once using <esl_vec_DCDF(p, N, cdf)>,
884 * then sampling many times from the CDF with
885 * <esl_rnd_DChooseCDF(r, cdf, N)>, as opposed to calling
886 * <esl_rnd_DChoose(r, p, N)> many times, because
887 * <esl_rnd_DChoose()> has to calculated the CDF before
888 * sampling. This also gives you a bit more control over
889 * error detection: you can make sure that the CDF is ok (p
890 * does sum to ~1.0) before doing a lot of sampling from
891 * it.
892 *
893 * <esl_rnd_FChooseCDF()> is the same, but for
894 * a single-precision float <cdf>.
895 *
896 * Args: r - random number generator
897 * cdf - cumulative multinomial distribution, cdf[0..N-1]
898 * N - number of elements in <cdf>
899 *
900 * Returns: index 0..N-1 of the randomly sampled choice from <cdf>.
901 *
902 * Note: For large N, it might be advantageous to bisection search the
903 * cdf. For typical N in Easel (up to 20, for amino acid
904 * prob vectors, for example), the naive code below is
905 * faster. We could revisit this if we start sampling
906 * larger vectors.
907 */
908 int
esl_rnd_DChooseCDF(ESL_RANDOMNESS * r,const double * cdf,int N)909 esl_rnd_DChooseCDF(ESL_RANDOMNESS *r, const double *cdf, int N)
910 {
911 double roll = esl_random(r); /* uniform 0.0 <= x < 1.0 */
912 int i;
913
914 ESL_DASSERT1((cdf[0] >= 0.0));
915 ESL_DASSERT1((cdf[N-1] > 0.999 && cdf[N-1] < 1.001));
916
917 for (i = 0; i < N; i++)
918 if (roll < cdf[i] / cdf[N-1]) return i;
919 esl_fatal("unreached code is reached. universe goes foom");
920 return 0; /*notreached*/
921 }
922 int
esl_rnd_FChooseCDF(ESL_RANDOMNESS * r,const float * cdf,int N)923 esl_rnd_FChooseCDF(ESL_RANDOMNESS *r, const float *cdf, int N)
924 {
925 double roll = esl_random(r); /* uniform 0.0 <= x < 1.0. must be double, not float, to guarantee x <1 */
926 int i;
927
928 ESL_DASSERT1((cdf[0] >= 0.0));
929 ESL_DASSERT1((cdf[N-1] > 0.99 && cdf[N-1] < 1.01));
930
931 for (i = 0; i < N; i++)
932 if (roll < (double) cdf[i] / (double) cdf[N-1]) return i; // yes, the casts are NECESSARY. Without them, you get a heisenbug on icc/linux.
933 esl_fatal("unreached code is reached. universe goes foom");
934 return 0; /*notreached*/
935 }
936
937
938 /*****************************************************************
939 * 6. Random data generators (unit testing, etc.)
940 *****************************************************************/
941
942
943
944 /* Function: esl_rnd_mem()
945 * Synopsis: Overwrite a buffer with random garbage.
946 * Incept: SRE, Fri Feb 19 08:53:07 2016
947 *
948 * Purpose: Write <n> bytes of random garbage into buffer
949 * <buf>, by uniform sampling of values 0..255,
950 * using generator <rng>.
951 *
952 * Used in unit tests that are reusing memory, and that
953 * want to make sure that there's no favorable side effects
954 * from that reuse.
955 */
956 int
esl_rnd_mem(ESL_RANDOMNESS * rng,void * buf,int n)957 esl_rnd_mem(ESL_RANDOMNESS *rng, void *buf, int n)
958 {
959 unsigned char *p = (unsigned char *) buf;
960 int i;
961
962 for (i = 0; i < n; i++)
963 p[i] = (unsigned char) esl_rnd_Roll(rng, 256);
964 return eslOK;
965 }
966
967
968 /* Function: esl_rnd_floatstring()
969 * Synopsis: Generate a string representation of a floating point number.
970 * Incept: SRE, Wed 15 Aug 2018 [Hamilton, It's Quiet Uptown]
971 *
972 * Purpose: Generate a string decimal representation of a random
973 * floating point number in <s>, which is space provided
974 * by the caller, allocated for at least 20 characters.
975 *
976 * The string representation of the number consists of:
977 * * an optional - sign (50%)
978 * * a 1-6 digit integer part; (1/7 '0'; else 1/7 for each len, [1-9][0-9]* uniformly)
979 * * an optional fractional part (50%), consisting of:
980 * - '.'
981 * - a 1-7 digit fractional part; 1/7 for each len, [0-9]* uniformly
982 * * an optional exponent (50%), consisting of:
983 * - 'e'
984 * - -20..e20, 1/41 uniformly.
985 * * '\0' terminator.
986 *
987 * Maximum length is 1+6+1+7+4+1 = 20.
988 *
989 * The string representation is an intersection of what's
990 * valid for <strtod()>, JSON number values, and C. JSON
991 * number values do not permit numbers like "1." (a
992 * decimal part, decimal point, no fractional part), nor
993 * hexadecimal representations, nor leading or trailing
994 * whitespace.
995 *
996 * The generated number does not attempt to exercise
997 * extreme values. The absolute magnitude of non-zero
998 * numbers ranges from 0.0000001e-20..999999e20 (1e-27 to
999 * ~1e27), well within IEEE754 FLT_MIN..FLT_MAX
1000 * range. Special values "infinity" and "nan" are not
1001 * generated. Zero can be represented by many different
1002 * patterns <-?0.0+(e-?..)?>.
1003 *
1004 * Returns: <eslOK> on success.
1005 */
1006 int
esl_rnd_floatstring(ESL_RANDOMNESS * rng,char * s)1007 esl_rnd_floatstring(ESL_RANDOMNESS *rng, char *s)
1008 {
1009 int i = 0;
1010 int n;
1011 int exponent;
1012
1013 if (esl_rnd_Roll(rng, 2)) s[i++] = '-';
1014 n = esl_rnd_Roll(rng, 7); // length of integer part.
1015 s[i++] = (n-- == 0) ? '0' : '0' + (1 + esl_rnd_Roll(rng, 9)); // No 0 prefix except "0."
1016 while (n-- > 0) s[i++] = '0' + esl_rnd_Roll(rng, 10);
1017 if (esl_rnd_Roll(rng, 2)) // optional fractional part
1018 {
1019 n = 1 + esl_rnd_Roll(rng, 7);
1020 s[i++] = '.';
1021 while (n--) s[i++] = '0' + esl_rnd_Roll(rng, 10);
1022 }
1023 if (esl_rnd_Roll(rng, 2)) // optional exponent
1024 {
1025 s[i++] = 'e';
1026 exponent = -20 + esl_rnd_Roll(rng, 41);
1027 i += sprintf(s+i, "%d", exponent);
1028 }
1029 s[i++] = '\0';
1030 ESL_DASSERT1(( i <= 20 ));
1031 return eslOK;
1032 }
1033
1034
1035
1036
1037 /*****************************************************************
1038 * 7. Benchmark driver
1039 *****************************************************************/
1040 #ifdef eslRANDOM_BENCHMARK
1041 /*
1042 gcc -O3 -malign-double -o esl_random_benchmark -I. -L. -DeslRANDOM_BENCHMARK esl_random.c -leasel -lm
1043 ./esl_random_benchmark -N 1000000000
1044 ./esl_random_benchmark -f -N 1000000000
1045 ./esl_random_benchmark -r -N1000000
1046 ./esl_random_benchmark -fr -N 1000000000
1047 esl_random() esl_randomness_Init()
1048 iter cpu time per call iter cpu time per call
1049 ---- -------- -------- ---- ---------- ---------
1050 27 Dec 08 on wanderoo: 1e7 0.78s 78 nsec 1e6 2.08s 2.1 usec ran2() from NR
1051 30 May 09 on wanderoo: 1e9 8.39s 8 nsec 1e6 5.98s 6.0 usec Mersenne Twister
1052 1e9 5.73s 6 nsec 1e8 2.51s 0.03 usec Knuth
1053 22 Aug 18 on wyvern: 1e9 3.31s 3 nsec 1e7 8.71s 0.8 usec Mersenne Twister
1054
1055 */
1056 #include "easel.h"
1057 #include "esl_composition.h"
1058 #include "esl_getopts.h"
1059 #include "esl_random.h"
1060 #include "esl_stopwatch.h"
1061 #include "esl_vectorops.h"
1062
1063 static ESL_OPTIONS options[] = {
1064 /* name type default env range toggles reqs incomp help docgroup*/
1065 { "-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show brief help on version and usage", 0 },
1066 { "-c", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "benchmark DChooseCDF()", 0 },
1067 { "-d", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "benchmark DChoose()", 0 },
1068 { "-f", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "run fast version instead of MT19937", 0 },
1069 { "-r", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "benchmark _Init(), not just random()", 0 },
1070 { "-N", eslARG_INT, "10000000",NULL, NULL, NULL, NULL, NULL, "number of trials", 0 },
1071 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
1072 };
1073 static char usage[] = "[-options]";
1074 static char banner[] = "benchmarking speed of random number generator";
1075
1076 int
main(int argc,char ** argv)1077 main(int argc, char **argv)
1078 {
1079 ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
1080 ESL_RANDOMNESS *r = (esl_opt_GetBoolean(go, "-f") == TRUE ? esl_randomness_CreateFast(42) : esl_randomness_Create(42));
1081 ESL_STOPWATCH *w = esl_stopwatch_Create();
1082 int N = esl_opt_GetInteger(go, "-N");
1083 double p[20];
1084 double cdf[20];
1085
1086 esl_composition_BL62(p);
1087 esl_vec_DCDF(p, 20, cdf);
1088
1089 esl_stopwatch_Start(w);
1090 if (esl_opt_GetBoolean(go, "-c")) { while (N--) esl_rnd_DChoose(r, p, 20); }
1091 else if (esl_opt_GetBoolean(go, "-d")) { while (N--) esl_rnd_DChooseCDF(r, cdf, 20); }
1092 else if (esl_opt_GetBoolean(go, "-r")) { while (N--) esl_randomness_Init(r, 42); }
1093 else { while (N--) esl_random(r); }
1094
1095 esl_stopwatch_Stop(w);
1096 esl_stopwatch_Display(stdout, w, "# CPU Time: ");
1097
1098 esl_stopwatch_Destroy(w);
1099 esl_randomness_Destroy(r);
1100 esl_getopts_Destroy(go);
1101 return 0;
1102 }
1103 #endif /*eslRANDOM_BENCHMARK*/
1104 /*----------------- end, benchmark driver -----------------------*/
1105
1106
1107
1108
1109 /*****************************************************************
1110 * 8. Unit tests.
1111 *****************************************************************/
1112
1113 #ifdef eslRANDOM_TESTDRIVE
1114 #include "esl_vectorops.h"
1115 #include "esl_stats.h"
1116 #include "esl_dirichlet.h"
1117
1118
1119 /* The esl_random() unit test:
1120 * a binned frequency test.
1121 */
1122 static void
utest_random(ESL_RANDOMNESS * r,int n,int nbins,int be_verbose)1123 utest_random(ESL_RANDOMNESS *r, int n, int nbins, int be_verbose)
1124 {
1125 char msg[] = "esl_random() unit test failed";
1126 int *counts = NULL;
1127 double X2p = 0.;
1128 int i;
1129 int sample;
1130 double X2, exp, diff;
1131
1132 if ((counts = malloc(sizeof(int) * nbins)) == NULL) esl_fatal(msg);
1133 esl_vec_ISet(counts, nbins, 0);
1134
1135 for (i = 0; i < n; i++)
1136 {
1137 sample = esl_rnd_Roll(r, nbins);
1138 if (sample < 0 || sample >= nbins) esl_fatal(msg);
1139 counts[sample]++;
1140 }
1141
1142 /* X^2 value: \sum (o_i - e_i)^2 / e_i */
1143 for (X2 = 0., i = 0; i < nbins; i++) {
1144 exp = (double) n / (double) nbins;
1145 diff = (double) counts[i] - exp;
1146 X2 += diff*diff/exp;
1147 }
1148 if (esl_stats_ChiSquaredTest(nbins, X2, &X2p) != eslOK) esl_fatal(msg);
1149 if (be_verbose) printf("random(): \t%g\n", X2p);
1150 if (X2p < 0.01) esl_fatal(msg);
1151
1152 free(counts);
1153 return;
1154 }
1155
1156 /* The DChoose() and FChoose() unit tests.
1157 */
1158 static void
utest_choose(ESL_RANDOMNESS * r,int n,int nbins,int be_verbose)1159 utest_choose(ESL_RANDOMNESS *r, int n, int nbins, int be_verbose)
1160 {
1161 double *pd = NULL; /* probability vector, double */
1162 double *pdc = NULL; /* CDF, double */
1163 float *pf = NULL; /* probability vector, float */
1164 float *pfc = NULL; /* CDF, float */
1165 int *ct = NULL;
1166 int i;
1167 double X2, diff, exp, X2p;
1168
1169 if ((pd = malloc(sizeof(double) * nbins)) == NULL) esl_fatal("malloc failed");
1170 if ((pdc = malloc(sizeof(double) * nbins)) == NULL) esl_fatal("malloc failed");
1171 if ((pf = malloc(sizeof(float) * nbins)) == NULL) esl_fatal("malloc failed");
1172 if ((pfc = malloc(sizeof(float) * nbins)) == NULL) esl_fatal("malloc failed");
1173 if ((ct = malloc(sizeof(int) * nbins)) == NULL) esl_fatal("malloc failed");
1174
1175 /* Sample a random multinomial probability vector. */
1176 if (esl_dirichlet_DSampleUniform(r, nbins, pd) != eslOK) esl_fatal("dirichlet sample failed");
1177 esl_vec_D2F(pd, nbins, pf);
1178
1179 /* Test esl_rnd_DChoose():
1180 * sample observed counts, chi-squared test against expected
1181 */
1182 esl_vec_ISet(ct, nbins, 0);
1183 for (i = 0; i < n; i++)
1184 ct[esl_rnd_DChoose(r, pd, nbins)]++;
1185 for (X2 = 0., i=0; i < nbins; i++) {
1186 exp = (double) n * pd[i];
1187 diff = (double) ct[i] - exp;
1188 X2 += diff*diff/exp;
1189 }
1190 if (esl_stats_ChiSquaredTest(nbins, X2, &X2p) != eslOK) esl_fatal("chi square eval failed");
1191 if (be_verbose) printf("DChoose(): \t%g\n", X2p);
1192 if (X2p < 0.01) esl_fatal("chi squared test failed");
1193
1194 /* Repeat above for FChoose(). */
1195 esl_vec_ISet(ct, nbins, 0);
1196 for (i = 0; i < n; i++)
1197 ct[esl_rnd_FChoose(r, pf, nbins)]++;
1198 for (X2 = 0., i=0; i < nbins; i++) {
1199 exp = (double) n * pd[i];
1200 diff = (double) ct[i] - exp;
1201 X2 += diff*diff/exp;
1202 }
1203 if (esl_stats_ChiSquaredTest(nbins, X2, &X2p) != eslOK) esl_fatal("chi square eval failed");
1204 if (be_verbose) printf("FChoose(): \t%g\n", X2p);
1205 if (X2p < 0.01) esl_fatal("chi squared test failed");
1206
1207 /* esl_rnd_DChooseCDF(). */
1208 esl_vec_ISet(ct, nbins, 0);
1209 esl_vec_DCDF(pd, nbins, pdc);
1210 for (i = 0; i < n; i++)
1211 ct[esl_rnd_DChooseCDF(r, pdc, nbins)]++;
1212 for (X2 = 0., i=0; i < nbins; i++) {
1213 exp = (double) n * pd[i];
1214 diff = (double) ct[i] - exp;
1215 X2 += diff*diff/exp;
1216 }
1217 if (esl_stats_ChiSquaredTest(nbins, X2, &X2p) != eslOK) esl_fatal("chi square eval failed");
1218 if (be_verbose) printf("DChoose(): \t%g\n", X2p);
1219 if (X2p < 0.01) esl_fatal("chi squared test failed");
1220
1221 /* esl_rnd_FChooseCDF() */
1222 esl_vec_ISet(ct, nbins, 0);
1223 esl_vec_FCDF(pf, nbins, pfc);
1224 for (i = 0; i < n; i++)
1225 ct[esl_rnd_FChooseCDF(r, pfc, nbins)]++;
1226 for (X2 = 0., i=0; i < nbins; i++) {
1227 exp = (double) n * pf[i];
1228 diff = (double) ct[i] - exp;
1229 X2 += diff*diff/exp;
1230 }
1231 if (esl_stats_ChiSquaredTest(nbins, X2, &X2p) != eslOK) esl_fatal("chi square eval failed");
1232 if (be_verbose) printf("DChoose(): \t%g\n", X2p);
1233 if (X2p < 0.01) esl_fatal("chi squared test failed");
1234
1235 free(pd);
1236 free(pdc);
1237 free(pf);
1238 free(pfc);
1239 free(ct);
1240 return;
1241 }
1242
1243
1244 /* utest_Deal()
1245 * tests esl_rnd_Deal()
1246 *
1247 * If deals are random, each possible integer is sampled uniformly.
1248 * Take <nsamp> deals of <m> integers from possible <n>.
1249 * Expected count of each integer = nsamp * (m/n), +/- s.d. \sqrt(u)
1250 * Test that min, max are within +/- 6 sd.
1251 *
1252 * Can fail stochastically, so caller should default to an <rng>
1253 * with a fixed RNG seed.
1254 */
1255 static void
utest_Deal(ESL_RANDOMNESS * rng)1256 utest_Deal(ESL_RANDOMNESS *rng)
1257 {
1258 char msg[] = "esl_random deal unit test failed";
1259 int m = 100;
1260 int n = 1000;
1261 int nsamp = 10000;
1262 int *deal = malloc(sizeof(int) * m);
1263 int *ct = malloc(sizeof(int) * n);
1264 double expected_mean = ((double) m / (double) n) * (double) nsamp;
1265 double expected_sd = sqrt(expected_mean);
1266 int max_allowed = (int) round( expected_mean + 6. * expected_sd);
1267 int min_allowed = (int) round( expected_mean - 6. * expected_sd);
1268 int i;
1269
1270 if (deal == NULL || ct == NULL) esl_fatal(msg);
1271 esl_vec_ISet(ct, n, 0);
1272
1273 while (nsamp--)
1274 {
1275 esl_rnd_Deal(rng, m, n, deal);
1276 for (i = 0; i < m; i++) ct[deal[i]]++;
1277 }
1278 if (esl_vec_IMax(ct, n) > max_allowed) esl_fatal(msg);
1279 if (esl_vec_IMin(ct, n) < min_allowed) esl_fatal(msg);
1280
1281 free(deal);
1282 free(ct);
1283 }
1284 #endif /*eslRANDOM_TESTDRIVE*/
1285 /*-------------------- end, unit tests --------------------------*/
1286
1287
1288 /*****************************************************************
1289 * 9. Test driver.
1290 *****************************************************************/
1291 #ifdef eslRANDOM_TESTDRIVE
1292 #include "esl_config.h"
1293
1294 #include <stdio.h>
1295
1296 #include "easel.h"
1297 #include "esl_dirichlet.h"
1298 #include "esl_getopts.h"
1299 #include "esl_random.h"
1300 #include "esl_randomseq.h"
1301 #include "esl_vectorops.h"
1302
1303 static ESL_OPTIONS options[] = {
1304 /* name type default env range togs reqs incomp help docgrp */
1305 {"-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show help and usage", 0},
1306 {"-b", eslARG_INT, "20", NULL, "n>0",NULL, NULL, NULL, "number of test bins", 0},
1307 {"-n", eslARG_INT, "1000000", NULL, "n>0",NULL, NULL, NULL, "number of samples", 0},
1308 {"-s", eslARG_INT, "42", NULL, NULL, NULL, NULL, NULL, "set random number seed to <n>", 0},
1309 {"-v", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show verbose output", 0},
1310 {"--mtbits",eslARG_STRING,NULL,NULL, NULL, NULL, NULL, NULL, "save MT bit file for NIST benchmark",0},
1311 {"--kbits", eslARG_STRING,NULL,NULL, NULL, NULL, NULL, NULL, "save Knuth bit file for NIST benchmark",0},
1312 { 0,0,0,0,0,0,0,0,0,0},
1313 };
1314 static char usage[] = "[-options]";
1315 static char banner[] = "test driver for random module";
1316
1317 static int save_bitfile(char *bitfile, ESL_RANDOMNESS *r, int n);
1318
1319 int
main(int argc,char ** argv)1320 main(int argc, char **argv)
1321 {
1322 ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
1323 ESL_RANDOMNESS *r1 = esl_randomness_Create(esl_opt_GetInteger(go, "-s"));
1324 ESL_RANDOMNESS *r2 = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
1325 char *mtbitfile = esl_opt_GetString (go, "--mtbits");
1326 char *kbitfile = esl_opt_GetString (go, "--kbits");
1327 int nbins = esl_opt_GetInteger(go, "-b");
1328 int n = esl_opt_GetInteger(go, "-n");
1329 int be_verbose = esl_opt_GetBoolean(go, "-v");
1330
1331 fprintf(stderr, "## %s\n", argv[0]);
1332 fprintf(stderr, "# rng seed 1 (slow) = %" PRIu32 "\n", esl_randomness_GetSeed(r1));
1333 fprintf(stderr, "# rng seed 2 (fast) = %" PRIu32 "\n", esl_randomness_GetSeed(r2));
1334
1335 utest_random(r1, n, nbins, be_verbose);
1336 utest_choose(r1, n, nbins, be_verbose);
1337 utest_random(r2, n, nbins, be_verbose);
1338 utest_choose(r2, n, nbins, be_verbose);
1339
1340 utest_Deal(r1);
1341
1342 if (mtbitfile) save_bitfile(mtbitfile, r1, n);
1343 if (kbitfile) save_bitfile(kbitfile, r2, n);
1344
1345 fprintf(stderr, "# status = ok\n");
1346
1347 esl_randomness_Destroy(r1);
1348 esl_randomness_Destroy(r2);
1349 esl_getopts_Destroy(go);
1350 return 0;
1351 }
1352
1353 static int
save_bitfile(char * bitfile,ESL_RANDOMNESS * r,int n)1354 save_bitfile(char *bitfile, ESL_RANDOMNESS *r, int n)
1355 {
1356 FILE *fp = NULL;
1357 int b,i;
1358 long x;
1359
1360 /* Open the file.
1361 */
1362 if ((fp = fopen(bitfile, "w")) == NULL)
1363 esl_fatal("failed to open %s for writing", bitfile);
1364
1365 /* Sample <n> random numbers, output 32n random bits to the file.
1366 */
1367 for (i = 0; i < n; i++)
1368 {
1369 x = (r->type == eslRND_FAST ? knuth(r) : mersenne_twister(r)); /* generate a 32 bit random variate by MT19937 */
1370 for (b = 0; b < 32; b++)
1371 {
1372 if (x & 01) fprintf(fp, "1");
1373 else fprintf(fp, "0");
1374 x >>= 1;
1375 }
1376 fprintf(fp, "\n");
1377 }
1378 fclose(fp);
1379 return eslOK;
1380 }
1381 #endif /*eslRANDOM_TESTDRIVE*/
1382
1383
1384
1385 /*****************************************************************
1386 * 10. Example.
1387 *****************************************************************/
1388 #ifdef eslRANDOM_EXAMPLE
1389 /*::cexcerpt::random_example::begin::*/
1390 /* compile: cc -I. -o esl_random_example -DeslRANDOM_EXAMPLE esl_random.c esl_getopts.c easel.c -lm
1391 * run: ./random_example 42
1392 */
1393 #include <stdio.h>
1394 #include "easel.h"
1395 #include "esl_getopts.h"
1396 #include "esl_random.h"
1397
1398 /* In Easel and HMMER, the option for setting the seed is typically -s, sometimes --seed.
1399 * Default is usually 0 because we want a "random" seed. Less commonly, "42" for a fixed seed;
1400 * rarely, a different fixed seed.
1401 *
1402 * Generally you want to use esl_randomness_Create(), to get a Mersenne Twister. The "fast"
1403 * RNG you get from esl_randomness_CreateFast() isn't all that much faster (~25% per sample)
1404 * but has much worse quality in its randomness.
1405 */
1406 static ESL_OPTIONS options[] = {
1407 /* name type default env range toggles reqs incomp help docgroup*/
1408 { "-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show brief help on version and usage", 0 },
1409 { "-i", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "sample uint32's instead of doubles", 0 },
1410 { "-n", eslARG_INT, "20", NULL, NULL, NULL, NULL, NULL, "number of random samples to show", 0 },
1411 { "-s", eslARG_INT, "0", NULL, NULL, NULL, NULL, NULL, "set random number seed to <n>", 0 },
1412 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
1413 };
1414 static char usage[] = "[-options]";
1415 static char banner[] = "example of using random module";
1416
1417 int
main(int argc,char ** argv)1418 main(int argc, char **argv)
1419 {
1420 ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
1421 ESL_RANDOMNESS *rng = esl_randomness_Create( esl_opt_GetInteger(go, "-s") );
1422 int do_uint32 = esl_opt_GetBoolean(go, "-i");
1423 int n = esl_opt_GetInteger(go, "-n");
1424
1425 printf("RNG seed: %" PRIu32 "\n", esl_randomness_GetSeed(rng));
1426 printf("\nA sequence of %d pseudorandom numbers:\n", n);
1427 if (do_uint32) while (n--) printf("%" PRIu32 "\n", esl_random_uint32(rng));
1428 else while (n--) printf("%f\n", esl_random(rng));
1429
1430 printf("\nInternal dump of RNG state:\n");
1431 esl_randomness_Dump(stdout, rng);
1432
1433 esl_randomness_Destroy(rng);
1434 esl_getopts_Destroy(go);
1435 return 0;
1436 }
1437 /*::cexcerpt::random_example::end::*/
1438 #endif /*eslRANDOM_EXAMPLE*/
1439
1440
1441
1442