1 #include "numpy/random/distributions.h"
2 #include "ziggurat_constants.h"
3 #include "logfactorial.h"
4 
5 #if defined(_MSC_VER) && defined(_WIN64)
6 #include <intrin.h>
7 #endif
8 
9 #include <assert.h>
10 
11 /* Inline generators for internal use */
next_uint32(bitgen_t * bitgen_state)12 static NPY_INLINE uint32_t next_uint32(bitgen_t *bitgen_state) {
13   return bitgen_state->next_uint32(bitgen_state->state);
14 }
next_uint64(bitgen_t * bitgen_state)15 static NPY_INLINE uint64_t next_uint64(bitgen_t *bitgen_state) {
16   return bitgen_state->next_uint64(bitgen_state->state);
17 }
18 
next_float(bitgen_t * bitgen_state)19 static NPY_INLINE float next_float(bitgen_t *bitgen_state) {
20   return (next_uint32(bitgen_state) >> 9) * (1.0f / 8388608.0f);
21 }
22 
23 /* Random generators for external use */
random_standard_uniform_f(bitgen_t * bitgen_state)24 float random_standard_uniform_f(bitgen_t *bitgen_state) {
25     return next_float(bitgen_state);
26 }
27 
random_standard_uniform(bitgen_t * bitgen_state)28 double random_standard_uniform(bitgen_t *bitgen_state) {
29     return next_double(bitgen_state);
30 }
31 
random_standard_uniform_fill(bitgen_t * bitgen_state,npy_intp cnt,double * out)32 void random_standard_uniform_fill(bitgen_t *bitgen_state, npy_intp cnt, double *out) {
33   npy_intp i;
34   for (i = 0; i < cnt; i++) {
35     out[i] = next_double(bitgen_state);
36   }
37 }
38 
random_standard_uniform_fill_f(bitgen_t * bitgen_state,npy_intp cnt,float * out)39 void random_standard_uniform_fill_f(bitgen_t *bitgen_state, npy_intp cnt, float *out) {
40   npy_intp i;
41   for (i = 0; i < cnt; i++) {
42     out[i] = next_float(bitgen_state);
43   }
44 }
45 
standard_exponential_unlikely(bitgen_t * bitgen_state,uint8_t idx,double x)46 static double standard_exponential_unlikely(bitgen_t *bitgen_state,
47                                                 uint8_t idx, double x) {
48   if (idx == 0) {
49     /* Switch to 1.0 - U to avoid log(0.0), see GH 13361 */
50     return ziggurat_exp_r - log(1.0 - next_double(bitgen_state));
51   } else if ((fe_double[idx - 1] - fe_double[idx]) * next_double(bitgen_state) +
52                  fe_double[idx] <
53              exp(-x)) {
54     return x;
55   } else {
56     return random_standard_exponential(bitgen_state);
57   }
58 }
59 
random_standard_exponential(bitgen_t * bitgen_state)60 double random_standard_exponential(bitgen_t *bitgen_state) {
61   uint64_t ri;
62   uint8_t idx;
63   double x;
64   ri = next_uint64(bitgen_state);
65   ri >>= 3;
66   idx = ri & 0xFF;
67   ri >>= 8;
68   x = ri * we_double[idx];
69   if (ri < ke_double[idx]) {
70     return x; /* 98.9% of the time we return here 1st try */
71   }
72   return standard_exponential_unlikely(bitgen_state, idx, x);
73 }
74 
random_standard_exponential_fill(bitgen_t * bitgen_state,npy_intp cnt,double * out)75 void random_standard_exponential_fill(bitgen_t * bitgen_state, npy_intp cnt, double * out)
76 {
77   npy_intp i;
78   for (i = 0; i < cnt; i++) {
79     out[i] = random_standard_exponential(bitgen_state);
80   }
81 }
82 
standard_exponential_unlikely_f(bitgen_t * bitgen_state,uint8_t idx,float x)83 static float standard_exponential_unlikely_f(bitgen_t *bitgen_state,
84                                                  uint8_t idx, float x) {
85   if (idx == 0) {
86     /* Switch to 1.0 - U to avoid log(0.0), see GH 13361 */
87     return ziggurat_exp_r_f - logf(1.0f - next_float(bitgen_state));
88   } else if ((fe_float[idx - 1] - fe_float[idx]) * next_float(bitgen_state) +
89                  fe_float[idx] <
90              expf(-x)) {
91     return x;
92   } else {
93     return random_standard_exponential_f(bitgen_state);
94   }
95 }
96 
random_standard_exponential_f(bitgen_t * bitgen_state)97 float random_standard_exponential_f(bitgen_t *bitgen_state) {
98   uint32_t ri;
99   uint8_t idx;
100   float x;
101   ri = next_uint32(bitgen_state);
102   ri >>= 1;
103   idx = ri & 0xFF;
104   ri >>= 8;
105   x = ri * we_float[idx];
106   if (ri < ke_float[idx]) {
107     return x; /* 98.9% of the time we return here 1st try */
108   }
109   return standard_exponential_unlikely_f(bitgen_state, idx, x);
110 }
111 
random_standard_exponential_fill_f(bitgen_t * bitgen_state,npy_intp cnt,float * out)112 void random_standard_exponential_fill_f(bitgen_t * bitgen_state, npy_intp cnt, float * out)
113 {
114   npy_intp i;
115   for (i = 0; i < cnt; i++) {
116     out[i] = random_standard_exponential_f(bitgen_state);
117   }
118 }
119 
random_standard_exponential_inv_fill(bitgen_t * bitgen_state,npy_intp cnt,double * out)120 void random_standard_exponential_inv_fill(bitgen_t * bitgen_state, npy_intp cnt, double * out)
121 {
122   npy_intp i;
123   for (i = 0; i < cnt; i++) {
124     out[i] = -log(1.0 - next_double(bitgen_state));
125   }
126 }
127 
random_standard_exponential_inv_fill_f(bitgen_t * bitgen_state,npy_intp cnt,float * out)128 void random_standard_exponential_inv_fill_f(bitgen_t * bitgen_state, npy_intp cnt, float * out)
129 {
130   npy_intp i;
131   for (i = 0; i < cnt; i++) {
132     out[i] = -log(1.0 - next_float(bitgen_state));
133   }
134 }
135 
136 
random_standard_normal(bitgen_t * bitgen_state)137 double random_standard_normal(bitgen_t *bitgen_state) {
138   uint64_t r;
139   int sign;
140   uint64_t rabs;
141   int idx;
142   double x, xx, yy;
143   for (;;) {
144     /* r = e3n52sb8 */
145     r = next_uint64(bitgen_state);
146     idx = r & 0xff;
147     r >>= 8;
148     sign = r & 0x1;
149     rabs = (r >> 1) & 0x000fffffffffffff;
150     x = rabs * wi_double[idx];
151     if (sign & 0x1)
152       x = -x;
153     if (rabs < ki_double[idx])
154       return x; /* 99.3% of the time return here */
155     if (idx == 0) {
156       for (;;) {
157         /* Switch to 1.0 - U to avoid log(0.0), see GH 13361 */
158         xx = -ziggurat_nor_inv_r * log(1.0 - next_double(bitgen_state));
159         yy = -log(1.0 - next_double(bitgen_state));
160         if (yy + yy > xx * xx)
161           return ((rabs >> 8) & 0x1) ? -(ziggurat_nor_r + xx)
162                                      : ziggurat_nor_r + xx;
163       }
164     } else {
165       if (((fi_double[idx - 1] - fi_double[idx]) * next_double(bitgen_state) +
166            fi_double[idx]) < exp(-0.5 * x * x))
167         return x;
168     }
169   }
170 }
171 
random_standard_normal_fill(bitgen_t * bitgen_state,npy_intp cnt,double * out)172 void random_standard_normal_fill(bitgen_t *bitgen_state, npy_intp cnt, double *out) {
173   npy_intp i;
174   for (i = 0; i < cnt; i++) {
175     out[i] = random_standard_normal(bitgen_state);
176   }
177 }
178 
random_standard_normal_f(bitgen_t * bitgen_state)179 float random_standard_normal_f(bitgen_t *bitgen_state) {
180   uint32_t r;
181   int sign;
182   uint32_t rabs;
183   int idx;
184   float x, xx, yy;
185   for (;;) {
186     /* r = n23sb8 */
187     r = next_uint32(bitgen_state);
188     idx = r & 0xff;
189     sign = (r >> 8) & 0x1;
190     rabs = (r >> 9) & 0x0007fffff;
191     x = rabs * wi_float[idx];
192     if (sign & 0x1)
193       x = -x;
194     if (rabs < ki_float[idx])
195       return x; /* # 99.3% of the time return here */
196     if (idx == 0) {
197       for (;;) {
198         /* Switch to 1.0 - U to avoid log(0.0), see GH 13361 */
199         xx = -ziggurat_nor_inv_r_f * logf(1.0f - next_float(bitgen_state));
200         yy = -logf(1.0f - next_float(bitgen_state));
201         if (yy + yy > xx * xx)
202           return ((rabs >> 8) & 0x1) ? -(ziggurat_nor_r_f + xx)
203                                      : ziggurat_nor_r_f + xx;
204       }
205     } else {
206       if (((fi_float[idx - 1] - fi_float[idx]) * next_float(bitgen_state) +
207            fi_float[idx]) < exp(-0.5 * x * x))
208         return x;
209     }
210   }
211 }
212 
random_standard_normal_fill_f(bitgen_t * bitgen_state,npy_intp cnt,float * out)213 void random_standard_normal_fill_f(bitgen_t *bitgen_state, npy_intp cnt, float *out) {
214   npy_intp i;
215   for (i = 0; i < cnt; i++) {
216     out[i] = random_standard_normal_f(bitgen_state);
217   }
218 }
219 
random_standard_gamma(bitgen_t * bitgen_state,double shape)220 double random_standard_gamma(bitgen_t *bitgen_state,
221                                             double shape) {
222   double b, c;
223   double U, V, X, Y;
224 
225   if (shape == 1.0) {
226     return random_standard_exponential(bitgen_state);
227   } else if (shape == 0.0) {
228     return 0.0;
229   } else if (shape < 1.0) {
230     for (;;) {
231       U = next_double(bitgen_state);
232       V = random_standard_exponential(bitgen_state);
233       if (U <= 1.0 - shape) {
234         X = pow(U, 1. / shape);
235         if (X <= V) {
236           return X;
237         }
238       } else {
239         Y = -log((1 - U) / shape);
240         X = pow(1.0 - shape + shape * Y, 1. / shape);
241         if (X <= (V + Y)) {
242           return X;
243         }
244       }
245     }
246   } else {
247     b = shape - 1. / 3.;
248     c = 1. / sqrt(9 * b);
249     for (;;) {
250       do {
251         X = random_standard_normal(bitgen_state);
252         V = 1.0 + c * X;
253       } while (V <= 0.0);
254 
255       V = V * V * V;
256       U = next_double(bitgen_state);
257       if (U < 1.0 - 0.0331 * (X * X) * (X * X))
258         return (b * V);
259       /* log(0.0) ok here */
260       if (log(U) < 0.5 * X * X + b * (1. - V + log(V)))
261         return (b * V);
262     }
263   }
264 }
265 
random_standard_gamma_f(bitgen_t * bitgen_state,float shape)266 float random_standard_gamma_f(bitgen_t *bitgen_state,
267                                              float shape) {
268   float b, c;
269   float U, V, X, Y;
270 
271   if (shape == 1.0f) {
272     return random_standard_exponential_f(bitgen_state);
273   } else if (shape == 0.0) {
274     return 0.0;
275   } else if (shape < 1.0f) {
276     for (;;) {
277       U = next_float(bitgen_state);
278       V = random_standard_exponential_f(bitgen_state);
279       if (U <= 1.0f - shape) {
280         X = powf(U, 1.0f / shape);
281         if (X <= V) {
282           return X;
283         }
284       } else {
285         Y = -logf((1.0f - U) / shape);
286         X = powf(1.0f - shape + shape * Y, 1.0f / shape);
287         if (X <= (V + Y)) {
288           return X;
289         }
290       }
291     }
292   } else {
293     b = shape - 1.0f / 3.0f;
294     c = 1.0f / sqrtf(9.0f * b);
295     for (;;) {
296       do {
297         X = random_standard_normal_f(bitgen_state);
298         V = 1.0f + c * X;
299       } while (V <= 0.0f);
300 
301       V = V * V * V;
302       U = next_float(bitgen_state);
303       if (U < 1.0f - 0.0331f * (X * X) * (X * X))
304         return (b * V);
305       /* logf(0.0) ok here */
306       if (logf(U) < 0.5f * X * X + b * (1.0f - V + logf(V)))
307         return (b * V);
308     }
309   }
310 }
311 
random_positive_int64(bitgen_t * bitgen_state)312 int64_t random_positive_int64(bitgen_t *bitgen_state) {
313   return next_uint64(bitgen_state) >> 1;
314 }
315 
random_positive_int32(bitgen_t * bitgen_state)316 int32_t random_positive_int32(bitgen_t *bitgen_state) {
317   return next_uint32(bitgen_state) >> 1;
318 }
319 
random_positive_int(bitgen_t * bitgen_state)320 int64_t random_positive_int(bitgen_t *bitgen_state) {
321 #if ULONG_MAX <= 0xffffffffUL
322   return (int64_t)(next_uint32(bitgen_state) >> 1);
323 #else
324   return (int64_t)(next_uint64(bitgen_state) >> 1);
325 #endif
326 }
327 
random_uint(bitgen_t * bitgen_state)328 uint64_t random_uint(bitgen_t *bitgen_state) {
329 #if ULONG_MAX <= 0xffffffffUL
330   return next_uint32(bitgen_state);
331 #else
332   return next_uint64(bitgen_state);
333 #endif
334 }
335 
336 /*
337  * log-gamma function to support some of these distributions. The
338  * algorithm comes from SPECFUN by Shanjie Zhang and Jianming Jin and their
339  * book "Computation of Special Functions", 1996, John Wiley & Sons, Inc.
340  *
341  * If random_loggam(k+1) is being used to compute log(k!) for an integer k, consider
342  * using logfactorial(k) instead.
343  */
random_loggam(double x)344 double random_loggam(double x) {
345   double x0, x2, lg2pi, gl, gl0;
346   RAND_INT_TYPE k, n;
347 
348   static double a[10] = {8.333333333333333e-02, -2.777777777777778e-03,
349                          7.936507936507937e-04, -5.952380952380952e-04,
350                          8.417508417508418e-04, -1.917526917526918e-03,
351                          6.410256410256410e-03, -2.955065359477124e-02,
352                          1.796443723688307e-01, -1.39243221690590e+00};
353 
354   if ((x == 1.0) || (x == 2.0)) {
355     return 0.0;
356   } else if (x < 7.0) {
357     n = (RAND_INT_TYPE)(7 - x);
358   } else {
359     n = 0;
360   }
361   x0 = x + n;
362   x2 = (1.0 / x0) * (1.0 / x0);
363   /* log(2 * M_PI) */
364   lg2pi = 1.8378770664093453e+00;
365   gl0 = a[9];
366   for (k = 8; k >= 0; k--) {
367     gl0 *= x2;
368     gl0 += a[k];
369   }
370   gl = gl0 / x0 + 0.5 * lg2pi + (x0 - 0.5) * log(x0) - x0;
371   if (x < 7.0) {
372     for (k = 1; k <= n; k++) {
373       gl -= log(x0 - 1.0);
374       x0 -= 1.0;
375     }
376   }
377   return gl;
378 }
379 
380 /*
381 double random_normal(bitgen_t *bitgen_state, double loc, double scale) {
382   return loc + scale * random_gauss(bitgen_state);
383 }
384 */
385 
random_normal(bitgen_t * bitgen_state,double loc,double scale)386 double random_normal(bitgen_t *bitgen_state, double loc, double scale) {
387   return loc + scale * random_standard_normal(bitgen_state);
388 }
389 
random_exponential(bitgen_t * bitgen_state,double scale)390 double random_exponential(bitgen_t *bitgen_state, double scale) {
391   return scale * random_standard_exponential(bitgen_state);
392 }
393 
random_uniform(bitgen_t * bitgen_state,double lower,double range)394 double random_uniform(bitgen_t *bitgen_state, double lower, double range) {
395   return lower + range * next_double(bitgen_state);
396 }
397 
random_gamma(bitgen_t * bitgen_state,double shape,double scale)398 double random_gamma(bitgen_t *bitgen_state, double shape, double scale) {
399   return scale * random_standard_gamma(bitgen_state, shape);
400 }
401 
random_gamma_f(bitgen_t * bitgen_state,float shape,float scale)402 float random_gamma_f(bitgen_t *bitgen_state, float shape, float scale) {
403   return scale * random_standard_gamma_f(bitgen_state, shape);
404 }
405 
random_beta(bitgen_t * bitgen_state,double a,double b)406 double random_beta(bitgen_t *bitgen_state, double a, double b) {
407   double Ga, Gb;
408 
409   if ((a <= 1.0) && (b <= 1.0)) {
410     double U, V, X, Y, XpY;
411     /* Use Johnk's algorithm */
412 
413     while (1) {
414       U = next_double(bitgen_state);
415       V = next_double(bitgen_state);
416       X = pow(U, 1.0 / a);
417       Y = pow(V, 1.0 / b);
418       XpY = X + Y;
419       /* Reject if both U and V are 0.0, which is approx 1 in 10^106 */
420       if ((XpY <= 1.0) && (XpY > 0.0)) {
421         if (X + Y > 0) {
422           return X / XpY;
423         } else {
424           double logX = log(U) / a;
425           double logY = log(V) / b;
426           double logM = logX > logY ? logX : logY;
427           logX -= logM;
428           logY -= logM;
429 
430           return exp(logX - log(exp(logX) + exp(logY)));
431         }
432       }
433     }
434   } else {
435     Ga = random_standard_gamma(bitgen_state, a);
436     Gb = random_standard_gamma(bitgen_state, b);
437     return Ga / (Ga + Gb);
438   }
439 }
440 
random_chisquare(bitgen_t * bitgen_state,double df)441 double random_chisquare(bitgen_t *bitgen_state, double df) {
442   return 2.0 * random_standard_gamma(bitgen_state, df / 2.0);
443 }
444 
random_f(bitgen_t * bitgen_state,double dfnum,double dfden)445 double random_f(bitgen_t *bitgen_state, double dfnum, double dfden) {
446   return ((random_chisquare(bitgen_state, dfnum) * dfden) /
447           (random_chisquare(bitgen_state, dfden) * dfnum));
448 }
449 
random_standard_cauchy(bitgen_t * bitgen_state)450 double random_standard_cauchy(bitgen_t *bitgen_state) {
451   return random_standard_normal(bitgen_state) / random_standard_normal(bitgen_state);
452 }
453 
random_pareto(bitgen_t * bitgen_state,double a)454 double random_pareto(bitgen_t *bitgen_state, double a) {
455   return exp(random_standard_exponential(bitgen_state) / a) - 1;
456 }
457 
random_weibull(bitgen_t * bitgen_state,double a)458 double random_weibull(bitgen_t *bitgen_state, double a) {
459   if (a == 0.0) {
460     return 0.0;
461   }
462   return pow(random_standard_exponential(bitgen_state), 1. / a);
463 }
464 
random_power(bitgen_t * bitgen_state,double a)465 double random_power(bitgen_t *bitgen_state, double a) {
466   return pow(1 - exp(-random_standard_exponential(bitgen_state)), 1. / a);
467 }
468 
random_laplace(bitgen_t * bitgen_state,double loc,double scale)469 double random_laplace(bitgen_t *bitgen_state, double loc, double scale) {
470   double U;
471 
472   U = next_double(bitgen_state);
473   if (U >= 0.5) {
474     U = loc - scale * log(2.0 - U - U);
475   } else if (U > 0.0) {
476     U = loc + scale * log(U + U);
477   } else {
478     /* Reject U == 0.0 and call again to get next value */
479     U = random_laplace(bitgen_state, loc, scale);
480   }
481   return U;
482 }
483 
random_gumbel(bitgen_t * bitgen_state,double loc,double scale)484 double random_gumbel(bitgen_t *bitgen_state, double loc, double scale) {
485   double U;
486 
487   U = 1.0 - next_double(bitgen_state);
488   if (U < 1.0) {
489     return loc - scale * log(-log(U));
490   }
491   /* Reject U == 1.0 and call again to get next value */
492   return random_gumbel(bitgen_state, loc, scale);
493 }
494 
random_logistic(bitgen_t * bitgen_state,double loc,double scale)495 double random_logistic(bitgen_t *bitgen_state, double loc, double scale) {
496   double U;
497 
498   U = next_double(bitgen_state);
499   if (U > 0.0) {
500     return loc + scale * log(U / (1.0 - U));
501   }
502   /* Reject U == 0.0 and call again to get next value */
503   return random_logistic(bitgen_state, loc, scale);
504 }
505 
random_lognormal(bitgen_t * bitgen_state,double mean,double sigma)506 double random_lognormal(bitgen_t *bitgen_state, double mean, double sigma) {
507   return exp(random_normal(bitgen_state, mean, sigma));
508 }
509 
random_rayleigh(bitgen_t * bitgen_state,double mode)510 double random_rayleigh(bitgen_t *bitgen_state, double mode) {
511   return mode * sqrt(-2.0 * log(1.0 - next_double(bitgen_state)));
512 }
513 
random_standard_t(bitgen_t * bitgen_state,double df)514 double random_standard_t(bitgen_t *bitgen_state, double df) {
515   double num, denom;
516 
517   num = random_standard_normal(bitgen_state);
518   denom = random_standard_gamma(bitgen_state, df / 2);
519   return sqrt(df / 2) * num / sqrt(denom);
520 }
521 
random_poisson_mult(bitgen_t * bitgen_state,double lam)522 static RAND_INT_TYPE random_poisson_mult(bitgen_t *bitgen_state, double lam) {
523   RAND_INT_TYPE X;
524   double prod, U, enlam;
525 
526   enlam = exp(-lam);
527   X = 0;
528   prod = 1.0;
529   while (1) {
530     U = next_double(bitgen_state);
531     prod *= U;
532     if (prod > enlam) {
533       X += 1;
534     } else {
535       return X;
536     }
537   }
538 }
539 
540 /*
541  * The transformed rejection method for generating Poisson random variables
542  * W. Hoermann
543  * Insurance: Mathematics and Economics 12, 39-45 (1993)
544  */
545 #define LS2PI 0.91893853320467267
546 #define TWELFTH 0.083333333333333333333333
random_poisson_ptrs(bitgen_t * bitgen_state,double lam)547 static RAND_INT_TYPE random_poisson_ptrs(bitgen_t *bitgen_state, double lam) {
548   RAND_INT_TYPE k;
549   double U, V, slam, loglam, a, b, invalpha, vr, us;
550 
551   slam = sqrt(lam);
552   loglam = log(lam);
553   b = 0.931 + 2.53 * slam;
554   a = -0.059 + 0.02483 * b;
555   invalpha = 1.1239 + 1.1328 / (b - 3.4);
556   vr = 0.9277 - 3.6224 / (b - 2);
557 
558   while (1) {
559     U = next_double(bitgen_state) - 0.5;
560     V = next_double(bitgen_state);
561     us = 0.5 - fabs(U);
562     k = (RAND_INT_TYPE)floor((2 * a / us + b) * U + lam + 0.43);
563     if ((us >= 0.07) && (V <= vr)) {
564       return k;
565     }
566     if ((k < 0) || ((us < 0.013) && (V > us))) {
567       continue;
568     }
569     /* log(V) == log(0.0) ok here */
570     /* if U==0.0 so that us==0.0, log is ok since always returns */
571     if ((log(V) + log(invalpha) - log(a / (us * us) + b)) <=
572         (-lam + k * loglam - random_loggam(k + 1))) {
573       return k;
574     }
575   }
576 }
577 
random_poisson(bitgen_t * bitgen_state,double lam)578 RAND_INT_TYPE random_poisson(bitgen_t *bitgen_state, double lam) {
579   if (lam >= 10) {
580     return random_poisson_ptrs(bitgen_state, lam);
581   } else if (lam == 0) {
582     return 0;
583   } else {
584     return random_poisson_mult(bitgen_state, lam);
585   }
586 }
587 
random_negative_binomial(bitgen_t * bitgen_state,double n,double p)588 RAND_INT_TYPE random_negative_binomial(bitgen_t *bitgen_state, double n,
589                                        double p) {
590   double Y = random_gamma(bitgen_state, n, (1 - p) / p);
591   return random_poisson(bitgen_state, Y);
592 }
593 
random_binomial_btpe(bitgen_t * bitgen_state,RAND_INT_TYPE n,double p,binomial_t * binomial)594 RAND_INT_TYPE random_binomial_btpe(bitgen_t *bitgen_state, RAND_INT_TYPE n,
595                                    double p, binomial_t *binomial) {
596   double r, q, fm, p1, xm, xl, xr, c, laml, lamr, p2, p3, p4;
597   double a, u, v, s, F, rho, t, A, nrq, x1, x2, f1, f2, z, z2, w, w2, x;
598   RAND_INT_TYPE m, y, k, i;
599 
600   if (!(binomial->has_binomial) || (binomial->nsave != n) ||
601       (binomial->psave != p)) {
602     /* initialize */
603     binomial->nsave = n;
604     binomial->psave = p;
605     binomial->has_binomial = 1;
606     binomial->r = r = MIN(p, 1.0 - p);
607     binomial->q = q = 1.0 - r;
608     binomial->fm = fm = n * r + r;
609     binomial->m = m = (RAND_INT_TYPE)floor(binomial->fm);
610     binomial->p1 = p1 = floor(2.195 * sqrt(n * r * q) - 4.6 * q) + 0.5;
611     binomial->xm = xm = m + 0.5;
612     binomial->xl = xl = xm - p1;
613     binomial->xr = xr = xm + p1;
614     binomial->c = c = 0.134 + 20.5 / (15.3 + m);
615     a = (fm - xl) / (fm - xl * r);
616     binomial->laml = laml = a * (1.0 + a / 2.0);
617     a = (xr - fm) / (xr * q);
618     binomial->lamr = lamr = a * (1.0 + a / 2.0);
619     binomial->p2 = p2 = p1 * (1.0 + 2.0 * c);
620     binomial->p3 = p3 = p2 + c / laml;
621     binomial->p4 = p4 = p3 + c / lamr;
622   } else {
623     r = binomial->r;
624     q = binomial->q;
625     fm = binomial->fm;
626     m = binomial->m;
627     p1 = binomial->p1;
628     xm = binomial->xm;
629     xl = binomial->xl;
630     xr = binomial->xr;
631     c = binomial->c;
632     laml = binomial->laml;
633     lamr = binomial->lamr;
634     p2 = binomial->p2;
635     p3 = binomial->p3;
636     p4 = binomial->p4;
637   }
638 
639 /* sigh ... */
640 Step10:
641   nrq = n * r * q;
642   u = next_double(bitgen_state) * p4;
643   v = next_double(bitgen_state);
644   if (u > p1)
645     goto Step20;
646   y = (RAND_INT_TYPE)floor(xm - p1 * v + u);
647   goto Step60;
648 
649 Step20:
650   if (u > p2)
651     goto Step30;
652   x = xl + (u - p1) / c;
653   v = v * c + 1.0 - fabs(m - x + 0.5) / p1;
654   if (v > 1.0)
655     goto Step10;
656   y = (RAND_INT_TYPE)floor(x);
657   goto Step50;
658 
659 Step30:
660   if (u > p3)
661     goto Step40;
662   y = (RAND_INT_TYPE)floor(xl + log(v) / laml);
663   /* Reject if v==0.0 since previous cast is undefined */
664   if ((y < 0) || (v == 0.0))
665     goto Step10;
666   v = v * (u - p2) * laml;
667   goto Step50;
668 
669 Step40:
670   y = (RAND_INT_TYPE)floor(xr - log(v) / lamr);
671   /* Reject if v==0.0 since previous cast is undefined */
672   if ((y > n) || (v == 0.0))
673     goto Step10;
674   v = v * (u - p3) * lamr;
675 
676 Step50:
677   k = llabs(y - m);
678   if ((k > 20) && (k < ((nrq) / 2.0 - 1)))
679     goto Step52;
680 
681   s = r / q;
682   a = s * (n + 1);
683   F = 1.0;
684   if (m < y) {
685     for (i = m + 1; i <= y; i++) {
686       F *= (a / i - s);
687     }
688   } else if (m > y) {
689     for (i = y + 1; i <= m; i++) {
690       F /= (a / i - s);
691     }
692   }
693   if (v > F)
694     goto Step10;
695   goto Step60;
696 
697 Step52:
698   rho =
699       (k / (nrq)) * ((k * (k / 3.0 + 0.625) + 0.16666666666666666) / nrq + 0.5);
700   t = -k * k / (2 * nrq);
701   /* log(0.0) ok here */
702   A = log(v);
703   if (A < (t - rho))
704     goto Step60;
705   if (A > (t + rho))
706     goto Step10;
707 
708   x1 = y + 1;
709   f1 = m + 1;
710   z = n + 1 - m;
711   w = n - y + 1;
712   x2 = x1 * x1;
713   f2 = f1 * f1;
714   z2 = z * z;
715   w2 = w * w;
716   if (A > (xm * log(f1 / x1) + (n - m + 0.5) * log(z / w) +
717            (y - m) * log(w * r / (x1 * q)) +
718            (13680. - (462. - (132. - (99. - 140. / f2) / f2) / f2) / f2) / f1 /
719                166320. +
720            (13680. - (462. - (132. - (99. - 140. / z2) / z2) / z2) / z2) / z /
721                166320. +
722            (13680. - (462. - (132. - (99. - 140. / x2) / x2) / x2) / x2) / x1 /
723                166320. +
724            (13680. - (462. - (132. - (99. - 140. / w2) / w2) / w2) / w2) / w /
725                166320.)) {
726     goto Step10;
727   }
728 
729 Step60:
730   if (p > 0.5) {
731     y = n - y;
732   }
733 
734   return y;
735 }
736 
random_binomial_inversion(bitgen_t * bitgen_state,RAND_INT_TYPE n,double p,binomial_t * binomial)737 RAND_INT_TYPE random_binomial_inversion(bitgen_t *bitgen_state, RAND_INT_TYPE n,
738                                         double p, binomial_t *binomial) {
739   double q, qn, np, px, U;
740   RAND_INT_TYPE X, bound;
741 
742   if (!(binomial->has_binomial) || (binomial->nsave != n) ||
743       (binomial->psave != p)) {
744     binomial->nsave = n;
745     binomial->psave = p;
746     binomial->has_binomial = 1;
747     binomial->q = q = 1.0 - p;
748     binomial->r = qn = exp(n * log(q));
749     binomial->c = np = n * p;
750     binomial->m = bound = (RAND_INT_TYPE)MIN(n, np + 10.0 * sqrt(np * q + 1));
751   } else {
752     q = binomial->q;
753     qn = binomial->r;
754     np = binomial->c;
755     bound = binomial->m;
756   }
757   X = 0;
758   px = qn;
759   U = next_double(bitgen_state);
760   while (U > px) {
761     X++;
762     if (X > bound) {
763       X = 0;
764       px = qn;
765       U = next_double(bitgen_state);
766     } else {
767       U -= px;
768       px = ((n - X + 1) * p * px) / (X * q);
769     }
770   }
771   return X;
772 }
773 
random_binomial(bitgen_t * bitgen_state,double p,int64_t n,binomial_t * binomial)774 int64_t random_binomial(bitgen_t *bitgen_state, double p, int64_t n,
775                         binomial_t *binomial) {
776   double q;
777 
778   if ((n == 0LL) || (p == 0.0f))
779     return 0;
780 
781   if (p <= 0.5) {
782     if (p * n <= 30.0) {
783       return random_binomial_inversion(bitgen_state, n, p, binomial);
784     } else {
785       return random_binomial_btpe(bitgen_state, n, p, binomial);
786     }
787   } else {
788     q = 1.0 - p;
789     if (q * n <= 30.0) {
790       return n - random_binomial_inversion(bitgen_state, n, q, binomial);
791     } else {
792       return n - random_binomial_btpe(bitgen_state, n, q, binomial);
793     }
794   }
795 }
796 
random_noncentral_chisquare(bitgen_t * bitgen_state,double df,double nonc)797 double random_noncentral_chisquare(bitgen_t *bitgen_state, double df,
798                                    double nonc) {
799   if (npy_isnan(nonc)) {
800     return NPY_NAN;
801   }
802   if (nonc == 0) {
803     return random_chisquare(bitgen_state, df);
804   }
805   if (1 < df) {
806     const double Chi2 = random_chisquare(bitgen_state, df - 1);
807     const double n = random_standard_normal(bitgen_state) + sqrt(nonc);
808     return Chi2 + n * n;
809   } else {
810     const RAND_INT_TYPE i = random_poisson(bitgen_state, nonc / 2.0);
811     return random_chisquare(bitgen_state, df + 2 * i);
812   }
813 }
814 
random_noncentral_f(bitgen_t * bitgen_state,double dfnum,double dfden,double nonc)815 double random_noncentral_f(bitgen_t *bitgen_state, double dfnum, double dfden,
816                            double nonc) {
817   double t = random_noncentral_chisquare(bitgen_state, dfnum, nonc) * dfden;
818   return t / (random_chisquare(bitgen_state, dfden) * dfnum);
819 }
820 
random_wald(bitgen_t * bitgen_state,double mean,double scale)821 double random_wald(bitgen_t *bitgen_state, double mean, double scale) {
822   double U, X, Y;
823   double mu_2l;
824 
825   mu_2l = mean / (2 * scale);
826   Y = random_standard_normal(bitgen_state);
827   Y = mean * Y * Y;
828   X = mean + mu_2l * (Y - sqrt(4 * scale * Y + Y * Y));
829   U = next_double(bitgen_state);
830   if (U <= mean / (mean + X)) {
831     return X;
832   } else {
833     return mean * mean / X;
834   }
835 }
836 
random_vonmises(bitgen_t * bitgen_state,double mu,double kappa)837 double random_vonmises(bitgen_t *bitgen_state, double mu, double kappa) {
838   double s;
839   double U, V, W, Y, Z;
840   double result, mod;
841   int neg;
842   if (npy_isnan(kappa)) {
843     return NPY_NAN;
844   }
845   if (kappa < 1e-8) {
846     return M_PI * (2 * next_double(bitgen_state) - 1);
847   } else {
848     /* with double precision rho is zero until 1.4e-8 */
849     if (kappa < 1e-5) {
850       /*
851        * second order taylor expansion around kappa = 0
852        * precise until relatively large kappas as second order is 0
853        */
854       s = (1. / kappa + kappa);
855     } else {
856       double r = 1 + sqrt(1 + 4 * kappa * kappa);
857       double rho = (r - sqrt(2 * r)) / (2 * kappa);
858       s = (1 + rho * rho) / (2 * rho);
859     }
860 
861     while (1) {
862       U = next_double(bitgen_state);
863       Z = cos(M_PI * U);
864       W = (1 + s * Z) / (s + Z);
865       Y = kappa * (s - W);
866       V = next_double(bitgen_state);
867       /*
868        * V==0.0 is ok here since Y >= 0 always leads
869        * to accept, while Y < 0 always rejects
870        */
871       if ((Y * (2 - Y) - V >= 0) || (log(Y / V) + 1 - Y >= 0)) {
872         break;
873       }
874     }
875 
876     U = next_double(bitgen_state);
877 
878     result = acos(W);
879     if (U < 0.5) {
880       result = -result;
881     }
882     result += mu;
883     neg = (result < 0);
884     mod = fabs(result);
885     mod = (fmod(mod + M_PI, 2 * M_PI) - M_PI);
886     if (neg) {
887       mod *= -1;
888     }
889 
890     return mod;
891   }
892 }
893 
894 /*
895  * RAND_INT_TYPE is used to share integer generators with RandomState which
896  * used long in place of int64_t. If changing a distribution that uses
897  * RAND_INT_TYPE, then the original unmodified copy must be retained for
898  * use in RandomState by copying to the legacy distributions source file.
899  */
random_logseries(bitgen_t * bitgen_state,double p)900 RAND_INT_TYPE random_logseries(bitgen_t *bitgen_state, double p) {
901   double q, r, U, V;
902   RAND_INT_TYPE result;
903 
904   r = log(1.0 - p);
905 
906   while (1) {
907     V = next_double(bitgen_state);
908     if (V >= p) {
909       return 1;
910     }
911     U = next_double(bitgen_state);
912     q = 1.0 - exp(r * U);
913     if (V <= q * q) {
914       result = (RAND_INT_TYPE)floor(1 + log(V) / log(q));
915       if ((result < 1) || (V == 0.0)) {
916         continue;
917       } else {
918         return result;
919       }
920     }
921     if (V >= q) {
922       return 1;
923     }
924     return 2;
925   }
926 }
927 
random_geometric_search(bitgen_t * bitgen_state,double p)928 RAND_INT_TYPE random_geometric_search(bitgen_t *bitgen_state, double p) {
929   double U;
930   RAND_INT_TYPE X;
931   double sum, prod, q;
932 
933   X = 1;
934   sum = prod = p;
935   q = 1.0 - p;
936   U = next_double(bitgen_state);
937   while (U > sum) {
938     prod *= q;
939     sum += prod;
940     X++;
941   }
942   return X;
943 }
944 
random_geometric_inversion(bitgen_t * bitgen_state,double p)945 RAND_INT_TYPE random_geometric_inversion(bitgen_t *bitgen_state, double p) {
946   return (RAND_INT_TYPE)ceil(log(1.0 - next_double(bitgen_state)) / log(1.0 - p));
947 }
948 
random_geometric(bitgen_t * bitgen_state,double p)949 RAND_INT_TYPE random_geometric(bitgen_t *bitgen_state, double p) {
950   if (p >= 0.333333333333333333333333) {
951     return random_geometric_search(bitgen_state, p);
952   } else {
953     return random_geometric_inversion(bitgen_state, p);
954   }
955 }
956 
random_zipf(bitgen_t * bitgen_state,double a)957 RAND_INT_TYPE random_zipf(bitgen_t *bitgen_state, double a) {
958   double am1, b;
959 
960   am1 = a - 1.0;
961   b = pow(2.0, am1);
962   while (1) {
963     double T, U, V, X;
964 
965     U = 1.0 - next_double(bitgen_state);
966     V = next_double(bitgen_state);
967     X = floor(pow(U, -1.0 / am1));
968     /*
969      * The real result may be above what can be represented in a signed
970      * long. Since this is a straightforward rejection algorithm, we can
971      * just reject this value. This function then models a Zipf
972      * distribution truncated to sys.maxint.
973      */
974     if (X > RAND_INT_MAX || X < 1.0) {
975       continue;
976     }
977 
978     T = pow(1.0 + 1.0 / X, am1);
979     if (V * X * (T - 1.0) / (b - 1.0) <= T / b) {
980       return (RAND_INT_TYPE)X;
981     }
982   }
983 }
984 
random_triangular(bitgen_t * bitgen_state,double left,double mode,double right)985 double random_triangular(bitgen_t *bitgen_state, double left, double mode,
986                          double right) {
987   double base, leftbase, ratio, leftprod, rightprod;
988   double U;
989 
990   base = right - left;
991   leftbase = mode - left;
992   ratio = leftbase / base;
993   leftprod = leftbase * base;
994   rightprod = (right - mode) * base;
995 
996   U = next_double(bitgen_state);
997   if (U <= ratio) {
998     return left + sqrt(U * leftprod);
999   } else {
1000     return right - sqrt((1.0 - U) * rightprod);
1001   }
1002 }
1003 
1004 
random_interval(bitgen_t * bitgen_state,uint64_t max)1005 uint64_t random_interval(bitgen_t *bitgen_state, uint64_t max) {
1006   uint64_t mask, value;
1007   if (max == 0) {
1008     return 0;
1009   }
1010 
1011   mask = max;
1012 
1013   /* Smallest bit mask >= max */
1014   mask |= mask >> 1;
1015   mask |= mask >> 2;
1016   mask |= mask >> 4;
1017   mask |= mask >> 8;
1018   mask |= mask >> 16;
1019   mask |= mask >> 32;
1020 
1021   /* Search a random value in [0..mask] <= max */
1022   if (max <= 0xffffffffUL) {
1023     while ((value = (next_uint32(bitgen_state) & mask)) > max)
1024       ;
1025   } else {
1026     while ((value = (next_uint64(bitgen_state) & mask)) > max)
1027       ;
1028   }
1029   return value;
1030 }
1031 
1032 /* Bounded generators */
gen_mask(uint64_t max)1033 static NPY_INLINE uint64_t gen_mask(uint64_t max) {
1034   uint64_t mask = max;
1035   mask |= mask >> 1;
1036   mask |= mask >> 2;
1037   mask |= mask >> 4;
1038   mask |= mask >> 8;
1039   mask |= mask >> 16;
1040   mask |= mask >> 32;
1041   return mask;
1042 }
1043 
1044 /* Generate 16 bit random numbers using a 32 bit buffer. */
buffered_uint16(bitgen_t * bitgen_state,int * bcnt,uint32_t * buf)1045 static NPY_INLINE uint16_t buffered_uint16(bitgen_t *bitgen_state, int *bcnt,
1046                                            uint32_t *buf) {
1047   if (!(bcnt[0])) {
1048     buf[0] = next_uint32(bitgen_state);
1049     bcnt[0] = 1;
1050   } else {
1051     buf[0] >>= 16;
1052     bcnt[0] -= 1;
1053   }
1054 
1055   return (uint16_t)buf[0];
1056 }
1057 
1058 /* Generate 8 bit random numbers using a 32 bit buffer. */
buffered_uint8(bitgen_t * bitgen_state,int * bcnt,uint32_t * buf)1059 static NPY_INLINE uint8_t buffered_uint8(bitgen_t *bitgen_state, int *bcnt,
1060                                          uint32_t *buf) {
1061   if (!(bcnt[0])) {
1062     buf[0] = next_uint32(bitgen_state);
1063     bcnt[0] = 3;
1064   } else {
1065     buf[0] >>= 8;
1066     bcnt[0] -= 1;
1067   }
1068 
1069   return (uint8_t)buf[0];
1070 }
1071 
1072 /* Static `masked rejection` function called by random_bounded_uint64(...) */
bounded_masked_uint64(bitgen_t * bitgen_state,uint64_t rng,uint64_t mask)1073 static NPY_INLINE uint64_t bounded_masked_uint64(bitgen_t *bitgen_state,
1074                                                  uint64_t rng, uint64_t mask) {
1075   uint64_t val;
1076 
1077   while ((val = (next_uint64(bitgen_state) & mask)) > rng)
1078     ;
1079 
1080   return val;
1081 }
1082 
1083 /* Static `masked rejection` function called by
1084  * random_buffered_bounded_uint32(...) */
1085 static NPY_INLINE uint32_t
buffered_bounded_masked_uint32(bitgen_t * bitgen_state,uint32_t rng,uint32_t mask,int * bcnt,uint32_t * buf)1086 buffered_bounded_masked_uint32(bitgen_t *bitgen_state, uint32_t rng,
1087                                uint32_t mask, int *bcnt, uint32_t *buf) {
1088   /*
1089    * The buffer and buffer count are not used here but are included to allow
1090    * this function to be templated with the similar uint8 and uint16
1091    * functions
1092    */
1093 
1094   uint32_t val;
1095 
1096   while ((val = (next_uint32(bitgen_state) & mask)) > rng)
1097     ;
1098 
1099   return val;
1100 }
1101 
1102 /* Static `masked rejection` function called by
1103  * random_buffered_bounded_uint16(...) */
1104 static NPY_INLINE uint16_t
buffered_bounded_masked_uint16(bitgen_t * bitgen_state,uint16_t rng,uint16_t mask,int * bcnt,uint32_t * buf)1105 buffered_bounded_masked_uint16(bitgen_t *bitgen_state, uint16_t rng,
1106                                uint16_t mask, int *bcnt, uint32_t *buf) {
1107   uint16_t val;
1108 
1109   while ((val = (buffered_uint16(bitgen_state, bcnt, buf) & mask)) > rng)
1110     ;
1111 
1112   return val;
1113 }
1114 
1115 /* Static `masked rejection` function called by
1116  * random_buffered_bounded_uint8(...) */
buffered_bounded_masked_uint8(bitgen_t * bitgen_state,uint8_t rng,uint8_t mask,int * bcnt,uint32_t * buf)1117 static NPY_INLINE uint8_t buffered_bounded_masked_uint8(bitgen_t *bitgen_state,
1118                                                         uint8_t rng,
1119                                                         uint8_t mask, int *bcnt,
1120                                                         uint32_t *buf) {
1121   uint8_t val;
1122 
1123   while ((val = (buffered_uint8(bitgen_state, bcnt, buf) & mask)) > rng)
1124     ;
1125 
1126   return val;
1127 }
1128 
buffered_bounded_bool(bitgen_t * bitgen_state,npy_bool off,npy_bool rng,npy_bool mask,int * bcnt,uint32_t * buf)1129 static NPY_INLINE npy_bool buffered_bounded_bool(bitgen_t *bitgen_state,
1130                                                  npy_bool off, npy_bool rng,
1131                                                  npy_bool mask, int *bcnt,
1132                                                  uint32_t *buf) {
1133   if (rng == 0)
1134     return off;
1135   if (!(bcnt[0])) {
1136     buf[0] = next_uint32(bitgen_state);
1137     bcnt[0] = 31;
1138   } else {
1139     buf[0] >>= 1;
1140     bcnt[0] -= 1;
1141   }
1142   return (buf[0] & 0x00000001UL) != 0;
1143 }
1144 
1145 /* Static `Lemire rejection` function called by random_bounded_uint64(...) */
bounded_lemire_uint64(bitgen_t * bitgen_state,uint64_t rng)1146 static NPY_INLINE uint64_t bounded_lemire_uint64(bitgen_t *bitgen_state,
1147                                                  uint64_t rng) {
1148   /*
1149    * Uses Lemire's algorithm - https://arxiv.org/abs/1805.10941
1150    *
1151    * Note: `rng` should not be 0xFFFFFFFFFFFFFFFF. When this happens `rng_excl`
1152    * becomes zero.
1153    */
1154   const uint64_t rng_excl = rng + 1;
1155 
1156   assert(rng != 0xFFFFFFFFFFFFFFFFULL);
1157 
1158 #if __SIZEOF_INT128__
1159   /* 128-bit uint available (e.g. GCC/clang). `m` is the __uint128_t scaled
1160    * integer. */
1161   __uint128_t m;
1162   uint64_t leftover;
1163 
1164   /* Generate a scaled random number. */
1165   m = ((__uint128_t)next_uint64(bitgen_state)) * rng_excl;
1166 
1167   /* Rejection sampling to remove any bias. */
1168   leftover = m & 0xFFFFFFFFFFFFFFFFULL;
1169 
1170   if (leftover < rng_excl) {
1171     /* `rng_excl` is a simple upper bound for `threshold`. */
1172     const uint64_t threshold = (UINT64_MAX - rng) % rng_excl;
1173 
1174     while (leftover < threshold) {
1175       m = ((__uint128_t)next_uint64(bitgen_state)) * rng_excl;
1176       leftover = m & 0xFFFFFFFFFFFFFFFFULL;
1177     }
1178   }
1179 
1180   return (m >> 64);
1181 #else
1182   /* 128-bit uint NOT available (e.g. MSVS). `m1` is the upper 64-bits of the
1183    * scaled integer. */
1184   uint64_t m1;
1185   uint64_t x;
1186   uint64_t leftover;
1187 
1188   x = next_uint64(bitgen_state);
1189 
1190   /* Rejection sampling to remove any bias. */
1191   leftover = x * rng_excl; /* The lower 64-bits of the mult. */
1192 
1193   if (leftover < rng_excl) {
1194     /* `rng_excl` is a simple upper bound for `threshold`. */
1195     const uint64_t threshold = (UINT64_MAX - rng) % rng_excl;
1196 
1197     while (leftover < threshold) {
1198       x = next_uint64(bitgen_state);
1199       leftover = x * rng_excl;
1200     }
1201   }
1202 
1203 #if defined(_MSC_VER) && defined(_WIN64)
1204   /* _WIN64 architecture. Use the __umulh intrinsic to calc `m1`. */
1205   m1 = __umulh(x, rng_excl);
1206 #else
1207   /* 32-bit architecture. Emulate __umulh to calc `m1`. */
1208   {
1209     uint64_t x0, x1, rng_excl0, rng_excl1;
1210     uint64_t w0, w1, w2, t;
1211 
1212     x0 = x & 0xFFFFFFFFULL;
1213     x1 = x >> 32;
1214     rng_excl0 = rng_excl & 0xFFFFFFFFULL;
1215     rng_excl1 = rng_excl >> 32;
1216     w0 = x0 * rng_excl0;
1217     t = x1 * rng_excl0 + (w0 >> 32);
1218     w1 = t & 0xFFFFFFFFULL;
1219     w2 = t >> 32;
1220     w1 += x0 * rng_excl1;
1221     m1 = x1 * rng_excl1 + w2 + (w1 >> 32);
1222   }
1223 #endif
1224 
1225   return m1;
1226 #endif
1227 }
1228 
1229 /* Static `Lemire rejection` function called by
1230  * random_buffered_bounded_uint32(...) */
buffered_bounded_lemire_uint32(bitgen_t * bitgen_state,uint32_t rng,int * bcnt,uint32_t * buf)1231 static NPY_INLINE uint32_t buffered_bounded_lemire_uint32(
1232     bitgen_t *bitgen_state, uint32_t rng, int *bcnt, uint32_t *buf) {
1233   /*
1234    * Uses Lemire's algorithm - https://arxiv.org/abs/1805.10941
1235    *
1236    * The buffer and buffer count are not used here but are included to allow
1237    * this function to be templated with the similar uint8 and uint16
1238    * functions
1239    *
1240    * Note: `rng` should not be 0xFFFFFFFF. When this happens `rng_excl` becomes
1241    * zero.
1242    */
1243   const uint32_t rng_excl = rng + 1;
1244 
1245   uint64_t m;
1246   uint32_t leftover;
1247 
1248   assert(rng != 0xFFFFFFFFUL);
1249 
1250   /* Generate a scaled random number. */
1251   m = ((uint64_t)next_uint32(bitgen_state)) * rng_excl;
1252 
1253   /* Rejection sampling to remove any bias */
1254   leftover = m & 0xFFFFFFFFUL;
1255 
1256   if (leftover < rng_excl) {
1257     /* `rng_excl` is a simple upper bound for `threshold`. */
1258     const uint32_t threshold = (UINT32_MAX - rng) % rng_excl;
1259 
1260     while (leftover < threshold) {
1261       m = ((uint64_t)next_uint32(bitgen_state)) * rng_excl;
1262       leftover = m & 0xFFFFFFFFUL;
1263     }
1264   }
1265 
1266   return (m >> 32);
1267 }
1268 
1269 /* Static `Lemire rejection` function called by
1270  * random_buffered_bounded_uint16(...) */
buffered_bounded_lemire_uint16(bitgen_t * bitgen_state,uint16_t rng,int * bcnt,uint32_t * buf)1271 static NPY_INLINE uint16_t buffered_bounded_lemire_uint16(
1272     bitgen_t *bitgen_state, uint16_t rng, int *bcnt, uint32_t *buf) {
1273   /*
1274    * Uses Lemire's algorithm - https://arxiv.org/abs/1805.10941
1275    *
1276    * Note: `rng` should not be 0xFFFF. When this happens `rng_excl` becomes
1277    * zero.
1278    */
1279   const uint16_t rng_excl = rng + 1;
1280 
1281   uint32_t m;
1282   uint16_t leftover;
1283 
1284   assert(rng != 0xFFFFU);
1285 
1286   /* Generate a scaled random number. */
1287   m = ((uint32_t)buffered_uint16(bitgen_state, bcnt, buf)) * rng_excl;
1288 
1289   /* Rejection sampling to remove any bias */
1290   leftover = m & 0xFFFFUL;
1291 
1292   if (leftover < rng_excl) {
1293     /* `rng_excl` is a simple upper bound for `threshold`. */
1294     const uint16_t threshold = (UINT16_MAX - rng) % rng_excl;
1295 
1296     while (leftover < threshold) {
1297       m = ((uint32_t)buffered_uint16(bitgen_state, bcnt, buf)) * rng_excl;
1298       leftover = m & 0xFFFFUL;
1299     }
1300   }
1301 
1302   return (m >> 16);
1303 }
1304 
1305 /* Static `Lemire rejection` function called by
1306  * random_buffered_bounded_uint8(...) */
buffered_bounded_lemire_uint8(bitgen_t * bitgen_state,uint8_t rng,int * bcnt,uint32_t * buf)1307 static NPY_INLINE uint8_t buffered_bounded_lemire_uint8(bitgen_t *bitgen_state,
1308                                                         uint8_t rng, int *bcnt,
1309                                                         uint32_t *buf) {
1310   /*
1311    * Uses Lemire's algorithm - https://arxiv.org/abs/1805.10941
1312    *
1313    * Note: `rng` should not be 0xFF. When this happens `rng_excl` becomes
1314    * zero.
1315    */
1316   const uint8_t rng_excl = rng + 1;
1317 
1318   uint16_t m;
1319   uint8_t leftover;
1320 
1321   assert(rng != 0xFFU);
1322 
1323 
1324   /* Generate a scaled random number. */
1325   m = ((uint16_t)buffered_uint8(bitgen_state, bcnt, buf)) * rng_excl;
1326 
1327   /* Rejection sampling to remove any bias */
1328   leftover = m & 0xFFUL;
1329 
1330   if (leftover < rng_excl) {
1331     /* `rng_excl` is a simple upper bound for `threshold`. */
1332     const uint8_t threshold = (UINT8_MAX - rng) % rng_excl;
1333 
1334     while (leftover < threshold) {
1335       m = ((uint16_t)buffered_uint8(bitgen_state, bcnt, buf)) * rng_excl;
1336       leftover = m & 0xFFUL;
1337     }
1338   }
1339 
1340   return (m >> 8);
1341 }
1342 
1343 /*
1344  * Returns a single random npy_uint64 between off and off + rng
1345  * inclusive. The numbers wrap if rng is sufficiently large.
1346  */
random_bounded_uint64(bitgen_t * bitgen_state,uint64_t off,uint64_t rng,uint64_t mask,bool use_masked)1347 uint64_t random_bounded_uint64(bitgen_t *bitgen_state, uint64_t off,
1348                                uint64_t rng, uint64_t mask, bool use_masked) {
1349   if (rng == 0) {
1350     return off;
1351   } else if (rng <= 0xFFFFFFFFUL) {
1352     /* Call 32-bit generator if range in 32-bit. */
1353     if (rng == 0xFFFFFFFFUL) {
1354       /*
1355        * The 32-bit Lemire method does not handle rng=0xFFFFFFFF, so we'll
1356        * call next_uint32 directly.  This also works when use_masked is True,
1357        * so we handle both cases here.
1358        */
1359       return off + (uint64_t) next_uint32(bitgen_state);
1360     }
1361     if (use_masked) {
1362       return off + buffered_bounded_masked_uint32(bitgen_state, rng, mask, NULL,
1363                                                   NULL);
1364     } else {
1365       return off +
1366              buffered_bounded_lemire_uint32(bitgen_state, rng, NULL, NULL);
1367     }
1368   } else if (rng == 0xFFFFFFFFFFFFFFFFULL) {
1369     /* Lemire64 doesn't support inclusive rng = 0xFFFFFFFFFFFFFFFF. */
1370     return off + next_uint64(bitgen_state);
1371   } else {
1372     if (use_masked) {
1373       return off + bounded_masked_uint64(bitgen_state, rng, mask);
1374     } else {
1375       return off + bounded_lemire_uint64(bitgen_state, rng);
1376     }
1377   }
1378 }
1379 
1380 /*
1381  * Returns a single random npy_uint64 between off and off + rng
1382  * inclusive. The numbers wrap if rng is sufficiently large.
1383  */
random_buffered_bounded_uint32(bitgen_t * bitgen_state,uint32_t off,uint32_t rng,uint32_t mask,bool use_masked,int * bcnt,uint32_t * buf)1384 uint32_t random_buffered_bounded_uint32(bitgen_t *bitgen_state, uint32_t off,
1385                                         uint32_t rng, uint32_t mask,
1386                                         bool use_masked, int *bcnt,
1387                                         uint32_t *buf) {
1388   /*
1389    * Unused bcnt and buf are here only to allow templating with other uint
1390    * generators.
1391    */
1392   if (rng == 0) {
1393     return off;
1394   } else if (rng == 0xFFFFFFFFUL) {
1395     /* Lemire32 doesn't support inclusive rng = 0xFFFFFFFF. */
1396     return off + next_uint32(bitgen_state);
1397   } else {
1398     if (use_masked) {
1399       return off +
1400              buffered_bounded_masked_uint32(bitgen_state, rng, mask, bcnt, buf);
1401     } else {
1402       return off + buffered_bounded_lemire_uint32(bitgen_state, rng, bcnt, buf);
1403     }
1404   }
1405 }
1406 
1407 /*
1408  * Returns a single random npy_uint16 between off and off + rng
1409  * inclusive. The numbers wrap if rng is sufficiently large.
1410  */
random_buffered_bounded_uint16(bitgen_t * bitgen_state,uint16_t off,uint16_t rng,uint16_t mask,bool use_masked,int * bcnt,uint32_t * buf)1411 uint16_t random_buffered_bounded_uint16(bitgen_t *bitgen_state, uint16_t off,
1412                                         uint16_t rng, uint16_t mask,
1413                                         bool use_masked, int *bcnt,
1414                                         uint32_t *buf) {
1415   if (rng == 0) {
1416     return off;
1417   } else if (rng == 0xFFFFUL) {
1418     /* Lemire16 doesn't support inclusive rng = 0xFFFF. */
1419     return off + buffered_uint16(bitgen_state, bcnt, buf);
1420   } else {
1421     if (use_masked) {
1422       return off +
1423              buffered_bounded_masked_uint16(bitgen_state, rng, mask, bcnt, buf);
1424     } else {
1425       return off + buffered_bounded_lemire_uint16(bitgen_state, rng, bcnt, buf);
1426     }
1427   }
1428 }
1429 
1430 /*
1431  * Returns a single random npy_uint8 between off and off + rng
1432  * inclusive. The numbers wrap if rng is sufficiently large.
1433  */
random_buffered_bounded_uint8(bitgen_t * bitgen_state,uint8_t off,uint8_t rng,uint8_t mask,bool use_masked,int * bcnt,uint32_t * buf)1434 uint8_t random_buffered_bounded_uint8(bitgen_t *bitgen_state, uint8_t off,
1435                                       uint8_t rng, uint8_t mask,
1436                                       bool use_masked, int *bcnt,
1437                                       uint32_t *buf) {
1438   if (rng == 0) {
1439     return off;
1440   } else if (rng == 0xFFUL) {
1441     /* Lemire8 doesn't support inclusive rng = 0xFF. */
1442     return off + buffered_uint8(bitgen_state, bcnt, buf);
1443   } else {
1444     if (use_masked) {
1445       return off +
1446              buffered_bounded_masked_uint8(bitgen_state, rng, mask, bcnt, buf);
1447     } else {
1448       return off + buffered_bounded_lemire_uint8(bitgen_state, rng, bcnt, buf);
1449     }
1450   }
1451 }
1452 
random_buffered_bounded_bool(bitgen_t * bitgen_state,npy_bool off,npy_bool rng,npy_bool mask,bool use_masked,int * bcnt,uint32_t * buf)1453 npy_bool random_buffered_bounded_bool(bitgen_t *bitgen_state, npy_bool off,
1454                                       npy_bool rng, npy_bool mask,
1455                                       bool use_masked, int *bcnt,
1456                                       uint32_t *buf) {
1457   return buffered_bounded_bool(bitgen_state, off, rng, mask, bcnt, buf);
1458 }
1459 
1460 /*
1461  * Fills an array with cnt random npy_uint64 between off and off + rng
1462  * inclusive. The numbers wrap if rng is sufficiently large.
1463  */
random_bounded_uint64_fill(bitgen_t * bitgen_state,uint64_t off,uint64_t rng,npy_intp cnt,bool use_masked,uint64_t * out)1464 void random_bounded_uint64_fill(bitgen_t *bitgen_state, uint64_t off,
1465                                 uint64_t rng, npy_intp cnt, bool use_masked,
1466                                 uint64_t *out) {
1467   npy_intp i;
1468 
1469   if (rng == 0) {
1470     for (i = 0; i < cnt; i++) {
1471       out[i] = off;
1472     }
1473   } else if (rng <= 0xFFFFFFFFUL) {
1474     /* Call 32-bit generator if range in 32-bit. */
1475 
1476     /*
1477      * The 32-bit Lemire method does not handle rng=0xFFFFFFFF, so we'll
1478      * call next_uint32 directly.  This also works when use_masked is True,
1479      * so we handle both cases here.
1480      */
1481     if (rng == 0xFFFFFFFFUL) {
1482       for (i = 0; i < cnt; i++) {
1483         out[i] = off + (uint64_t) next_uint32(bitgen_state);
1484       }
1485     } else {
1486       uint32_t buf = 0;
1487       int bcnt = 0;
1488 
1489       if (use_masked) {
1490         /* Smallest bit mask >= max */
1491         uint64_t mask = gen_mask(rng);
1492 
1493         for (i = 0; i < cnt; i++) {
1494           out[i] = off + buffered_bounded_masked_uint32(bitgen_state, rng, mask,
1495                                                         &bcnt, &buf);
1496         }
1497       } else {
1498         for (i = 0; i < cnt; i++) {
1499           out[i] = off +
1500                    buffered_bounded_lemire_uint32(bitgen_state, rng, &bcnt, &buf);
1501         }
1502       }
1503     }
1504   } else if (rng == 0xFFFFFFFFFFFFFFFFULL) {
1505     /* Lemire64 doesn't support rng = 0xFFFFFFFFFFFFFFFF. */
1506     for (i = 0; i < cnt; i++) {
1507       out[i] = off + next_uint64(bitgen_state);
1508     }
1509   } else {
1510     if (use_masked) {
1511       /* Smallest bit mask >= max */
1512       uint64_t mask = gen_mask(rng);
1513 
1514       for (i = 0; i < cnt; i++) {
1515         out[i] = off + bounded_masked_uint64(bitgen_state, rng, mask);
1516       }
1517     } else {
1518       for (i = 0; i < cnt; i++) {
1519         out[i] = off + bounded_lemire_uint64(bitgen_state, rng);
1520       }
1521     }
1522   }
1523 }
1524 
1525 /*
1526  * Fills an array with cnt random npy_uint32 between off and off + rng
1527  * inclusive. The numbers wrap if rng is sufficiently large.
1528  */
random_bounded_uint32_fill(bitgen_t * bitgen_state,uint32_t off,uint32_t rng,npy_intp cnt,bool use_masked,uint32_t * out)1529 void random_bounded_uint32_fill(bitgen_t *bitgen_state, uint32_t off,
1530                                 uint32_t rng, npy_intp cnt, bool use_masked,
1531                                 uint32_t *out) {
1532   npy_intp i;
1533   uint32_t buf = 0;
1534   int bcnt = 0;
1535 
1536   if (rng == 0) {
1537     for (i = 0; i < cnt; i++) {
1538       out[i] = off;
1539     }
1540   } else if (rng == 0xFFFFFFFFUL) {
1541     /* Lemire32 doesn't support rng = 0xFFFFFFFF. */
1542     for (i = 0; i < cnt; i++) {
1543       out[i] = off + next_uint32(bitgen_state);
1544     }
1545   } else {
1546     if (use_masked) {
1547       /* Smallest bit mask >= max */
1548       uint32_t mask = (uint32_t)gen_mask(rng);
1549 
1550       for (i = 0; i < cnt; i++) {
1551         out[i] = off + buffered_bounded_masked_uint32(bitgen_state, rng, mask,
1552                                                       &bcnt, &buf);
1553       }
1554     } else {
1555       for (i = 0; i < cnt; i++) {
1556         out[i] = off +
1557                  buffered_bounded_lemire_uint32(bitgen_state, rng, &bcnt, &buf);
1558       }
1559     }
1560   }
1561 }
1562 
1563 /*
1564  * Fills an array with cnt random npy_uint16 between off and off + rng
1565  * inclusive. The numbers wrap if rng is sufficiently large.
1566  */
random_bounded_uint16_fill(bitgen_t * bitgen_state,uint16_t off,uint16_t rng,npy_intp cnt,bool use_masked,uint16_t * out)1567 void random_bounded_uint16_fill(bitgen_t *bitgen_state, uint16_t off,
1568                                 uint16_t rng, npy_intp cnt, bool use_masked,
1569                                 uint16_t *out) {
1570   npy_intp i;
1571   uint32_t buf = 0;
1572   int bcnt = 0;
1573 
1574   if (rng == 0) {
1575     for (i = 0; i < cnt; i++) {
1576       out[i] = off;
1577     }
1578   } else if (rng == 0xFFFFUL) {
1579     /* Lemire16 doesn't support rng = 0xFFFF. */
1580     for (i = 0; i < cnt; i++) {
1581       out[i] = off + buffered_uint16(bitgen_state, &bcnt, &buf);
1582     }
1583   } else {
1584     if (use_masked) {
1585       /* Smallest bit mask >= max */
1586       uint16_t mask = (uint16_t)gen_mask(rng);
1587 
1588       for (i = 0; i < cnt; i++) {
1589         out[i] = off + buffered_bounded_masked_uint16(bitgen_state, rng, mask,
1590                                                       &bcnt, &buf);
1591       }
1592     } else {
1593       for (i = 0; i < cnt; i++) {
1594         out[i] = off +
1595                  buffered_bounded_lemire_uint16(bitgen_state, rng, &bcnt, &buf);
1596       }
1597     }
1598   }
1599 }
1600 
1601 /*
1602  * Fills an array with cnt random npy_uint8 between off and off + rng
1603  * inclusive. The numbers wrap if rng is sufficiently large.
1604  */
random_bounded_uint8_fill(bitgen_t * bitgen_state,uint8_t off,uint8_t rng,npy_intp cnt,bool use_masked,uint8_t * out)1605 void random_bounded_uint8_fill(bitgen_t *bitgen_state, uint8_t off, uint8_t rng,
1606                                npy_intp cnt, bool use_masked, uint8_t *out) {
1607   npy_intp i;
1608   uint32_t buf = 0;
1609   int bcnt = 0;
1610 
1611   if (rng == 0) {
1612     for (i = 0; i < cnt; i++) {
1613       out[i] = off;
1614     }
1615   } else if (rng == 0xFFUL) {
1616     /* Lemire8 doesn't support rng = 0xFF. */
1617     for (i = 0; i < cnt; i++) {
1618       out[i] = off + buffered_uint8(bitgen_state, &bcnt, &buf);
1619     }
1620   } else {
1621     if (use_masked) {
1622       /* Smallest bit mask >= max */
1623       uint8_t mask = (uint8_t)gen_mask(rng);
1624 
1625       for (i = 0; i < cnt; i++) {
1626         out[i] = off + buffered_bounded_masked_uint8(bitgen_state, rng, mask,
1627                                                      &bcnt, &buf);
1628       }
1629     } else {
1630       for (i = 0; i < cnt; i++) {
1631         out[i] =
1632             off + buffered_bounded_lemire_uint8(bitgen_state, rng, &bcnt, &buf);
1633       }
1634     }
1635   }
1636 }
1637 
1638 /*
1639  * Fills an array with cnt random npy_bool between off and off + rng
1640  * inclusive.
1641  */
random_bounded_bool_fill(bitgen_t * bitgen_state,npy_bool off,npy_bool rng,npy_intp cnt,bool use_masked,npy_bool * out)1642 void random_bounded_bool_fill(bitgen_t *bitgen_state, npy_bool off,
1643                               npy_bool rng, npy_intp cnt, bool use_masked,
1644                               npy_bool *out) {
1645   npy_bool mask = 0;
1646   npy_intp i;
1647   uint32_t buf = 0;
1648   int bcnt = 0;
1649 
1650   for (i = 0; i < cnt; i++) {
1651     out[i] = buffered_bounded_bool(bitgen_state, off, rng, mask, &bcnt, &buf);
1652   }
1653 }
1654 
random_multinomial(bitgen_t * bitgen_state,RAND_INT_TYPE n,RAND_INT_TYPE * mnix,double * pix,npy_intp d,binomial_t * binomial)1655 void random_multinomial(bitgen_t *bitgen_state, RAND_INT_TYPE n,
1656                         RAND_INT_TYPE *mnix, double *pix, npy_intp d,
1657                         binomial_t *binomial) {
1658   double remaining_p = 1.0;
1659   npy_intp j;
1660   RAND_INT_TYPE dn = n;
1661   for (j = 0; j < (d - 1); j++) {
1662     mnix[j] = random_binomial(bitgen_state, pix[j] / remaining_p, dn, binomial);
1663     dn = dn - mnix[j];
1664     if (dn <= 0) {
1665       break;
1666     }
1667     remaining_p -= pix[j];
1668   }
1669   if (dn > 0) {
1670       mnix[d - 1] = dn;
1671   }
1672 }
1673