1 #include "numpy/random/distributions.h"
2 #include "ziggurat_constants.h"
3 #include "logfactorial.h"
4
5 #if defined(_MSC_VER) && defined(_WIN64)
6 #include <intrin.h>
7 #endif
8
9 #include <assert.h>
10
11 /* Inline generators for internal use */
next_uint32(bitgen_t * bitgen_state)12 static NPY_INLINE uint32_t next_uint32(bitgen_t *bitgen_state) {
13 return bitgen_state->next_uint32(bitgen_state->state);
14 }
next_uint64(bitgen_t * bitgen_state)15 static NPY_INLINE uint64_t next_uint64(bitgen_t *bitgen_state) {
16 return bitgen_state->next_uint64(bitgen_state->state);
17 }
18
next_float(bitgen_t * bitgen_state)19 static NPY_INLINE float next_float(bitgen_t *bitgen_state) {
20 return (next_uint32(bitgen_state) >> 9) * (1.0f / 8388608.0f);
21 }
22
23 /* Random generators for external use */
/* Public entry point: standard uniform float in [0, 1). */
float random_standard_uniform_f(bitgen_t *bitgen_state) {
  float draw = next_float(bitgen_state);
  return draw;
}
27
/* Public entry point: standard uniform double in [0, 1). */
double random_standard_uniform(bitgen_t *bitgen_state) {
  double draw = next_double(bitgen_state);
  return draw;
}
31
/* Fill out[0..cnt) with standard uniform doubles in [0, 1). */
void random_standard_uniform_fill(bitgen_t *bitgen_state, npy_intp cnt, double *out) {
  npy_intp remaining = cnt;
  while (remaining-- > 0) {
    *out++ = next_double(bitgen_state);
  }
}
38
/* Fill out[0..cnt) with standard uniform floats in [0, 1). */
void random_standard_uniform_fill_f(bitgen_t *bitgen_state, npy_intp cnt, float *out) {
  npy_intp remaining = cnt;
  while (remaining-- > 0) {
    *out++ = next_float(bitgen_state);
  }
}
45
/*
 * Slow path of the exponential ziggurat: handles the tail layer
 * (idx == 0) and the wedge acceptance test for the other layers.
 */
static double standard_exponential_unlikely(bitgen_t *bitgen_state,
                                            uint8_t idx, double x) {
  double f_hi, f_lo, u;

  if (idx == 0) {
    /* Tail beyond ziggurat_exp_r; 1.0 - U keeps log() away from 0.0
       (see GH 13361). */
    return ziggurat_exp_r - log(1.0 - next_double(bitgen_state));
  }
  f_hi = fe_double[idx - 1];
  f_lo = fe_double[idx];
  u = next_double(bitgen_state);
  if ((f_hi - f_lo) * u + f_lo < exp(-x)) {
    return x; /* candidate lies under the density: accept */
  }
  /* Reject: restart the whole ziggurat draw. */
  return random_standard_exponential(bitgen_state);
}
59
/* Standard exponential deviate via the ziggurat method: one 64-bit
 * draw supplies an 8-bit layer index and a 53-bit magnitude. */
double random_standard_exponential(bitgen_t *bitgen_state) {
  uint64_t bits, mant;
  uint8_t layer;
  double candidate;

  bits = next_uint64(bitgen_state) >> 3;
  layer = bits & 0xFF;
  mant = bits >> 8;
  candidate = mant * we_double[layer];
  if (mant < ke_double[layer]) {
    return candidate; /* fast accept: ~98.9% of draws */
  }
  return standard_exponential_unlikely(bitgen_state, layer, candidate);
}
74
/* Fill out[0..cnt) with standard exponential doubles. */
void random_standard_exponential_fill(bitgen_t *bitgen_state, npy_intp cnt, double *out)
{
  npy_intp remaining = cnt;
  while (remaining-- > 0) {
    *out++ = random_standard_exponential(bitgen_state);
  }
}
82
/* Float twin of standard_exponential_unlikely: ziggurat tail and
 * wedge handling for the single-precision generator. */
static float standard_exponential_unlikely_f(bitgen_t *bitgen_state,
                                             uint8_t idx, float x) {
  float f_hi, f_lo, u;

  if (idx == 0) {
    /* Tail layer; 1.0f - U keeps logf() away from 0.0 (see GH 13361). */
    return ziggurat_exp_r_f - logf(1.0f - next_float(bitgen_state));
  }
  f_hi = fe_float[idx - 1];
  f_lo = fe_float[idx];
  u = next_float(bitgen_state);
  if ((f_hi - f_lo) * u + f_lo < expf(-x)) {
    return x; /* accept inside the wedge */
  }
  /* Reject: restart the whole ziggurat draw. */
  return random_standard_exponential_f(bitgen_state);
}
96
/* Single-precision exponential deviate via the ziggurat method: one
 * 32-bit draw supplies an 8-bit layer index and a 23-bit magnitude. */
float random_standard_exponential_f(bitgen_t *bitgen_state) {
  uint32_t bits, mant;
  uint8_t layer;
  float candidate;

  bits = next_uint32(bitgen_state) >> 1;
  layer = bits & 0xFF;
  mant = bits >> 8;
  candidate = mant * we_float[layer];
  if (mant < ke_float[layer]) {
    return candidate; /* fast accept: ~98.9% of draws */
  }
  return standard_exponential_unlikely_f(bitgen_state, layer, candidate);
}
111
/* Fill out[0..cnt) with standard exponential floats. */
void random_standard_exponential_fill_f(bitgen_t *bitgen_state, npy_intp cnt, float *out)
{
  npy_intp remaining = cnt;
  while (remaining-- > 0) {
    *out++ = random_standard_exponential_f(bitgen_state);
  }
}
119
/* Fill out[0..cnt) with exponentials via the inverse CDF, -log(1 - U);
 * using 1 - U avoids log(0.0). */
void random_standard_exponential_inv_fill(bitgen_t *bitgen_state, npy_intp cnt, double *out)
{
  npy_intp remaining = cnt;
  while (remaining-- > 0) {
    *out++ = -log(1.0 - next_double(bitgen_state));
  }
}
127
/*
 * Fill out[0..cnt) with single-precision exponentials via the inverse
 * CDF, -logf(1 - U).  Fix: the float path previously computed the
 * double-precision log(1.0 - U) and narrowed the result; every other
 * single-precision path in this file uses logf (see the ziggurat tail
 * and normal_f), so use logf/1.0f here for consistency and to avoid a
 * pointless double round-trip.
 */
void random_standard_exponential_inv_fill_f(bitgen_t *bitgen_state, npy_intp cnt, float *out)
{
  npy_intp i;
  for (i = 0; i < cnt; i++) {
    /* 1.0f - U avoids logf(0.0f) */
    out[i] = -logf(1.0f - next_float(bitgen_state));
  }
}
135
136
/*
 * Standard normal deviate via the ziggurat method.  A single 64-bit
 * draw supplies the layer index (low 8 bits), a sign bit, and a 52-bit
 * magnitude; the fast path accepts without any further work.
 */
double random_standard_normal(bitgen_t *bitgen_state) {
  uint64_t r;
  int sign;
  uint64_t rabs;
  int idx;
  double x, xx, yy;
  for (;;) {
    /* r = e3n52sb8: 3 spare bits | 52-bit magnitude | sign | 8-bit index */
    r = next_uint64(bitgen_state);
    idx = r & 0xff;
    r >>= 8;
    sign = r & 0x1;
    rabs = (r >> 1) & 0x000fffffffffffff;
    x = rabs * wi_double[idx];
    if (sign & 0x1)
      x = -x;
    if (rabs < ki_double[idx])
      return x; /* 99.3% of the time return here */
    if (idx == 0) {
      /* Bottom layer: sample the tail beyond ziggurat_nor_r by
         exponential rejection; bit 8 of rabs recycles as the sign. */
      for (;;) {
        /* Switch to 1.0 - U to avoid log(0.0), see GH 13361 */
        xx = -ziggurat_nor_inv_r * log(1.0 - next_double(bitgen_state));
        yy = -log(1.0 - next_double(bitgen_state));
        if (yy + yy > xx * xx)
          return ((rabs >> 8) & 0x1) ? -(ziggurat_nor_r + xx)
                                     : ziggurat_nor_r + xx;
      }
    } else {
      /* Wedge region: accept x if a fresh uniform, interpolated between
         the layer densities, falls under the true pdf exp(-x^2/2). */
      if (((fi_double[idx - 1] - fi_double[idx]) * next_double(bitgen_state) +
           fi_double[idx]) < exp(-0.5 * x * x))
        return x;
    }
  }
}
171
/* Fill out[0..cnt) with standard normal doubles. */
void random_standard_normal_fill(bitgen_t *bitgen_state, npy_intp cnt, double *out) {
  npy_intp remaining = cnt;
  while (remaining-- > 0) {
    *out++ = random_standard_normal(bitgen_state);
  }
}
178
/*
 * Single-precision standard normal via the ziggurat method.  One
 * 32-bit draw supplies the layer index (8 bits), a sign bit and a
 * 23-bit magnitude.
 */
float random_standard_normal_f(bitgen_t *bitgen_state) {
  uint32_t r;
  int sign;
  uint32_t rabs;
  int idx;
  float x, xx, yy;
  for (;;) {
    /* r = n23sb8: 23-bit magnitude | sign | 8-bit index */
    r = next_uint32(bitgen_state);
    idx = r & 0xff;
    sign = (r >> 8) & 0x1;
    rabs = (r >> 9) & 0x0007fffff;
    x = rabs * wi_float[idx];
    if (sign & 0x1)
      x = -x;
    if (rabs < ki_float[idx])
      return x; /* # 99.3% of the time return here */
    if (idx == 0) {
      /* Bottom layer: exponential-rejection tail sample; bit 8 of rabs
         recycles as the tail sign. */
      for (;;) {
        /* Switch to 1.0 - U to avoid log(0.0), see GH 13361 */
        xx = -ziggurat_nor_inv_r_f * logf(1.0f - next_float(bitgen_state));
        yy = -logf(1.0f - next_float(bitgen_state));
        if (yy + yy > xx * xx)
          return ((rabs >> 8) & 0x1) ? -(ziggurat_nor_r_f + xx)
                                     : ziggurat_nor_r_f + xx;
      }
    } else {
      /* Wedge acceptance test.  NOTE(review): this uses the
         double-precision exp() rather than expf() — presumably for
         accuracy of the comparison; confirm before "fixing". */
      if (((fi_float[idx - 1] - fi_float[idx]) * next_float(bitgen_state) +
           fi_float[idx]) < exp(-0.5 * x * x))
        return x;
    }
  }
}
212
/* Fill out[0..cnt) with standard normal floats. */
void random_standard_normal_fill_f(bitgen_t *bitgen_state, npy_intp cnt, float *out) {
  npy_intp remaining = cnt;
  while (remaining-- > 0) {
    *out++ = random_standard_normal_f(bitgen_state);
  }
}
219
/*
 * Standard gamma(shape) deviate.
 *   shape == 1 : reduces to the standard exponential
 *   shape == 0 : degenerate, always 0
 *   shape <  1 : acceptance-rejection using one uniform and one
 *                exponential per trial (Ahrens-Dieter-style — confirm
 *                against the reference)
 *   shape >  1 : Marsaglia & Tsang (2000) squeeze method built on a
 *                normal draw and the cubic transform b*(1 + c*X)^3
 */
double random_standard_gamma(bitgen_t *bitgen_state,
                             double shape) {
  double b, c;
  double U, V, X, Y;

  if (shape == 1.0) {
    return random_standard_exponential(bitgen_state);
  } else if (shape == 0.0) {
    return 0.0;
  } else if (shape < 1.0) {
    for (;;) {
      U = next_double(bitgen_state);
      V = random_standard_exponential(bitgen_state);
      if (U <= 1.0 - shape) {
        /* Candidate from the power-function part of the envelope. */
        X = pow(U, 1. / shape);
        if (X <= V) {
          return X;
        }
      } else {
        /* Candidate from the exponential part of the envelope. */
        Y = -log((1 - U) / shape);
        X = pow(1.0 - shape + shape * Y, 1. / shape);
        if (X <= (V + Y)) {
          return X;
        }
      }
    }
  } else {
    /* Marsaglia-Tsang: d = shape - 1/3, c = 1/sqrt(9d). */
    b = shape - 1. / 3.;
    c = 1. / sqrt(9 * b);
    for (;;) {
      do {
        X = random_standard_normal(bitgen_state);
        V = 1.0 + c * X;
      } while (V <= 0.0);

      V = V * V * V;
      U = next_double(bitgen_state);
      /* Cheap squeeze accept (avoids the log below most of the time). */
      if (U < 1.0 - 0.0331 * (X * X) * (X * X))
        return (b * V);
      /* log(0.0) ok here */
      if (log(U) < 0.5 * X * X + b * (1. - V + log(V)))
        return (b * V);
    }
  }
}
265
/*
 * Single-precision twin of random_standard_gamma; same branch
 * structure (exponential / degenerate / rejection for shape < 1 /
 * Marsaglia-Tsang for shape > 1) using float math throughout.
 */
float random_standard_gamma_f(bitgen_t *bitgen_state,
                              float shape) {
  float b, c;
  float U, V, X, Y;

  if (shape == 1.0f) {
    return random_standard_exponential_f(bitgen_state);
  } else if (shape == 0.0) {
    return 0.0;
  } else if (shape < 1.0f) {
    for (;;) {
      U = next_float(bitgen_state);
      V = random_standard_exponential_f(bitgen_state);
      if (U <= 1.0f - shape) {
        /* Power-function part of the envelope. */
        X = powf(U, 1.0f / shape);
        if (X <= V) {
          return X;
        }
      } else {
        /* Exponential part of the envelope. */
        Y = -logf((1.0f - U) / shape);
        X = powf(1.0f - shape + shape * Y, 1.0f / shape);
        if (X <= (V + Y)) {
          return X;
        }
      }
    }
  } else {
    /* Marsaglia-Tsang squeeze method. */
    b = shape - 1.0f / 3.0f;
    c = 1.0f / sqrtf(9.0f * b);
    for (;;) {
      do {
        X = random_standard_normal_f(bitgen_state);
        V = 1.0f + c * X;
      } while (V <= 0.0f);

      V = V * V * V;
      U = next_float(bitgen_state);
      /* Cheap squeeze accept. */
      if (U < 1.0f - 0.0331f * (X * X) * (X * X))
        return (b * V);
      /* logf(0.0) ok here */
      if (logf(U) < 0.5f * X * X + b * (1.0f - V + logf(V)))
        return (b * V);
    }
  }
}
311
/* Non-negative int64: drop the top bit of a 64-bit draw. */
int64_t random_positive_int64(bitgen_t *bitgen_state) {
  const uint64_t bits = next_uint64(bitgen_state);
  return (int64_t)(bits >> 1);
}
315
/* Non-negative int32: drop the top bit of a 32-bit draw. */
int32_t random_positive_int32(bitgen_t *bitgen_state) {
  const uint32_t bits = next_uint32(bitgen_state);
  return (int32_t)(bits >> 1);
}
319
/* Non-negative integer sized to match the platform's C `long`
 * (RandomState compatibility). */
int64_t random_positive_int(bitgen_t *bitgen_state) {
#if ULONG_MAX > 0xffffffffUL
  return (int64_t)(next_uint64(bitgen_state) >> 1);
#else
  return (int64_t)(next_uint32(bitgen_state) >> 1);
#endif
}
327
/* Raw unsigned draw sized to match the platform's C `long`. */
uint64_t random_uint(bitgen_t *bitgen_state) {
#if ULONG_MAX > 0xffffffffUL
  return next_uint64(bitgen_state);
#else
  return next_uint32(bitgen_state);
#endif
}
335
336 /*
337 * log-gamma function to support some of these distributions. The
338 * algorithm comes from SPECFUN by Shanjie Zhang and Jianming Jin and their
339 * book "Computation of Special Functions", 1996, John Wiley & Sons, Inc.
340 *
341 * If random_loggam(k+1) is being used to compute log(k!) for an integer k, consider
342 * using logfactorial(k) instead.
343 */
/*
 * log(Gamma(x)) for x > 0 via an asymptotic series (see the SPECFUN
 * attribution above).  The series is accurate only for large argument,
 * so small x is shifted up past 7 and the shift undone afterwards with
 * the recurrence log Gamma(x) = log Gamma(x+1) - log(x).
 */
double random_loggam(double x) {
  double x0, x2, lg2pi, gl, gl0;
  RAND_INT_TYPE k, n;

  /* Coefficients of the asymptotic (Stirling-type) series. */
  static double a[10] = {8.333333333333333e-02, -2.777777777777778e-03,
                         7.936507936507937e-04, -5.952380952380952e-04,
                         8.417508417508418e-04, -1.917526917526918e-03,
                         6.410256410256410e-03, -2.955065359477124e-02,
                         1.796443723688307e-01, -1.39243221690590e+00};

  if ((x == 1.0) || (x == 2.0)) {
    /* log Gamma(1) == log Gamma(2) == 0 exactly. */
    return 0.0;
  } else if (x < 7.0) {
    /* Number of unit shifts needed to move the argument past 7. */
    n = (RAND_INT_TYPE)(7 - x);
  } else {
    n = 0;
  }
  x0 = x + n;
  x2 = (1.0 / x0) * (1.0 / x0);
  /* log(2 * M_PI) */
  lg2pi = 1.8378770664093453e+00;
  /* Evaluate the series in 1/x0^2 by Horner's rule. */
  gl0 = a[9];
  for (k = 8; k >= 0; k--) {
    gl0 *= x2;
    gl0 += a[k];
  }
  gl = gl0 / x0 + 0.5 * lg2pi + (x0 - 0.5) * log(x0) - x0;
  if (x < 7.0) {
    /* Undo the argument shift term by term. */
    for (k = 1; k <= n; k++) {
      gl -= log(x0 - 1.0);
      x0 -= 1.0;
    }
  }
  return gl;
}
379
380 /*
381 double random_normal(bitgen_t *bitgen_state, double loc, double scale) {
382 return loc + scale * random_gauss(bitgen_state);
383 }
384 */
385
/* Normal(loc, scale) via the location-scale transform of N(0, 1). */
double random_normal(bitgen_t *bitgen_state, double loc, double scale) {
  const double z = random_standard_normal(bitgen_state);
  return loc + scale * z;
}
389
/* Exponential(scale) by rescaling a standard exponential draw. */
double random_exponential(bitgen_t *bitgen_state, double scale) {
  const double e = random_standard_exponential(bitgen_state);
  return scale * e;
}
393
/* Uniform on [lower, lower + range). */
double random_uniform(bitgen_t *bitgen_state, double lower, double range) {
  const double u = next_double(bitgen_state);
  return lower + range * u;
}
397
/* Gamma(shape, scale) by rescaling a standard gamma draw. */
double random_gamma(bitgen_t *bitgen_state, double shape, double scale) {
  const double g = random_standard_gamma(bitgen_state, shape);
  return scale * g;
}
401
/* Single-precision Gamma(shape, scale). */
float random_gamma_f(bitgen_t *bitgen_state, float shape, float scale) {
  const float g = random_standard_gamma_f(bitgen_state, shape);
  return scale * g;
}
405
/*
 * Beta(a, b) deviate.  Small shapes (a, b <= 1) use Johnk's algorithm;
 * otherwise the standard gamma ratio Ga / (Ga + Gb) is used.
 *
 * Fix: the Johnk acceptance test previously required XpY > 0.0 in the
 * outer condition, which made the log-space else-branch unreachable —
 * when both X = U^(1/a) and Y = V^(1/b) underflow to 0 (tiny a and b)
 * the loop spun forever.  Testing U + V > 0.0 outside and XpY > 0
 * inside restores the intended underflow handling.
 */
double random_beta(bitgen_t *bitgen_state, double a, double b) {
  double Ga, Gb;

  if ((a <= 1.0) && (b <= 1.0)) {
    double U, V, X, Y, XpY;
    /* Use Johnk's algorithm */

    while (1) {
      U = next_double(bitgen_state);
      V = next_double(bitgen_state);
      X = pow(U, 1.0 / a);
      Y = pow(V, 1.0 / b);
      XpY = X + Y;
      /* Reject only if both U and V are 0.0, which is approx 1 in 10^106 */
      if ((XpY <= 1.0) && (U + V > 0.0)) {
        if (XpY > 0) {
          return X / XpY;
        } else {
          /* Both X and Y underflowed: redo X/(X+Y) in log space,
             rescaling by the larger exponent first for stability. */
          double logX = log(U) / a;
          double logY = log(V) / b;
          double logM = logX > logY ? logX : logY;
          logX -= logM;
          logY -= logM;

          return exp(logX - log(exp(logX) + exp(logY)));
        }
      }
    }
  } else {
    Ga = random_standard_gamma(bitgen_state, a);
    Gb = random_standard_gamma(bitgen_state, b);
    return Ga / (Ga + Gb);
  }
}
440
/* Chi-square(df) as gamma(df/2) scaled by 2. */
double random_chisquare(bitgen_t *bitgen_state, double df) {
  const double g = random_standard_gamma(bitgen_state, df / 2.0);
  return 2.0 * g;
}
444
/*
 * F(dfnum, dfden) as the ratio of two scaled chi-square draws.
 *
 * Fix: both chi-square calls previously sat inside one division
 * expression; C leaves the evaluation order of '/' operands
 * unspecified, so the order in which the generator was consumed — and
 * hence the output stream — was compiler-dependent.  Sequencing the
 * two draws in separate statements pins the numerator draw first.
 */
double random_f(bitgen_t *bitgen_state, double dfnum, double dfden) {
  const double num = random_chisquare(bitgen_state, dfnum) * dfden;
  const double den = random_chisquare(bitgen_state, dfden) * dfnum;
  return num / den;
}
449
/*
 * Standard Cauchy deviate as the ratio of two independent standard
 * normals.
 *
 * Fix: the two generator calls previously sat inside one division
 * expression; C leaves the evaluation order of '/' operands
 * unspecified, so the stream order was compiler-dependent.  Named
 * temporaries make the numerator draw come first, deterministically.
 */
double random_standard_cauchy(bitgen_t *bitgen_state) {
  const double num = random_standard_normal(bitgen_state);
  const double den = random_standard_normal(bitgen_state);
  return num / den;
}
453
/*
 * Pareto(a) deviate via exp(E/a) - 1 with E standard exponential.
 *
 * Fix: computed as expm1(E / a) instead of exp(E / a) - 1.0 — for
 * large a the argument is tiny and the explicit subtraction cancels
 * catastrophically, while expm1 stays fully accurate near zero.
 */
double random_pareto(bitgen_t *bitgen_state, double a) {
  return expm1(random_standard_exponential(bitgen_state) / a);
}
457
/* Weibull(a) as E^(1/a) with E standard exponential; a == 0 is the
 * degenerate distribution at 0 (also avoids division by zero). */
double random_weibull(bitgen_t *bitgen_state, double a) {
  double E;

  if (a == 0.0) {
    return 0.0;
  }
  E = random_standard_exponential(bitgen_state);
  return pow(E, 1. / a);
}
464
/*
 * Power-function(a) deviate: (1 - exp(-E))^(1/a) = U^(1/a) with E
 * standard exponential.
 *
 * Fix: 1 - exp(-E) is computed as -expm1(-E); for small E the
 * explicit subtraction from 1 loses precision, expm1 does not.
 */
double random_power(bitgen_t *bitgen_state, double a) {
  return pow(-expm1(-random_standard_exponential(bitgen_state)), 1. / a);
}
468
/* Laplace(loc, scale) by inverse-CDF; a raw draw of exactly 0.0 is
 * rejected and redrawn (log(0) would otherwise occur). */
double random_laplace(bitgen_t *bitgen_state, double loc, double scale) {
  for (;;) {
    double U = next_double(bitgen_state);
    if (U >= 0.5) {
      return loc - scale * log(2.0 - U - U);
    }
    if (U > 0.0) {
      return loc + scale * log(U + U);
    }
    /* U == 0.0: try the next value from the stream. */
  }
}
483
/* Gumbel(loc, scale) by inverse-CDF; a raw draw of 0.0 (U == 1.0
 * after the flip) is rejected and redrawn. */
double random_gumbel(bitgen_t *bitgen_state, double loc, double scale) {
  for (;;) {
    double U = 1.0 - next_double(bitgen_state);
    if (U < 1.0) {
      return loc - scale * log(-log(U));
    }
    /* U == 1.0: try the next value from the stream. */
  }
}
494
/* Logistic(loc, scale) by inverse-CDF (logit); a raw draw of exactly
 * 0.0 is rejected and redrawn. */
double random_logistic(bitgen_t *bitgen_state, double loc, double scale) {
  for (;;) {
    double U = next_double(bitgen_state);
    if (U > 0.0) {
      return loc + scale * log(U / (1.0 - U));
    }
    /* U == 0.0: try the next value from the stream. */
  }
}
505
/* Lognormal(mean, sigma): exponentiate a normal draw. */
double random_lognormal(bitgen_t *bitgen_state, double mean, double sigma) {
  const double gauss = random_normal(bitgen_state, mean, sigma);
  return exp(gauss);
}
509
/* Rayleigh(mode) by inverse-CDF; 1 - U keeps log() away from 0. */
double random_rayleigh(bitgen_t *bitgen_state, double mode) {
  const double U = next_double(bitgen_state);
  return mode * sqrt(-2.0 * log(1.0 - U));
}
513
/* Student's t(df) as N / sqrt(G / (df/2)) with N standard normal and
 * G gamma(df/2); normal is drawn first, then the gamma. */
double random_standard_t(bitgen_t *bitgen_state, double df) {
  double gauss, chi2_half;

  gauss = random_standard_normal(bitgen_state);
  chi2_half = random_standard_gamma(bitgen_state, df / 2);
  return sqrt(df / 2) * gauss / sqrt(chi2_half);
}
521
random_poisson_mult(bitgen_t * bitgen_state,double lam)522 static RAND_INT_TYPE random_poisson_mult(bitgen_t *bitgen_state, double lam) {
523 RAND_INT_TYPE X;
524 double prod, U, enlam;
525
526 enlam = exp(-lam);
527 X = 0;
528 prod = 1.0;
529 while (1) {
530 U = next_double(bitgen_state);
531 prod *= U;
532 if (prod > enlam) {
533 X += 1;
534 } else {
535 return X;
536 }
537 }
538 }
539
540 /*
541 * The transformed rejection method for generating Poisson random variables
542 * W. Hoermann
543 * Insurance: Mathematics and Economics 12, 39-45 (1993)
544 */
545 #define LS2PI 0.91893853320467267
546 #define TWELFTH 0.083333333333333333333333
/*
 * PTRS transformed-rejection sampler for Poisson(lam), used for
 * lam >= 10 (see the Hoermann reference above).  Each trial draws one
 * uniform pair (U, V); a candidate k comes from an affine transform of
 * U and is accepted either by the cheap squeeze or by an exact
 * log-density comparison against the Poisson pmf.
 */
static RAND_INT_TYPE random_poisson_ptrs(bitgen_t *bitgen_state, double lam) {
  RAND_INT_TYPE k;
  double U, V, slam, loglam, a, b, invalpha, vr, us;

  /* Constants of the hat function, fixed for a given lam. */
  slam = sqrt(lam);
  loglam = log(lam);
  b = 0.931 + 2.53 * slam;
  a = -0.059 + 0.02483 * b;
  invalpha = 1.1239 + 1.1328 / (b - 3.4);
  vr = 0.9277 - 3.6224 / (b - 2);

  while (1) {
    U = next_double(bitgen_state) - 0.5;
    V = next_double(bitgen_state);
    us = 0.5 - fabs(U);
    k = (RAND_INT_TYPE)floor((2 * a / us + b) * U + lam + 0.43);
    /* Fast squeeze: accept without evaluating any logs. */
    if ((us >= 0.07) && (V <= vr)) {
      return k;
    }
    /* Quick reject for invalid k or points far outside the hat. */
    if ((k < 0) || ((us < 0.013) && (V > us))) {
      continue;
    }
    /* log(V) == log(0.0) ok here */
    /* if U==0.0 so that us==0.0, log is ok since always returns */
    if ((log(V) + log(invalpha) - log(a / (us * us) + b)) <=
        (-lam + k * loglam - random_loggam(k + 1))) {
      return k;
    }
  }
}
577
random_poisson(bitgen_t * bitgen_state,double lam)578 RAND_INT_TYPE random_poisson(bitgen_t *bitgen_state, double lam) {
579 if (lam >= 10) {
580 return random_poisson_ptrs(bitgen_state, lam);
581 } else if (lam == 0) {
582 return 0;
583 } else {
584 return random_poisson_mult(bitgen_state, lam);
585 }
586 }
587
random_negative_binomial(bitgen_t * bitgen_state,double n,double p)588 RAND_INT_TYPE random_negative_binomial(bitgen_t *bitgen_state, double n,
589 double p) {
590 double Y = random_gamma(bitgen_state, n, (1 - p) / p);
591 return random_poisson(bitgen_state, Y);
592 }
593
/*
 * BTPE algorithm (Kachitvichyanukul & Schmeiser) for binomial draws
 * when n * min(p, 1-p) is large.  The scaled pmf is covered by a
 * four-piece envelope: a central rectangle (u <= p1), two triangular
 * "wings" (p1 < u <= p2) and two exponential tails (p2 < u <= p4).
 * Envelope parameters depend only on (n, p) and are cached in
 * `binomial` for reuse across calls.  Internally samples with
 * r = min(p, 1-p) and mirrors the count at the end.
 */
RAND_INT_TYPE random_binomial_btpe(bitgen_t *bitgen_state, RAND_INT_TYPE n,
                                   double p, binomial_t *binomial) {
  double r, q, fm, p1, xm, xl, xr, c, laml, lamr, p2, p3, p4;
  double a, u, v, s, F, rho, t, A, nrq, x1, x2, f1, f2, z, z2, w, w2, x;
  RAND_INT_TYPE m, y, k, i;

  if (!(binomial->has_binomial) || (binomial->nsave != n) ||
      (binomial->psave != p)) {
    /* initialize */
    binomial->nsave = n;
    binomial->psave = p;
    binomial->has_binomial = 1;
    binomial->r = r = MIN(p, 1.0 - p);
    binomial->q = q = 1.0 - r;
    binomial->fm = fm = n * r + r;
    binomial->m = m = (RAND_INT_TYPE)floor(binomial->fm);
    binomial->p1 = p1 = floor(2.195 * sqrt(n * r * q) - 4.6 * q) + 0.5;
    binomial->xm = xm = m + 0.5;
    binomial->xl = xl = xm - p1;
    binomial->xr = xr = xm + p1;
    binomial->c = c = 0.134 + 20.5 / (15.3 + m);
    a = (fm - xl) / (fm - xl * r);
    binomial->laml = laml = a * (1.0 + a / 2.0);
    a = (xr - fm) / (xr * q);
    binomial->lamr = lamr = a * (1.0 + a / 2.0);
    binomial->p2 = p2 = p1 * (1.0 + 2.0 * c);
    binomial->p3 = p3 = p2 + c / laml;
    binomial->p4 = p4 = p3 + c / lamr;
  } else {
    /* (n, p) unchanged: reload the cached envelope parameters. */
    r = binomial->r;
    q = binomial->q;
    fm = binomial->fm;
    m = binomial->m;
    p1 = binomial->p1;
    xm = binomial->xm;
    xl = binomial->xl;
    xr = binomial->xr;
    c = binomial->c;
    laml = binomial->laml;
    lamr = binomial->lamr;
    p2 = binomial->p2;
    p3 = binomial->p3;
    p4 = binomial->p4;
  }

/* sigh ... */
Step10:
  /* Draw (u, v); u selects the envelope region, v drives acceptance. */
  nrq = n * r * q;
  u = next_double(bitgen_state) * p4;
  v = next_double(bitgen_state);
  if (u > p1)
    goto Step20;
  /* Central rectangle: accept immediately. */
  y = (RAND_INT_TYPE)floor(xm - p1 * v + u);
  goto Step60;

Step20:
  /* Triangular wings. */
  if (u > p2)
    goto Step30;
  x = xl + (u - p1) / c;
  v = v * c + 1.0 - fabs(m - x + 0.5) / p1;
  if (v > 1.0)
    goto Step10;
  y = (RAND_INT_TYPE)floor(x);
  goto Step50;

Step30:
  /* Left exponential tail. */
  if (u > p3)
    goto Step40;
  y = (RAND_INT_TYPE)floor(xl + log(v) / laml);
  /* Reject if v==0.0 since previous cast is undefined */
  if ((y < 0) || (v == 0.0))
    goto Step10;
  v = v * (u - p2) * laml;
  goto Step50;

Step40:
  /* Right exponential tail. */
  y = (RAND_INT_TYPE)floor(xr - log(v) / lamr);
  /* Reject if v==0.0 since previous cast is undefined */
  if ((y > n) || (v == 0.0))
    goto Step10;
  v = v * (u - p3) * lamr;

Step50:
  /* Acceptance test: compare v against the pmf ratio F = f(y)/f(m),
     built up by the recurrence when y is close to the mode. */
  k = llabs(y - m);
  if ((k > 20) && (k < ((nrq) / 2.0 - 1)))
    goto Step52;

  s = r / q;
  a = s * (n + 1);
  F = 1.0;
  if (m < y) {
    for (i = m + 1; i <= y; i++) {
      F *= (a / i - s);
    }
  } else if (m > y) {
    for (i = y + 1; i <= m; i++) {
      F /= (a / i - s);
    }
  }
  if (v > F)
    goto Step10;
  goto Step60;

Step52:
  /* y far from the mode: use squeeze bounds on log F before falling
     back to the full Stirling-series comparison below. */
  rho =
      (k / (nrq)) * ((k * (k / 3.0 + 0.625) + 0.16666666666666666) / nrq + 0.5);
  t = -k * k / (2 * nrq);
  /* log(0.0) ok here */
  A = log(v);
  if (A < (t - rho))
    goto Step60;
  if (A > (t + rho))
    goto Step10;

  /* Exact comparison using a Stirling-series expansion of log f(y). */
  x1 = y + 1;
  f1 = m + 1;
  z = n + 1 - m;
  w = n - y + 1;
  x2 = x1 * x1;
  f2 = f1 * f1;
  z2 = z * z;
  w2 = w * w;
  if (A > (xm * log(f1 / x1) + (n - m + 0.5) * log(z / w) +
           (y - m) * log(w * r / (x1 * q)) +
           (13680. - (462. - (132. - (99. - 140. / f2) / f2) / f2) / f2) / f1 /
               166320. +
           (13680. - (462. - (132. - (99. - 140. / z2) / z2) / z2) / z2) / z /
               166320. +
           (13680. - (462. - (132. - (99. - 140. / x2) / x2) / x2) / x2) / x1 /
               166320. +
           (13680. - (462. - (132. - (99. - 140. / w2) / w2) / w2) / w2) / w /
               166320.)) {
    goto Step10;
  }

Step60:
  /* Sampling used r = min(p, 1-p); mirror the count when p > 0.5. */
  if (p > 0.5) {
    y = n - y;
  }

  return y;
}
736
/*
 * Binomial(n, p) by inversion of the CDF: walk the pmf upward from
 * X = 0, subtracting each term from a uniform draw.  Efficient when
 * n * p is small.  Derived quantities are cached in `binomial`,
 * reusing its q/r/c/m slots for q, q^n, n*p and the search bound.
 */
RAND_INT_TYPE random_binomial_inversion(bitgen_t *bitgen_state, RAND_INT_TYPE n,
                                        double p, binomial_t *binomial) {
  double q, qn, np, px, U;
  RAND_INT_TYPE X, bound;

  if (!(binomial->has_binomial) || (binomial->nsave != n) ||
      (binomial->psave != p)) {
    binomial->nsave = n;
    binomial->psave = p;
    binomial->has_binomial = 1;
    binomial->q = q = 1.0 - p;
    binomial->r = qn = exp(n * log(q));  /* q^n == P(X = 0) */
    binomial->c = np = n * p;
    /* Search cutoff well beyond the mean; overrun means lost precision. */
    binomial->m = bound = (RAND_INT_TYPE)MIN(n, np + 10.0 * sqrt(np * q + 1));
  } else {
    q = binomial->q;
    qn = binomial->r;
    np = binomial->c;
    bound = binomial->m;
  }
  X = 0;
  px = qn; /* px tracks P(X = k) via the pmf recurrence */
  U = next_double(bitgen_state);
  while (U > px) {
    X++;
    if (X > bound) {
      /* Walked past the bound (accumulated rounding): restart the
         search with a fresh uniform. */
      X = 0;
      px = qn;
      U = next_double(bitgen_state);
    } else {
      U -= px;
      px = ((n - X + 1) * p * px) / (X * q);
    }
  }
  return X;
}
773
/* Binomial(n, p) dispatcher: sample with the smaller of p and 1-p
 * (inversion when the mean is small, BTPE otherwise) and mirror the
 * count back when the complement was used. */
int64_t random_binomial(bitgen_t *bitgen_state, double p, int64_t n,
                        binomial_t *binomial) {
  if ((n == 0LL) || (p == 0.0f)) {
    return 0;
  }

  if (p <= 0.5) {
    if (p * n <= 30.0) {
      return random_binomial_inversion(bitgen_state, n, p, binomial);
    }
    return random_binomial_btpe(bitgen_state, n, p, binomial);
  } else {
    const double q = 1.0 - p;
    int64_t mirrored;
    if (q * n <= 30.0) {
      mirrored = random_binomial_inversion(bitgen_state, n, q, binomial);
    } else {
      mirrored = random_binomial_btpe(bitgen_state, n, q, binomial);
    }
    return n - mirrored;
  }
}
796
/* Noncentral chi-square(df, nonc).  For df > 1 uses the decomposition
 * chi2(df-1) + (N + sqrt(nonc))^2; otherwise the Poisson mixture of
 * central chi-squares. */
double random_noncentral_chisquare(bitgen_t *bitgen_state, double df,
                                   double nonc) {
  if (npy_isnan(nonc)) {
    return NPY_NAN;
  }
  if (nonc == 0) {
    return random_chisquare(bitgen_state, df);
  }
  if (1 < df) {
    const double chi2 = random_chisquare(bitgen_state, df - 1);
    const double shifted = random_standard_normal(bitgen_state) + sqrt(nonc);
    return chi2 + shifted * shifted;
  } else {
    const RAND_INT_TYPE mix = random_poisson(bitgen_state, nonc / 2.0);
    return random_chisquare(bitgen_state, df + 2 * mix);
  }
}
814
/* Noncentral F: scaled ratio of a noncentral and a central chi-square;
 * the noncentral draw is sequenced first, matching the original. */
double random_noncentral_f(bitgen_t *bitgen_state, double dfnum, double dfden,
                           double nonc) {
  const double num =
      random_noncentral_chisquare(bitgen_state, dfnum, nonc) * dfden;
  return num / (random_chisquare(bitgen_state, dfden) * dfnum);
}
820
/* Wald / inverse-Gaussian(mean, scale) via the transformation method
 * of Michael-Schucany-Haas style: one normal draw produces a
 * candidate, one uniform picks between the two roots. */
double random_wald(bitgen_t *bitgen_state, double mean, double scale) {
  double u, x, y, ratio;

  ratio = mean / (2 * scale);
  y = random_standard_normal(bitgen_state);
  y = mean * y * y;
  x = mean + ratio * (y - sqrt(4 * scale * y + y * y));
  u = next_double(bitgen_state);
  if (u <= mean / (mean + x)) {
    return x; /* keep the smaller root */
  }
  return mean * mean / x; /* otherwise the conjugate root */
}
836
/*
 * Von Mises(mu, kappa) on [-pi, pi] by acceptance-rejection with a
 * wrapped-Cauchy-style envelope (appears to follow Best & Fisher —
 * confirm against the reference).  Tiny kappa degenerates to a
 * uniform angle; the result is wrapped back into [-pi, pi].
 */
double random_vonmises(bitgen_t *bitgen_state, double mu, double kappa) {
  double s;
  double U, V, W, Y, Z;
  double result, mod;
  int neg;
  if (npy_isnan(kappa)) {
    return NPY_NAN;
  }
  if (kappa < 1e-8) {
    /* Nearly uniform on the circle. */
    return M_PI * (2 * next_double(bitgen_state) - 1);
  } else {
    /* with double precision rho is zero until 1.4e-8 */
    if (kappa < 1e-5) {
      /*
       * second order taylor expansion around kappa = 0
       * precise until relatively large kappas as second order is 0
       */
      s = (1. / kappa + kappa);
    } else {
      double r = 1 + sqrt(1 + 4 * kappa * kappa);
      double rho = (r - sqrt(2 * r)) / (2 * kappa);
      s = (1 + rho * rho) / (2 * rho);
    }

    /* Rejection loop: each trial costs two uniforms. */
    while (1) {
      U = next_double(bitgen_state);
      Z = cos(M_PI * U);
      W = (1 + s * Z) / (s + Z);
      Y = kappa * (s - W);
      V = next_double(bitgen_state);
      /*
       * V==0.0 is ok here since Y >= 0 always leads
       * to accept, while Y < 0 always rejects
       */
      if ((Y * (2 - Y) - V >= 0) || (log(Y / V) + 1 - Y >= 0)) {
        break;
      }
    }

    /* A final uniform chooses the sign of the angle. */
    U = next_double(bitgen_state);

    result = acos(W);
    if (U < 0.5) {
      result = -result;
    }
    result += mu;
    /* Wrap into [-pi, pi], preserving the sign symmetrically. */
    neg = (result < 0);
    mod = fabs(result);
    mod = (fmod(mod + M_PI, 2 * M_PI) - M_PI);
    if (neg) {
      mod *= -1;
    }

    return mod;
  }
}
893
894 /*
895 * RAND_INT_TYPE is used to share integer generators with RandomState which
896 * used long in place of int64_t. If changing a distribution that uses
897 * RAND_INT_TYPE, then the original unmodified copy must be retained for
898 * use in RandomState by copying to the legacy distributions source file.
899 */
/*
 * Log-series(p) sampler by combined inversion/rejection (Kemp-style —
 * confirm against the reference).  Most draws return 1 immediately
 * from the first uniform; otherwise a second uniform is inverted
 * through the CDF approximation.
 */
RAND_INT_TYPE random_logseries(bitgen_t *bitgen_state, double p) {
  double q, r, U, V;
  RAND_INT_TYPE result;

  r = log(1.0 - p);

  while (1) {
    V = next_double(bitgen_state);
    if (V >= p) {
      /* Fast path: P(X = 1) region. */
      return 1;
    }
    U = next_double(bitgen_state);
    q = 1.0 - exp(r * U);
    if (V <= q * q) {
      /* Inversion step; floor can misbehave for V == 0.0 or produce
         a value < 1, in which case redraw. */
      result = (RAND_INT_TYPE)floor(1 + log(V) / log(q));
      if ((result < 1) || (V == 0.0)) {
        continue;
      } else {
        return result;
      }
    }
    if (V >= q) {
      return 1;
    }
    return 2;
  }
}
927
random_geometric_search(bitgen_t * bitgen_state,double p)928 RAND_INT_TYPE random_geometric_search(bitgen_t *bitgen_state, double p) {
929 double U;
930 RAND_INT_TYPE X;
931 double sum, prod, q;
932
933 X = 1;
934 sum = prod = p;
935 q = 1.0 - p;
936 U = next_double(bitgen_state);
937 while (U > sum) {
938 prod *= q;
939 sum += prod;
940 X++;
941 }
942 return X;
943 }
944
random_geometric_inversion(bitgen_t * bitgen_state,double p)945 RAND_INT_TYPE random_geometric_inversion(bitgen_t *bitgen_state, double p) {
946 return (RAND_INT_TYPE)ceil(log(1.0 - next_double(bitgen_state)) / log(1.0 - p));
947 }
948
random_geometric(bitgen_t * bitgen_state,double p)949 RAND_INT_TYPE random_geometric(bitgen_t *bitgen_state, double p) {
950 if (p >= 0.333333333333333333333333) {
951 return random_geometric_search(bitgen_state, p);
952 } else {
953 return random_geometric_inversion(bitgen_state, p);
954 }
955 }
956
/*
 * Zipf/zeta(a) sampler by rejection from a Pareto-type envelope
 * (Devroye-style — confirm against the reference).  U is flipped to
 * (0, 1] so pow(U, -1/(a-1)) stays finite.
 */
RAND_INT_TYPE random_zipf(bitgen_t *bitgen_state, double a) {
  double am1, b;

  am1 = a - 1.0;
  b = pow(2.0, am1);
  while (1) {
    double T, U, V, X;

    U = 1.0 - next_double(bitgen_state);
    V = next_double(bitgen_state);
    /* Envelope candidate: continuous Pareto draw rounded down. */
    X = floor(pow(U, -1.0 / am1));
    /*
     * The real result may be above what can be represented in a signed
     * long. Since this is a straightforward rejection algorithm, we can
     * just reject this value. This function then models a Zipf
     * distribution truncated to sys.maxint.
     */
    if (X > RAND_INT_MAX || X < 1.0) {
      continue;
    }

    /* Accept with probability (pmf / envelope) at X. */
    T = pow(1.0 + 1.0 / X, am1);
    if (V * X * (T - 1.0) / (b - 1.0) <= T / b) {
      return (RAND_INT_TYPE)X;
    }
  }
}
984
/* Triangular(left, mode, right) by inverse-CDF: one uniform selects a
 * side of the mode, then a square root maps it onto that side. */
double random_triangular(bitgen_t *bitgen_state, double left, double mode,
                         double right) {
  double width, lwidth, split, U;

  width = right - left;
  lwidth = mode - left;
  split = lwidth / width; /* CDF value at the mode */

  U = next_double(bitgen_state);
  if (U <= split) {
    return left + sqrt(U * (lwidth * width));
  }
  return right - sqrt((1.0 - U) * ((right - mode) * width));
}
1003
1004
/* Unbiased draw on [0, max] by masked rejection: mask the draw down to
 * the smallest 2^k - 1 covering max, redraw until it lands in range. */
uint64_t random_interval(bitgen_t *bitgen_state, uint64_t max) {
  uint64_t mask, draw;

  if (max == 0) {
    return 0;
  }

  /* Smallest all-ones bit mask >= max. */
  mask = max;
  mask |= mask >> 1;
  mask |= mask >> 2;
  mask |= mask >> 4;
  mask |= mask >> 8;
  mask |= mask >> 16;
  mask |= mask >> 32;

  /* Use the cheaper 32-bit source whenever max fits in 32 bits. */
  if (max <= 0xffffffffUL) {
    do {
      draw = next_uint32(bitgen_state) & mask;
    } while (draw > max);
  } else {
    do {
      draw = next_uint64(bitgen_state) & mask;
    } while (draw > max);
  }
  return draw;
}
1031
1032 /* Bounded generators */
gen_mask(uint64_t max)1033 static NPY_INLINE uint64_t gen_mask(uint64_t max) {
1034 uint64_t mask = max;
1035 mask |= mask >> 1;
1036 mask |= mask >> 2;
1037 mask |= mask >> 4;
1038 mask |= mask >> 8;
1039 mask |= mask >> 16;
1040 mask |= mask >> 32;
1041 return mask;
1042 }
1043
1044 /* Generate 16 bit random numbers using a 32 bit buffer. */
buffered_uint16(bitgen_t * bitgen_state,int * bcnt,uint32_t * buf)1045 static NPY_INLINE uint16_t buffered_uint16(bitgen_t *bitgen_state, int *bcnt,
1046 uint32_t *buf) {
1047 if (!(bcnt[0])) {
1048 buf[0] = next_uint32(bitgen_state);
1049 bcnt[0] = 1;
1050 } else {
1051 buf[0] >>= 16;
1052 bcnt[0] -= 1;
1053 }
1054
1055 return (uint16_t)buf[0];
1056 }
1057
1058 /* Generate 8 bit random numbers using a 32 bit buffer. */
buffered_uint8(bitgen_t * bitgen_state,int * bcnt,uint32_t * buf)1059 static NPY_INLINE uint8_t buffered_uint8(bitgen_t *bitgen_state, int *bcnt,
1060 uint32_t *buf) {
1061 if (!(bcnt[0])) {
1062 buf[0] = next_uint32(bitgen_state);
1063 bcnt[0] = 3;
1064 } else {
1065 buf[0] >>= 8;
1066 bcnt[0] -= 1;
1067 }
1068
1069 return (uint8_t)buf[0];
1070 }
1071
1072 /* Static `masked rejection` function called by random_bounded_uint64(...) */
bounded_masked_uint64(bitgen_t * bitgen_state,uint64_t rng,uint64_t mask)1073 static NPY_INLINE uint64_t bounded_masked_uint64(bitgen_t *bitgen_state,
1074 uint64_t rng, uint64_t mask) {
1075 uint64_t val;
1076
1077 while ((val = (next_uint64(bitgen_state) & mask)) > rng)
1078 ;
1079
1080 return val;
1081 }
1082
1083 /* Static `masked rejection` function called by
1084 * random_buffered_bounded_uint32(...) */
1085 static NPY_INLINE uint32_t
buffered_bounded_masked_uint32(bitgen_t * bitgen_state,uint32_t rng,uint32_t mask,int * bcnt,uint32_t * buf)1086 buffered_bounded_masked_uint32(bitgen_t *bitgen_state, uint32_t rng,
1087 uint32_t mask, int *bcnt, uint32_t *buf) {
1088 /*
1089 * The buffer and buffer count are not used here but are included to allow
1090 * this function to be templated with the similar uint8 and uint16
1091 * functions
1092 */
1093
1094 uint32_t val;
1095
1096 while ((val = (next_uint32(bitgen_state) & mask)) > rng)
1097 ;
1098
1099 return val;
1100 }
1101
1102 /* Static `masked rejection` function called by
1103 * random_buffered_bounded_uint16(...) */
1104 static NPY_INLINE uint16_t
buffered_bounded_masked_uint16(bitgen_t * bitgen_state,uint16_t rng,uint16_t mask,int * bcnt,uint32_t * buf)1105 buffered_bounded_masked_uint16(bitgen_t *bitgen_state, uint16_t rng,
1106 uint16_t mask, int *bcnt, uint32_t *buf) {
1107 uint16_t val;
1108
1109 while ((val = (buffered_uint16(bitgen_state, bcnt, buf) & mask)) > rng)
1110 ;
1111
1112 return val;
1113 }
1114
1115 /* Static `masked rejection` function called by
1116 * random_buffered_bounded_uint8(...) */
buffered_bounded_masked_uint8(bitgen_t * bitgen_state,uint8_t rng,uint8_t mask,int * bcnt,uint32_t * buf)1117 static NPY_INLINE uint8_t buffered_bounded_masked_uint8(bitgen_t *bitgen_state,
1118 uint8_t rng,
1119 uint8_t mask, int *bcnt,
1120 uint32_t *buf) {
1121 uint8_t val;
1122
1123 while ((val = (buffered_uint8(bitgen_state, bcnt, buf) & mask)) > rng)
1124 ;
1125
1126 return val;
1127 }
1128
buffered_bounded_bool(bitgen_t * bitgen_state,npy_bool off,npy_bool rng,npy_bool mask,int * bcnt,uint32_t * buf)1129 static NPY_INLINE npy_bool buffered_bounded_bool(bitgen_t *bitgen_state,
1130 npy_bool off, npy_bool rng,
1131 npy_bool mask, int *bcnt,
1132 uint32_t *buf) {
1133 if (rng == 0)
1134 return off;
1135 if (!(bcnt[0])) {
1136 buf[0] = next_uint32(bitgen_state);
1137 bcnt[0] = 31;
1138 } else {
1139 buf[0] >>= 1;
1140 bcnt[0] -= 1;
1141 }
1142 return (buf[0] & 0x00000001UL) != 0;
1143 }
1144
/* Static `Lemire rejection` function called by random_bounded_uint64(...) */
static NPY_INLINE uint64_t bounded_lemire_uint64(bitgen_t *bitgen_state,
                                                 uint64_t rng) {
  /*
   * Uses Lemire's algorithm - https://arxiv.org/abs/1805.10941
   *
   * Returns an unbiased integer in [0, rng] inclusive while almost always
   * avoiding the expensive modulo reduction (it is only computed when the
   * cheap pre-test `leftover < rng_excl` fires).
   *
   * Note: `rng` should not be 0xFFFFFFFFFFFFFFFF. When this happens `rng_excl`
   * becomes zero.
   */
  const uint64_t rng_excl = rng + 1;

  assert(rng != 0xFFFFFFFFFFFFFFFFULL);

#if __SIZEOF_INT128__
  /* 128-bit uint available (e.g. GCC/clang). `m` is the __uint128_t scaled
   * integer. */
  __uint128_t m;
  uint64_t leftover;

  /* Generate a scaled random number. */
  m = ((__uint128_t)next_uint64(bitgen_state)) * rng_excl;

  /* Rejection sampling to remove any bias. */
  leftover = m & 0xFFFFFFFFFFFFFFFFULL; /* low 64 bits of the product */

  if (leftover < rng_excl) {
    /* `rng_excl` is a simple upper bound for `threshold`. */
    const uint64_t threshold = (UINT64_MAX - rng) % rng_excl;

    /* Redraw while the low bits fall inside the biased region. */
    while (leftover < threshold) {
      m = ((__uint128_t)next_uint64(bitgen_state)) * rng_excl;
      leftover = m & 0xFFFFFFFFFFFFFFFFULL;
    }
  }

  /* The high 64 bits of the 128-bit product are the bounded result. */
  return (m >> 64);
#else
  /* 128-bit uint NOT available (e.g. MSVS). `m1` is the upper 64-bits of the
   * scaled integer. */
  uint64_t m1;
  uint64_t x;
  uint64_t leftover;

  x = next_uint64(bitgen_state);

  /* Rejection sampling to remove any bias. */
  leftover = x * rng_excl; /* The lower 64-bits of the mult. */

  if (leftover < rng_excl) {
    /* `rng_excl` is a simple upper bound for `threshold`. */
    const uint64_t threshold = (UINT64_MAX - rng) % rng_excl;

    while (leftover < threshold) {
      x = next_uint64(bitgen_state);
      leftover = x * rng_excl;
    }
  }

#if defined(_MSC_VER) && defined(_WIN64)
  /* _WIN64 architecture. Use the __umulh intrinsic to calc `m1`. */
  m1 = __umulh(x, rng_excl);
#else
  /* 32-bit architecture. Emulate __umulh to calc `m1`. */
  {
    uint64_t x0, x1, rng_excl0, rng_excl1;
    uint64_t w0, w1, w2, t;

    /* Schoolbook 64x64 -> 128 multiply on 32-bit halves; only the high
     * 64 bits (`m1`) of the product are needed. */
    x0 = x & 0xFFFFFFFFULL;
    x1 = x >> 32;
    rng_excl0 = rng_excl & 0xFFFFFFFFULL;
    rng_excl1 = rng_excl >> 32;
    w0 = x0 * rng_excl0;
    t = x1 * rng_excl0 + (w0 >> 32);
    w1 = t & 0xFFFFFFFFULL;
    w2 = t >> 32;
    w1 += x0 * rng_excl1;
    m1 = x1 * rng_excl1 + w2 + (w1 >> 32);
  }
#endif

  return m1;
#endif
}
1228
1229 /* Static `Lemire rejection` function called by
1230 * random_buffered_bounded_uint32(...) */
buffered_bounded_lemire_uint32(bitgen_t * bitgen_state,uint32_t rng,int * bcnt,uint32_t * buf)1231 static NPY_INLINE uint32_t buffered_bounded_lemire_uint32(
1232 bitgen_t *bitgen_state, uint32_t rng, int *bcnt, uint32_t *buf) {
1233 /*
1234 * Uses Lemire's algorithm - https://arxiv.org/abs/1805.10941
1235 *
1236 * The buffer and buffer count are not used here but are included to allow
1237 * this function to be templated with the similar uint8 and uint16
1238 * functions
1239 *
1240 * Note: `rng` should not be 0xFFFFFFFF. When this happens `rng_excl` becomes
1241 * zero.
1242 */
1243 const uint32_t rng_excl = rng + 1;
1244
1245 uint64_t m;
1246 uint32_t leftover;
1247
1248 assert(rng != 0xFFFFFFFFUL);
1249
1250 /* Generate a scaled random number. */
1251 m = ((uint64_t)next_uint32(bitgen_state)) * rng_excl;
1252
1253 /* Rejection sampling to remove any bias */
1254 leftover = m & 0xFFFFFFFFUL;
1255
1256 if (leftover < rng_excl) {
1257 /* `rng_excl` is a simple upper bound for `threshold`. */
1258 const uint32_t threshold = (UINT32_MAX - rng) % rng_excl;
1259
1260 while (leftover < threshold) {
1261 m = ((uint64_t)next_uint32(bitgen_state)) * rng_excl;
1262 leftover = m & 0xFFFFFFFFUL;
1263 }
1264 }
1265
1266 return (m >> 32);
1267 }
1268
1269 /* Static `Lemire rejection` function called by
1270 * random_buffered_bounded_uint16(...) */
buffered_bounded_lemire_uint16(bitgen_t * bitgen_state,uint16_t rng,int * bcnt,uint32_t * buf)1271 static NPY_INLINE uint16_t buffered_bounded_lemire_uint16(
1272 bitgen_t *bitgen_state, uint16_t rng, int *bcnt, uint32_t *buf) {
1273 /*
1274 * Uses Lemire's algorithm - https://arxiv.org/abs/1805.10941
1275 *
1276 * Note: `rng` should not be 0xFFFF. When this happens `rng_excl` becomes
1277 * zero.
1278 */
1279 const uint16_t rng_excl = rng + 1;
1280
1281 uint32_t m;
1282 uint16_t leftover;
1283
1284 assert(rng != 0xFFFFU);
1285
1286 /* Generate a scaled random number. */
1287 m = ((uint32_t)buffered_uint16(bitgen_state, bcnt, buf)) * rng_excl;
1288
1289 /* Rejection sampling to remove any bias */
1290 leftover = m & 0xFFFFUL;
1291
1292 if (leftover < rng_excl) {
1293 /* `rng_excl` is a simple upper bound for `threshold`. */
1294 const uint16_t threshold = (UINT16_MAX - rng) % rng_excl;
1295
1296 while (leftover < threshold) {
1297 m = ((uint32_t)buffered_uint16(bitgen_state, bcnt, buf)) * rng_excl;
1298 leftover = m & 0xFFFFUL;
1299 }
1300 }
1301
1302 return (m >> 16);
1303 }
1304
1305 /* Static `Lemire rejection` function called by
1306 * random_buffered_bounded_uint8(...) */
buffered_bounded_lemire_uint8(bitgen_t * bitgen_state,uint8_t rng,int * bcnt,uint32_t * buf)1307 static NPY_INLINE uint8_t buffered_bounded_lemire_uint8(bitgen_t *bitgen_state,
1308 uint8_t rng, int *bcnt,
1309 uint32_t *buf) {
1310 /*
1311 * Uses Lemire's algorithm - https://arxiv.org/abs/1805.10941
1312 *
1313 * Note: `rng` should not be 0xFF. When this happens `rng_excl` becomes
1314 * zero.
1315 */
1316 const uint8_t rng_excl = rng + 1;
1317
1318 uint16_t m;
1319 uint8_t leftover;
1320
1321 assert(rng != 0xFFU);
1322
1323
1324 /* Generate a scaled random number. */
1325 m = ((uint16_t)buffered_uint8(bitgen_state, bcnt, buf)) * rng_excl;
1326
1327 /* Rejection sampling to remove any bias */
1328 leftover = m & 0xFFUL;
1329
1330 if (leftover < rng_excl) {
1331 /* `rng_excl` is a simple upper bound for `threshold`. */
1332 const uint8_t threshold = (UINT8_MAX - rng) % rng_excl;
1333
1334 while (leftover < threshold) {
1335 m = ((uint16_t)buffered_uint8(bitgen_state, bcnt, buf)) * rng_excl;
1336 leftover = m & 0xFFUL;
1337 }
1338 }
1339
1340 return (m >> 8);
1341 }
1342
1343 /*
1344 * Returns a single random npy_uint64 between off and off + rng
1345 * inclusive. The numbers wrap if rng is sufficiently large.
1346 */
random_bounded_uint64(bitgen_t * bitgen_state,uint64_t off,uint64_t rng,uint64_t mask,bool use_masked)1347 uint64_t random_bounded_uint64(bitgen_t *bitgen_state, uint64_t off,
1348 uint64_t rng, uint64_t mask, bool use_masked) {
1349 if (rng == 0) {
1350 return off;
1351 } else if (rng <= 0xFFFFFFFFUL) {
1352 /* Call 32-bit generator if range in 32-bit. */
1353 if (rng == 0xFFFFFFFFUL) {
1354 /*
1355 * The 32-bit Lemire method does not handle rng=0xFFFFFFFF, so we'll
1356 * call next_uint32 directly. This also works when use_masked is True,
1357 * so we handle both cases here.
1358 */
1359 return off + (uint64_t) next_uint32(bitgen_state);
1360 }
1361 if (use_masked) {
1362 return off + buffered_bounded_masked_uint32(bitgen_state, rng, mask, NULL,
1363 NULL);
1364 } else {
1365 return off +
1366 buffered_bounded_lemire_uint32(bitgen_state, rng, NULL, NULL);
1367 }
1368 } else if (rng == 0xFFFFFFFFFFFFFFFFULL) {
1369 /* Lemire64 doesn't support inclusive rng = 0xFFFFFFFFFFFFFFFF. */
1370 return off + next_uint64(bitgen_state);
1371 } else {
1372 if (use_masked) {
1373 return off + bounded_masked_uint64(bitgen_state, rng, mask);
1374 } else {
1375 return off + bounded_lemire_uint64(bitgen_state, rng);
1376 }
1377 }
1378 }
1379
1380 /*
1381 * Returns a single random npy_uint64 between off and off + rng
1382 * inclusive. The numbers wrap if rng is sufficiently large.
1383 */
random_buffered_bounded_uint32(bitgen_t * bitgen_state,uint32_t off,uint32_t rng,uint32_t mask,bool use_masked,int * bcnt,uint32_t * buf)1384 uint32_t random_buffered_bounded_uint32(bitgen_t *bitgen_state, uint32_t off,
1385 uint32_t rng, uint32_t mask,
1386 bool use_masked, int *bcnt,
1387 uint32_t *buf) {
1388 /*
1389 * Unused bcnt and buf are here only to allow templating with other uint
1390 * generators.
1391 */
1392 if (rng == 0) {
1393 return off;
1394 } else if (rng == 0xFFFFFFFFUL) {
1395 /* Lemire32 doesn't support inclusive rng = 0xFFFFFFFF. */
1396 return off + next_uint32(bitgen_state);
1397 } else {
1398 if (use_masked) {
1399 return off +
1400 buffered_bounded_masked_uint32(bitgen_state, rng, mask, bcnt, buf);
1401 } else {
1402 return off + buffered_bounded_lemire_uint32(bitgen_state, rng, bcnt, buf);
1403 }
1404 }
1405 }
1406
1407 /*
1408 * Returns a single random npy_uint16 between off and off + rng
1409 * inclusive. The numbers wrap if rng is sufficiently large.
1410 */
random_buffered_bounded_uint16(bitgen_t * bitgen_state,uint16_t off,uint16_t rng,uint16_t mask,bool use_masked,int * bcnt,uint32_t * buf)1411 uint16_t random_buffered_bounded_uint16(bitgen_t *bitgen_state, uint16_t off,
1412 uint16_t rng, uint16_t mask,
1413 bool use_masked, int *bcnt,
1414 uint32_t *buf) {
1415 if (rng == 0) {
1416 return off;
1417 } else if (rng == 0xFFFFUL) {
1418 /* Lemire16 doesn't support inclusive rng = 0xFFFF. */
1419 return off + buffered_uint16(bitgen_state, bcnt, buf);
1420 } else {
1421 if (use_masked) {
1422 return off +
1423 buffered_bounded_masked_uint16(bitgen_state, rng, mask, bcnt, buf);
1424 } else {
1425 return off + buffered_bounded_lemire_uint16(bitgen_state, rng, bcnt, buf);
1426 }
1427 }
1428 }
1429
1430 /*
1431 * Returns a single random npy_uint8 between off and off + rng
1432 * inclusive. The numbers wrap if rng is sufficiently large.
1433 */
random_buffered_bounded_uint8(bitgen_t * bitgen_state,uint8_t off,uint8_t rng,uint8_t mask,bool use_masked,int * bcnt,uint32_t * buf)1434 uint8_t random_buffered_bounded_uint8(bitgen_t *bitgen_state, uint8_t off,
1435 uint8_t rng, uint8_t mask,
1436 bool use_masked, int *bcnt,
1437 uint32_t *buf) {
1438 if (rng == 0) {
1439 return off;
1440 } else if (rng == 0xFFUL) {
1441 /* Lemire8 doesn't support inclusive rng = 0xFF. */
1442 return off + buffered_uint8(bitgen_state, bcnt, buf);
1443 } else {
1444 if (use_masked) {
1445 return off +
1446 buffered_bounded_masked_uint8(bitgen_state, rng, mask, bcnt, buf);
1447 } else {
1448 return off + buffered_bounded_lemire_uint8(bitgen_state, rng, bcnt, buf);
1449 }
1450 }
1451 }
1452
/*
 * Returns a single random npy_bool between off and off + rng.  `mask` and
 * `use_masked` are accepted only so the signature matches the templated
 * integer variants; boolean generation always uses the bit-buffered path.
 */
npy_bool random_buffered_bounded_bool(bitgen_t *bitgen_state, npy_bool off,
                                      npy_bool rng, npy_bool mask,
                                      bool use_masked, int *bcnt,
                                      uint32_t *buf) {
  return buffered_bounded_bool(bitgen_state, off, rng, mask, bcnt, buf);
}
1459
1460 /*
1461 * Fills an array with cnt random npy_uint64 between off and off + rng
1462 * inclusive. The numbers wrap if rng is sufficiently large.
1463 */
random_bounded_uint64_fill(bitgen_t * bitgen_state,uint64_t off,uint64_t rng,npy_intp cnt,bool use_masked,uint64_t * out)1464 void random_bounded_uint64_fill(bitgen_t *bitgen_state, uint64_t off,
1465 uint64_t rng, npy_intp cnt, bool use_masked,
1466 uint64_t *out) {
1467 npy_intp i;
1468
1469 if (rng == 0) {
1470 for (i = 0; i < cnt; i++) {
1471 out[i] = off;
1472 }
1473 } else if (rng <= 0xFFFFFFFFUL) {
1474 /* Call 32-bit generator if range in 32-bit. */
1475
1476 /*
1477 * The 32-bit Lemire method does not handle rng=0xFFFFFFFF, so we'll
1478 * call next_uint32 directly. This also works when use_masked is True,
1479 * so we handle both cases here.
1480 */
1481 if (rng == 0xFFFFFFFFUL) {
1482 for (i = 0; i < cnt; i++) {
1483 out[i] = off + (uint64_t) next_uint32(bitgen_state);
1484 }
1485 } else {
1486 uint32_t buf = 0;
1487 int bcnt = 0;
1488
1489 if (use_masked) {
1490 /* Smallest bit mask >= max */
1491 uint64_t mask = gen_mask(rng);
1492
1493 for (i = 0; i < cnt; i++) {
1494 out[i] = off + buffered_bounded_masked_uint32(bitgen_state, rng, mask,
1495 &bcnt, &buf);
1496 }
1497 } else {
1498 for (i = 0; i < cnt; i++) {
1499 out[i] = off +
1500 buffered_bounded_lemire_uint32(bitgen_state, rng, &bcnt, &buf);
1501 }
1502 }
1503 }
1504 } else if (rng == 0xFFFFFFFFFFFFFFFFULL) {
1505 /* Lemire64 doesn't support rng = 0xFFFFFFFFFFFFFFFF. */
1506 for (i = 0; i < cnt; i++) {
1507 out[i] = off + next_uint64(bitgen_state);
1508 }
1509 } else {
1510 if (use_masked) {
1511 /* Smallest bit mask >= max */
1512 uint64_t mask = gen_mask(rng);
1513
1514 for (i = 0; i < cnt; i++) {
1515 out[i] = off + bounded_masked_uint64(bitgen_state, rng, mask);
1516 }
1517 } else {
1518 for (i = 0; i < cnt; i++) {
1519 out[i] = off + bounded_lemire_uint64(bitgen_state, rng);
1520 }
1521 }
1522 }
1523 }
1524
1525 /*
1526 * Fills an array with cnt random npy_uint32 between off and off + rng
1527 * inclusive. The numbers wrap if rng is sufficiently large.
1528 */
random_bounded_uint32_fill(bitgen_t * bitgen_state,uint32_t off,uint32_t rng,npy_intp cnt,bool use_masked,uint32_t * out)1529 void random_bounded_uint32_fill(bitgen_t *bitgen_state, uint32_t off,
1530 uint32_t rng, npy_intp cnt, bool use_masked,
1531 uint32_t *out) {
1532 npy_intp i;
1533 uint32_t buf = 0;
1534 int bcnt = 0;
1535
1536 if (rng == 0) {
1537 for (i = 0; i < cnt; i++) {
1538 out[i] = off;
1539 }
1540 } else if (rng == 0xFFFFFFFFUL) {
1541 /* Lemire32 doesn't support rng = 0xFFFFFFFF. */
1542 for (i = 0; i < cnt; i++) {
1543 out[i] = off + next_uint32(bitgen_state);
1544 }
1545 } else {
1546 if (use_masked) {
1547 /* Smallest bit mask >= max */
1548 uint32_t mask = (uint32_t)gen_mask(rng);
1549
1550 for (i = 0; i < cnt; i++) {
1551 out[i] = off + buffered_bounded_masked_uint32(bitgen_state, rng, mask,
1552 &bcnt, &buf);
1553 }
1554 } else {
1555 for (i = 0; i < cnt; i++) {
1556 out[i] = off +
1557 buffered_bounded_lemire_uint32(bitgen_state, rng, &bcnt, &buf);
1558 }
1559 }
1560 }
1561 }
1562
1563 /*
1564 * Fills an array with cnt random npy_uint16 between off and off + rng
1565 * inclusive. The numbers wrap if rng is sufficiently large.
1566 */
random_bounded_uint16_fill(bitgen_t * bitgen_state,uint16_t off,uint16_t rng,npy_intp cnt,bool use_masked,uint16_t * out)1567 void random_bounded_uint16_fill(bitgen_t *bitgen_state, uint16_t off,
1568 uint16_t rng, npy_intp cnt, bool use_masked,
1569 uint16_t *out) {
1570 npy_intp i;
1571 uint32_t buf = 0;
1572 int bcnt = 0;
1573
1574 if (rng == 0) {
1575 for (i = 0; i < cnt; i++) {
1576 out[i] = off;
1577 }
1578 } else if (rng == 0xFFFFUL) {
1579 /* Lemire16 doesn't support rng = 0xFFFF. */
1580 for (i = 0; i < cnt; i++) {
1581 out[i] = off + buffered_uint16(bitgen_state, &bcnt, &buf);
1582 }
1583 } else {
1584 if (use_masked) {
1585 /* Smallest bit mask >= max */
1586 uint16_t mask = (uint16_t)gen_mask(rng);
1587
1588 for (i = 0; i < cnt; i++) {
1589 out[i] = off + buffered_bounded_masked_uint16(bitgen_state, rng, mask,
1590 &bcnt, &buf);
1591 }
1592 } else {
1593 for (i = 0; i < cnt; i++) {
1594 out[i] = off +
1595 buffered_bounded_lemire_uint16(bitgen_state, rng, &bcnt, &buf);
1596 }
1597 }
1598 }
1599 }
1600
1601 /*
1602 * Fills an array with cnt random npy_uint8 between off and off + rng
1603 * inclusive. The numbers wrap if rng is sufficiently large.
1604 */
random_bounded_uint8_fill(bitgen_t * bitgen_state,uint8_t off,uint8_t rng,npy_intp cnt,bool use_masked,uint8_t * out)1605 void random_bounded_uint8_fill(bitgen_t *bitgen_state, uint8_t off, uint8_t rng,
1606 npy_intp cnt, bool use_masked, uint8_t *out) {
1607 npy_intp i;
1608 uint32_t buf = 0;
1609 int bcnt = 0;
1610
1611 if (rng == 0) {
1612 for (i = 0; i < cnt; i++) {
1613 out[i] = off;
1614 }
1615 } else if (rng == 0xFFUL) {
1616 /* Lemire8 doesn't support rng = 0xFF. */
1617 for (i = 0; i < cnt; i++) {
1618 out[i] = off + buffered_uint8(bitgen_state, &bcnt, &buf);
1619 }
1620 } else {
1621 if (use_masked) {
1622 /* Smallest bit mask >= max */
1623 uint8_t mask = (uint8_t)gen_mask(rng);
1624
1625 for (i = 0; i < cnt; i++) {
1626 out[i] = off + buffered_bounded_masked_uint8(bitgen_state, rng, mask,
1627 &bcnt, &buf);
1628 }
1629 } else {
1630 for (i = 0; i < cnt; i++) {
1631 out[i] =
1632 off + buffered_bounded_lemire_uint8(bitgen_state, rng, &bcnt, &buf);
1633 }
1634 }
1635 }
1636 }
1637
1638 /*
1639 * Fills an array with cnt random npy_bool between off and off + rng
1640 * inclusive.
1641 */
random_bounded_bool_fill(bitgen_t * bitgen_state,npy_bool off,npy_bool rng,npy_intp cnt,bool use_masked,npy_bool * out)1642 void random_bounded_bool_fill(bitgen_t *bitgen_state, npy_bool off,
1643 npy_bool rng, npy_intp cnt, bool use_masked,
1644 npy_bool *out) {
1645 npy_bool mask = 0;
1646 npy_intp i;
1647 uint32_t buf = 0;
1648 int bcnt = 0;
1649
1650 for (i = 0; i < cnt; i++) {
1651 out[i] = buffered_bounded_bool(bitgen_state, off, rng, mask, &bcnt, &buf);
1652 }
1653 }
1654
/*
 * Draw a multinomial sample of `n` trials over `d` categories whose
 * probabilities are given in `pix`; the per-category counts are written
 * to `mnix`.  Each count is a binomial draw over the trials remaining,
 * with the category probability renormalized by the unassigned mass.
 */
void random_multinomial(bitgen_t *bitgen_state, RAND_INT_TYPE n,
                        RAND_INT_TYPE *mnix, double *pix, npy_intp d,
                        binomial_t *binomial) {
  RAND_INT_TYPE remaining = n;
  double p_left = 1.0;
  npy_intp j;

  for (j = 0; j < d - 1; j++) {
    mnix[j] =
        random_binomial(bitgen_state, pix[j] / p_left, remaining, binomial);
    remaining -= mnix[j];
    if (remaining <= 0) {
      /* All trials assigned; later categories stay untouched. */
      break;
    }
    p_left -= pix[j];
  }
  /* Whatever trials are left fall into the final category. */
  if (remaining > 0) {
    mnix[d - 1] = remaining;
  }
}
1673