1
2 /* Our API for random numbers.
3 *
4 * We can use ISAAC, ChaCha20, or something else.
5 *
6 * 3700 ns/word ChaCha20 in Perl
7 * 3100 ns/word Salsa20 in Perl
8 * 1600 ns/word ChaCha8 in Perl
9 * 760 ns/word ISAAC in Perl
10 *
11 * 11.20 ns/word ChaCha20 (openbsd)
12 * 10.31 ns/word ChaCha20 (dj)
13 * 8.66 ns/word ChaCha20 (sse2 Peters)
14 * 6.85 ns/word ChaCha12 (dj)
15 * 5.99 ns/word Tyche
16 * 5.11 ns/word ChaCha8 (dj)
17 * 4.37 ns/word MT19937 (Cokus)
18 * 4.14 ns/word Tyche-i
19 * 3.26 ns/word ISAAC
20 * 3.18 ns/word PCG64 (64-bit state, 64-bit types)
21 * 1.95 ns/word PCG64 (64-bit state, 128-bit types)
22 * 1.84 ns/word ChaCha20 (AVX2 chacha-opt)
23 * 1.48 ns/word Xoroshiro128+
24 * 1.16 ns/word SplitMix64
25 *
26 * These functions do locking, the underlying library does not.
27 */
28
29 #include <stdio.h>
30 #include <stddef.h>
31 #include <string.h>
32 #include "ptypes.h"
33 #include "csprng.h"
34
35 #include "chacha.h"
36 #define SEED_BYTES (32+8)
37 #define CSEED(ctx,bytes,data,good) chacha_seed(ctx,bytes,data,good)
38 #define CRBYTES(ctx,bytes,data) chacha_rand_bytes(ctx,bytes,data)
39 #define CIRAND32(ctx) chacha_irand32(ctx)
40 #define CIRAND64(ctx) chacha_irand64(ctx)
41 #define CSELFTEST() chacha_selftest()
42
43 /* Helper macros, similar to ChaCha, so we're consistent. */
44 #ifndef U8TO32_LE
45 #define U8TO32_LE(p) \
46 (((uint32_t)((p)[0]) ) | \
47 ((uint32_t)((p)[1]) << 8) | \
48 ((uint32_t)((p)[2]) << 16) | \
49 ((uint32_t)((p)[3]) << 24))
50 #endif
51 #define U32TO8_LE(p, v) \
52 do { \
53 uint32_t _v = v; \
54 (p)[0] = (((_v) ) & 0xFFU); \
55 (p)[1] = (((_v) >> 8) & 0xFFU); \
56 (p)[2] = (((_v) >> 16) & 0xFFU); \
57 (p)[3] = (((_v) >> 24) & 0xFFU); \
58 } while (0)
59
60 /*****************************************************************************/
61
62 /* We put a simple 32-bit non-CS PRNG here to help fill small seeds. */
63 #if 0
64 /* XOSHIRO128** 32-bit output, 32-bit types, 128-bit state */
65 static INLINE uint32_t rotl(const uint32_t x, int k) {
66 return (x << k) | (x >> (32 - k));
67 }
68 uint32_t prng_next(char* ctx) {
69 uint32_t *s = (uint32_t*) ctx;
70 const uint32_t result_starstar = rotl(s[0] * 5, 7) * 9;
71 const uint32_t t = s[1] << 9;
72 s[2] ^= s[0]; s[3] ^= s[1]; s[1] ^= s[2]; s[0] ^= s[3];
73 s[2] ^= t;
74 s[3] = rotl(s[3], 11);
75 return result_starstar;
76 }
77 char* prng_new(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
78 uint32_t *state;
79 New(0, state, 4, uint32_t);
80 state[0] = 1; state[1] = b; state[2] = c; state[3] = d;
81 (void) prng_next((char*)state);
82 state[0] += a;
83 (void) prng_next((char*)state);
84 return (char*) state;
85 }
86 #else
87 /* PCG RXS M XS 32. 32-bit output, 32-bit state and types. */
prng_next(char * ctx)88 uint32_t prng_next(char* ctx) {
89 uint32_t *rng = (uint32_t*) ctx;
90 uint32_t word, oldstate = rng[0];
91 rng[0] = rng[0] * 747796405U + rng[1];
92 word = ((oldstate >> ((oldstate >> 28u) + 4u)) ^ oldstate) * 277803737u;
93 return (word >> 22u) ^ word;
94 }
prng_new(uint32_t a,uint32_t b,uint32_t c,uint32_t d)95 char* prng_new(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
96 uint32_t *state;
97 New(0, state, 2, uint32_t);
98 state[0] = 0U;
99 state[1] = (b << 1u) | 1u;
100 (void) prng_next((char*)state);
101 state[0] += a;
102 (void) prng_next((char*)state);
103 state[0] ^= c;
104 (void) prng_next((char*)state);
105 state[0] ^= d;
106 (void) prng_next((char*)state);
107 return (char*) state;
108 }
109 #endif
110
111 /*****************************************************************************/
112
csprng_context_size(void)113 uint32_t csprng_context_size(void)
114 {
115 return sizeof(chacha_context_t);
116 }
117 static char _has_selftest_run = 0;
118
csprng_seed(void * ctx,uint32_t bytes,const unsigned char * data)119 void csprng_seed(void *ctx, uint32_t bytes, const unsigned char* data)
120 {
121 unsigned char seed[SEED_BYTES + 4];
122
123 /* If given a short seed, minimize zeros in state */
124 if (bytes >= SEED_BYTES) {
125 memcpy(seed, data, SEED_BYTES);
126 } else {
127 char* rng;
128 uint32_t a, b, c, d, i;
129 memcpy(seed, data, bytes);
130 memset(seed+bytes, 0, sizeof(seed)-bytes);
131 a = U8TO32_LE((seed + 0));
132 b = U8TO32_LE((seed + 4));
133 c = U8TO32_LE((seed + 8));
134 d = U8TO32_LE((seed + 12));
135 rng = prng_new(a,b,c,d);
136 for (i = 4*((bytes+3)/4); i < SEED_BYTES; i += 4)
137 U32TO8_LE(seed + i, prng_next(rng));
138 Safefree(rng);
139 #if 0
140 printf("got %u bytes in expanded to %u\n", bytes, SEED_BYTES);
141 printf("from: ");for(i=0;i<bytes;i++)printf("%02x",data[i]);printf("\n");
142 printf("to: ");for(i=0;i<SEED_BYTES;i++)printf("%02x",seed[i]);printf("\n");
143 #endif
144 }
145
146 if (!_has_selftest_run) {
147 _has_selftest_run = 1;
148 CSELFTEST();
149 }
150 CSEED(ctx, SEED_BYTES, seed, (bytes >= 16));
151 }
152
csprng_srand(void * ctx,UV insecure_seed)153 extern void csprng_srand(void* ctx, UV insecure_seed)
154 {
155 #if BITS_PER_WORD == 32
156 unsigned char seed[4] = {0};
157 U32TO8_LE(seed, insecure_seed);
158 csprng_seed(ctx, 4, seed);
159 #else
160 unsigned char seed[8] = {0};
161 if (insecure_seed <= UVCONST(4294967295)) {
162 U32TO8_LE(seed, insecure_seed);
163 csprng_seed(ctx, 4, seed);
164 } else {
165 U32TO8_LE(seed, insecure_seed);
166 U32TO8_LE(seed + 4, (insecure_seed >> 32));
167 csprng_seed(ctx, 8, seed);
168 }
169 #endif
170 }
171
csprng_rand_bytes(void * ctx,uint32_t bytes,unsigned char * data)172 void csprng_rand_bytes(void* ctx, uint32_t bytes, unsigned char* data)
173 {
174 CRBYTES(ctx, bytes, data);
175 }
176
irand32(void * ctx)177 uint32_t irand32(void* ctx)
178 {
179 return CIRAND32(ctx);
180 }
irand64(void * ctx)181 UV irand64(void* ctx)
182 {
183 #if BITS_PER_WORD < 64
184 croak("irand64 too many bits for UV");
185 #else
186 return CIRAND64(ctx);
187 #endif
188 }
189
190
191 /*****************************************************************************/
192
is_csprng_well_seeded(void * ctx)193 int is_csprng_well_seeded(void *ctx)
194 {
195 chacha_context_t *cs = ctx;
196 return cs->goodseed;
197 }
198
199 /* There are many ways to get floats from integers. A few good, many bad.
200 *
201 * Vigna recommends (x64 >> 11) * (1.0 / (1ULL<<53)).
202 * http://xoroshiro.di.unimi.it
203 * Also see alternatives discussed:
204 * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/VERSIONS/C-LANG/speed-up-real.html
205 *
206 * Melissa O'Neill notes the problem is harder than it looks, doesn't address.
207 * http://www.pcg-random.org/pdf/toms-oneill-pcg-family-v1.02.pdf
208 *
209 * randomstate for numpy uses separate code for each generator.
210 * With the exception of dSFMT, they each one one of:
211 * (x64 >> 11) * (1 / 9007199254740992.0)
212 * ((x32 >> 5) * 67108864.0 + (y32 >> 6)) / 9007199254740992.0
213 * where the first one is identical to Vigna.
214 *
215 * David Jones recommends the minor 32-bit variant:
216 * ((x32 >> 6) * 134217728.0 + (y32 >> 5)) / 9007199254740992.0
217 * http://www0.cs.ucl.ac.uk/staff/d.jones/GoodPracticeRNG.pdf
218 *
219 * Taylor Campbell discusses this in:
220 * http://mumble.net/~campbell/tmp/random_real.c
221 * He points out that there are two common non-broken choices,
222 * div by 2^-53 or div by 2^-64, and each are slightly flawed in
223 * different ways. He shows a theoretically better method.
224 */
225
226 /*
227 * We prefer the x64 / 2^-64 method. It seems to produce the best results
228 * and is easiest for ensuring we fill up all the bits.
229 * It is similar to what Geoff Kuenning does in MTwist, though he computes
230 * the constants at runtime to ensure a dodgy compiler won't munge them.
231 */
232 #define TO_NV_32 2.3283064365386962890625000000000000000E-10L
233 #define TO_NV_64 5.4210108624275221700372640043497085571E-20L
234 #define TO_NV_96 1.2621774483536188886587657044524579675E-29L
235 #define TO_NV_128 2.9387358770557187699218413430556141945E-39L
236
237 #define DRAND_32_32 (CIRAND32(ctx) * TO_NV_32)
238 #define DRAND_64_32 (((CIRAND32(ctx)>>5) * 67108864.0 + (CIRAND32(ctx)>>6)) / 9007199254740992.0)
239 #define DRAND_64_64 (CIRAND64(ctx) * TO_NV_64)
240 #define DRAND_128_32 (CIRAND32(ctx) * TO_NV_32 + CIRAND32(ctx) * TO_NV_64 + CIRAND32(ctx) * TO_NV_96 + CIRAND32(ctx) * TO_NV_128)
241 #define DRAND_128_64 (CIRAND64(ctx) * TO_NV_64 + CIRAND64(ctx) * TO_NV_128)
242
drand64(void * ctx)243 NV drand64(void* ctx)
244 {
245 NV r;
246 #if NVMANTBITS <= 32
247 r = DRAND_32_32;
248 #elif NVMANTBITS <= 64
249 r = (BITS_PER_WORD <= 32) ? DRAND_64_32 : DRAND_64_64;
250 #else
251 r = (BITS_PER_WORD <= 32) ? DRAND_128_32 : DRAND_128_64;
252 #endif
253 return r;
254 }
255
256 /* Return rand 32-bit integer between 0 to n-1 inclusive */
urandomm32(void * ctx,uint32_t n)257 uint32_t urandomm32(void *ctx, uint32_t n)
258 {
259 uint32_t r, rmin;
260
261 if (n <= 1)
262 return 0;
263
264 rmin = -n % n;
265 while (1) {
266 r = CIRAND32(ctx);
267 if (r >= rmin)
268 break;
269 }
270 return r % n;
271 }
272
urandomm64(void * ctx,UV n)273 UV urandomm64(void* ctx, UV n)
274 {
275 UV r, rmin;
276
277 if (n <= 4294967295UL)
278 return urandomm32(ctx,n);
279 if (n-1 == 4294967295UL)
280 return irand32(ctx);
281
282 rmin = -n % n;
283 while (1) {
284 r = CIRAND64(ctx);
285 if (r >= rmin)
286 break;
287 }
288 return r % n;
289 }
290
urandomb(void * ctx,int nbits)291 UV urandomb(void* ctx, int nbits)
292 {
293 if (nbits == 0) {
294 return 0;
295 } else if (nbits <= 32) {
296 return irand32(ctx) >> (32-nbits);
297 #if BITS_PER_WORD == 64
298 } else if (nbits <= 64) {
299 return irand64(ctx) >> (64-nbits);
300 #endif
301 }
302 croak("irand64 too many bits for UV");
303 }
304