1 
2 /* Our API for random numbers.
3  *
4  * We can use ISAAC, ChaCha20, or something else.
5  *
6  * 3700    ns/word  ChaCha20 in Perl
7  * 3100    ns/word  Salsa20 in Perl
8  * 1600    ns/word  ChaCha8 in Perl
9  *  760    ns/word  ISAAC in Perl
10  *
11  *   11.20 ns/word  ChaCha20 (openbsd)
12  *   10.31 ns/word  ChaCha20 (dj)
13  *    8.66 ns/word  ChaCha20 (sse2 Peters)
14  *    6.85 ns/word  ChaCha12 (dj)
15  *    5.99 ns/word  Tyche
16  *    5.11 ns/word  ChaCha8 (dj)
17  *    4.37 ns/word  MT19937 (Cokus)
18  *    4.14 ns/word  Tyche-i
19  *    3.26 ns/word  ISAAC
20  *    3.18 ns/word  PCG64 (64-bit state, 64-bit types)
21  *    1.95 ns/word  PCG64 (64-bit state, 128-bit types)
22  *    1.84 ns/word  ChaCha20 (AVX2 chacha-opt)
23  *    1.48 ns/word  Xoroshiro128+
24  *    1.16 ns/word  SplitMix64
25  *
26  * These functions do locking, the underlying library does not.
27  */
28 
29 #include <stdio.h>
30 #include <stddef.h>
31 #include <string.h>
32 #include "ptypes.h"
33 #include "csprng.h"
34 
35 #include "chacha.h"
36 #define SEED_BYTES (32+8)
37 #define CSEED(ctx,bytes,data,good)  chacha_seed(ctx,bytes,data,good)
38 #define CRBYTES(ctx,bytes,data)     chacha_rand_bytes(ctx,bytes,data)
39 #define CIRAND32(ctx)               chacha_irand32(ctx)
40 #define CIRAND64(ctx)               chacha_irand64(ctx)
41 #define CSELFTEST()                 chacha_selftest()
42 
43 /* Helper macros, similar to ChaCha, so we're consistent. */
44 #ifndef U8TO32_LE
45 #define U8TO32_LE(p) \
46   (((uint32_t)((p)[0])      ) | \
47    ((uint32_t)((p)[1]) <<  8) | \
48    ((uint32_t)((p)[2]) << 16) | \
49    ((uint32_t)((p)[3]) << 24))
50 #endif
51 #define U32TO8_LE(p, v) \
52   do { \
53     uint32_t _v = v; \
54     (p)[0] = (((_v)      ) & 0xFFU); \
55     (p)[1] = (((_v) >>  8) & 0xFFU); \
56     (p)[2] = (((_v) >> 16) & 0xFFU); \
57     (p)[3] = (((_v) >> 24) & 0xFFU); \
58   } while (0)
59 
60 /*****************************************************************************/
61 
62 /* We put a simple 32-bit non-CS PRNG here to help fill small seeds. */
63 #if 0
64 /* XOSHIRO128**  32-bit output, 32-bit types, 128-bit state */
65 static INLINE uint32_t rotl(const uint32_t x, int k) {
66   return (x << k) | (x >> (32 - k));
67 }
68 uint32_t prng_next(char* ctx) {
69   uint32_t *s = (uint32_t*) ctx;
70   const uint32_t result_starstar = rotl(s[0] * 5, 7) * 9;
71   const uint32_t t = s[1] << 9;
72   s[2] ^= s[0];  s[3] ^= s[1];  s[1] ^= s[2];  s[0] ^= s[3];
73   s[2] ^= t;
74   s[3] = rotl(s[3], 11);
75   return result_starstar;
76 }
77 char* prng_new(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
78   uint32_t *state;
79   New(0, state, 4, uint32_t);
80   state[0] = 1;  state[1] = b;  state[2] = c;  state[3] = d;
81   (void) prng_next((char*)state);
82   state[0] += a;
83   (void) prng_next((char*)state);
84   return (char*) state;
85 }
86 #else
87 /* PCG RXS M XS 32.  32-bit output, 32-bit state and types. */
prng_next(char * ctx)88 uint32_t prng_next(char* ctx) {
89   uint32_t *rng = (uint32_t*) ctx;
90   uint32_t word, oldstate = rng[0];
91   rng[0] = rng[0] * 747796405U + rng[1];
92   word = ((oldstate >> ((oldstate >> 28u) + 4u)) ^ oldstate) * 277803737u;
93   return (word >> 22u) ^ word;
94 }
prng_new(uint32_t a,uint32_t b,uint32_t c,uint32_t d)95 char* prng_new(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
96   uint32_t *state;
97   New(0, state, 2, uint32_t);
98   state[0] = 0U;
99   state[1] = (b << 1u) | 1u;
100   (void) prng_next((char*)state);
101   state[0] += a;
102   (void) prng_next((char*)state);
103   state[0] ^= c;
104   (void) prng_next((char*)state);
105   state[0] ^= d;
106   (void) prng_next((char*)state);
107   return (char*) state;
108 }
109 #endif
110 
111 /*****************************************************************************/
112 
csprng_context_size(void)113 uint32_t csprng_context_size(void)
114 {
115   return sizeof(chacha_context_t);
116 }
117 static char _has_selftest_run = 0;
118 
csprng_seed(void * ctx,uint32_t bytes,const unsigned char * data)119 void csprng_seed(void *ctx, uint32_t bytes, const unsigned char* data)
120 {
121   unsigned char seed[SEED_BYTES + 4];
122 
123   /* If given a short seed, minimize zeros in state */
124   if (bytes >= SEED_BYTES) {
125     memcpy(seed, data, SEED_BYTES);
126   } else {
127     char* rng;
128     uint32_t a, b, c, d, i;
129     memcpy(seed, data, bytes);
130     memset(seed+bytes, 0, sizeof(seed)-bytes);
131     a = U8TO32_LE((seed +  0));
132     b = U8TO32_LE((seed +  4));
133     c = U8TO32_LE((seed +  8));
134     d = U8TO32_LE((seed + 12));
135     rng = prng_new(a,b,c,d);
136     for (i = 4*((bytes+3)/4); i < SEED_BYTES; i += 4)
137       U32TO8_LE(seed + i, prng_next(rng));
138     Safefree(rng);
139 #if 0
140     printf("got %u bytes in expanded to %u\n", bytes, SEED_BYTES);
141     printf("from: ");for(i=0;i<bytes;i++)printf("%02x",data[i]);printf("\n");
142     printf("to:   ");for(i=0;i<SEED_BYTES;i++)printf("%02x",seed[i]);printf("\n");
143 #endif
144   }
145 
146   if (!_has_selftest_run) {
147     _has_selftest_run = 1;
148     CSELFTEST();
149   }
150   CSEED(ctx, SEED_BYTES, seed, (bytes >= 16));
151 }
152 
csprng_srand(void * ctx,UV insecure_seed)153 extern void csprng_srand(void* ctx, UV insecure_seed)
154 {
155 #if BITS_PER_WORD == 32
156   unsigned char seed[4] = {0};
157   U32TO8_LE(seed, insecure_seed);
158   csprng_seed(ctx, 4, seed);
159 #else
160   unsigned char seed[8] = {0};
161   if (insecure_seed <= UVCONST(4294967295)) {
162     U32TO8_LE(seed, insecure_seed);
163     csprng_seed(ctx, 4, seed);
164   } else {
165     U32TO8_LE(seed, insecure_seed);
166     U32TO8_LE(seed + 4, (insecure_seed >> 32));
167     csprng_seed(ctx, 8, seed);
168   }
169 #endif
170 }
171 
csprng_rand_bytes(void * ctx,uint32_t bytes,unsigned char * data)172 void csprng_rand_bytes(void* ctx, uint32_t bytes, unsigned char* data)
173 {
174   CRBYTES(ctx, bytes, data);
175 }
176 
irand32(void * ctx)177 uint32_t irand32(void* ctx)
178 {
179   return CIRAND32(ctx);
180 }
irand64(void * ctx)181 UV irand64(void* ctx)
182 {
183 #if BITS_PER_WORD < 64
184   croak("irand64 too many bits for UV");
185 #else
186   return CIRAND64(ctx);
187 #endif
188 }
189 
190 
191 /*****************************************************************************/
192 
is_csprng_well_seeded(void * ctx)193 int is_csprng_well_seeded(void *ctx)
194 {
195   chacha_context_t *cs = ctx;
196   return cs->goodseed;
197 }
198 
199 /* There are many ways to get floats from integers.  A few good, many bad.
200  *
201  * Vigna recommends (x64 >> 11) * (1.0 / (1ULL<<53)).
202  * http://xoroshiro.di.unimi.it
203  * Also see alternatives discussed:
204  * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/VERSIONS/C-LANG/speed-up-real.html
205  *
206  * Melissa O'Neill notes the problem is harder than it looks, doesn't address.
207  * http://www.pcg-random.org/pdf/toms-oneill-pcg-family-v1.02.pdf
208  *
209  * randomstate for numpy uses separate code for each generator.
210  * With the exception of dSFMT, they each one one of:
211  *     (x64 >> 11) * (1 / 9007199254740992.0)
212  *     ((x32 >> 5) * 67108864.0 + (y32 >> 6)) / 9007199254740992.0
213  * where the first one is identical to Vigna.
214  *
215  * David Jones recommends the minor 32-bit variant:
216  *     ((x32 >> 6) * 134217728.0 + (y32 >> 5)) / 9007199254740992.0
217  * http://www0.cs.ucl.ac.uk/staff/d.jones/GoodPracticeRNG.pdf
218  *
219  * Taylor Campbell discusses this in:
220  * http://mumble.net/~campbell/tmp/random_real.c
221  * He points out that there are two common non-broken choices,
222  * div by 2^-53  or   div by 2^-64, and each are slightly flawed in
223  * different ways.  He shows a theoretically better method.
224  */
225 
226 /*
227  * We prefer the x64 / 2^-64 method.  It seems to produce the best results
228  * and is easiest for ensuring we fill up all the bits.
229  * It is similar to what Geoff Kuenning does in MTwist, though he computes
230  * the constants at runtime to ensure a dodgy compiler won't munge them.
231  */
232 #define TO_NV_32    2.3283064365386962890625000000000000000E-10L
233 #define TO_NV_64    5.4210108624275221700372640043497085571E-20L
234 #define TO_NV_96    1.2621774483536188886587657044524579675E-29L
235 #define TO_NV_128   2.9387358770557187699218413430556141945E-39L
236 
237 #define DRAND_32_32  (CIRAND32(ctx) * TO_NV_32)
238 #define DRAND_64_32  (((CIRAND32(ctx)>>5) * 67108864.0 + (CIRAND32(ctx)>>6)) / 9007199254740992.0)
239 #define DRAND_64_64  (CIRAND64(ctx) * TO_NV_64)
240 #define DRAND_128_32 (CIRAND32(ctx) * TO_NV_32 + CIRAND32(ctx) * TO_NV_64 + CIRAND32(ctx) * TO_NV_96 + CIRAND32(ctx) * TO_NV_128)
241 #define DRAND_128_64 (CIRAND64(ctx) * TO_NV_64 + CIRAND64(ctx) * TO_NV_128)
242 
drand64(void * ctx)243 NV drand64(void* ctx)
244 {
245   NV r;
246 #if NVMANTBITS <= 32
247   r = DRAND_32_32;
248 #elif NVMANTBITS <= 64
249   r = (BITS_PER_WORD <= 32)  ?  DRAND_64_32  :  DRAND_64_64;
250 #else
251   r = (BITS_PER_WORD <= 32)  ?  DRAND_128_32 :  DRAND_128_64;
252 #endif
253   return r;
254 }
255 
256 /* Return rand 32-bit integer between 0 to n-1 inclusive */
urandomm32(void * ctx,uint32_t n)257 uint32_t urandomm32(void *ctx, uint32_t n)
258 {
259   uint32_t r, rmin;
260 
261   if (n <= 1)
262     return 0;
263 
264   rmin = -n % n;
265   while (1) {
266     r = CIRAND32(ctx);
267     if (r >= rmin)
268       break;
269   }
270   return r % n;
271 }
272 
urandomm64(void * ctx,UV n)273 UV urandomm64(void* ctx, UV n)
274 {
275   UV r, rmin;
276 
277   if (n <= 4294967295UL)
278     return urandomm32(ctx,n);
279   if (n-1 == 4294967295UL)
280     return irand32(ctx);
281 
282   rmin = -n % n;
283   while (1) {
284     r = CIRAND64(ctx);
285     if (r >= rmin)
286       break;
287   }
288   return r % n;
289 }
290 
urandomb(void * ctx,int nbits)291 UV urandomb(void* ctx, int nbits)
292 {
293   if (nbits == 0) {
294     return 0;
295   } else if (nbits <= 32) {
296     return irand32(ctx) >> (32-nbits);
297 #if BITS_PER_WORD == 64
298   } else if (nbits <= 64) {
299     return irand64(ctx) >> (64-nbits);
300 #endif
301   }
302   croak("irand64 too many bits for UV");
303 }
304