1 /* Copyright (C) 2010 The Trustees of Indiana University. */
2 /* */
3 /* Use, modification and distribution is subject to the Boost Software */
4 /* License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at */
5 /* http://www.boost.org/LICENSE_1_0.txt) */
6 /* */
7 /* Authors: Jeremiah Willcock */
8 /* Andrew Lumsdaine */
9
10 #include <stdint.h>
11 #include <stdlib.h>
12 #include <stdio.h>
13 #define __STDC_FORMAT_MACROS
14 #include <inttypes.h>
15 #include "mod_arith.h"
16 #include "splittable_mrg.h"
17
18 /* Multiple recursive generator from L'Ecuyer, P., Blouin, F., and */
19 /* Couture, R. 1993. A search for good multiple recursive random number */
20 /* generators. ACM Trans. Model. Comput. Simul. 3, 2 (Apr. 1993), 87-98. */
21 /* DOI= http://doi.acm.org/10.1145/169702.169698 -- particular generator */
22 /* used is from table 3, entry for m = 2^31 - 1, k = 5 (same generator */
23 /* is used in GNU Scientific Library). */
24 /* */
25 /* MRG state is 5 numbers mod 2^31 - 1, and there is a transition matrix */
26 /* A from one state to the next: */
27 /* */
28 /* A = [x 0 0 0 y] */
29 /* [1 0 0 0 0] */
30 /* [0 1 0 0 0] */
31 /* [0 0 1 0 0] */
32 /* [0 0 0 1 0] */
33 /* where x = 107374182 and y = 104480 */
34 /* */
35 /* To do leapfrogging (applying multiple steps at once so that we can */
36 /* create a tree of generators), we need powers of A. These (from an */
37 /* analysis with Maple) all look like: */
38 /* */
39 /* let a = x * s + t */
40 /* b = x * a + u */
41 /* c = x * b + v */
42 /* d = x * c + w in */
43 /* A^n = [d s*y a*y b*y c*y] */
44 /* [c w s*y a*y b*y] */
45 /* [b v w s*y a*y] */
46 /* [a u v w s*y] */
47 /* [s t u v w ] */
48 /* for some values of s, t, u, v, and w */
49 /* Note that A^n is determined by its bottom row (and x and y, which are */
50 /* fixed), and that it has a large part that is a Toeplitz matrix. You */
51 /* can multiply two A-like matrices by: */
52 /* (defining a..d1 and a..d2 for the two matrices) */
53 /* s3 = s1 d2 + t1 c2 + u1 b2 + v1 a2 + w1 s2, */
54 /* t3 = s1 s2 y + t1 w2 + u1 v2 + v1 u2 + w1 t2, */
55 /* u3 = s1 a2 y + t1 s2 y + u1 w2 + v1 v2 + w1 u2, */
56 /* v3 = s1 b2 y + t1 a2 y + u1 s2 y + v1 w2 + w1 v2, */
57 /* w3 = s1 c2 y + t1 b2 y + u1 a2 y + v1 s2 y + w1 w2 */
58
59 #ifdef _MSC_VER
mrg_update_cache(mrg_transition_matrix * __restrict p)60 static void mrg_update_cache(mrg_transition_matrix* __restrict p) { /* Set a, b, c, and d */
61 #else
62 static void mrg_update_cache(mrg_transition_matrix* restrict p) { /* Set a, b, c, and d */
63 #endif
64 p->a = mod_add(mod_mul_x(p->s), p->t);
65 p->b = mod_add(mod_mul_x(p->a), p->u);
66 p->c = mod_add(mod_mul_x(p->b), p->v);
67 p->d = mod_add(mod_mul_x(p->c), p->w);
68 }
69
70 static void mrg_make_identity(mrg_transition_matrix* result) {
71 result->s = result->t = result->u = result->v = 0;
72 result->w = 1;
73 mrg_update_cache(result);
74 }
75
76 static void mrg_make_A(mrg_transition_matrix* result) { /* Initial RNG transition matrix */
77 result->s = result->t = result->u = result->w = 0;
78 result->v = 1;
79 mrg_update_cache(result);
80 }
81
82 /* Multiply two transition matrices; result may alias either/both inputs. */
83 #ifdef _MSC_VER
84 static void mrg_multiply(const mrg_transition_matrix* __restrict m, const mrg_transition_matrix* __restrict n, mrg_transition_matrix* result) {
85 #else
86 static void mrg_multiply(const mrg_transition_matrix* restrict m, const mrg_transition_matrix* restrict n, mrg_transition_matrix* result) {
87 #endif
88 uint_least32_t rs = mod_mac(mod_mac(mod_mac(mod_mac(mod_mul(m->s, n->d), m->t, n->c), m->u, n->b), m->v, n->a), m->w, n->s);
89 uint_least32_t rt = mod_mac(mod_mac(mod_mac(mod_mac(mod_mul_y(mod_mul(m->s, n->s)), m->t, n->w), m->u, n->v), m->v, n->u), m->w, n->t);
90 uint_least32_t ru = mod_mac(mod_mac(mod_mac(mod_mul_y(mod_mac(mod_mul(m->s, n->a), m->t, n->s)), m->u, n->w), m->v, n->v), m->w, n->u);
91 uint_least32_t rv = mod_mac(mod_mac(mod_mul_y(mod_mac(mod_mac(mod_mul(m->s, n->b), m->t, n->a), m->u, n->s)), m->v, n->w), m->w, n->v);
92 uint_least32_t rw = mod_mac(mod_mul_y(mod_mac(mod_mac(mod_mac(mod_mul(m->s, n->c), m->t, n->b), m->u, n->a), m->v, n->s)), m->w, n->w);
93 result->s = rs;
94 result->t = rt;
95 result->u = ru;
96 result->v = rv;
97 result->w = rw;
98 mrg_update_cache(result);
99 }
100
101 /* No aliasing allowed */
102 #ifdef _MSC_VER
103 static void mrg_power(const mrg_transition_matrix* __restrict m, unsigned int exponent, mrg_transition_matrix* __restrict result) {
104 #else
105 static void mrg_power(const mrg_transition_matrix* restrict m, unsigned int exponent, mrg_transition_matrix* restrict result) {
106 #endif
107 mrg_transition_matrix current_power_of_2 = *m;
108 mrg_make_identity(result);
109 while (exponent > 0) {
110 if (exponent % 2 == 1) mrg_multiply(result, ¤t_power_of_2, result);
111 mrg_multiply(¤t_power_of_2, ¤t_power_of_2, ¤t_power_of_2);
112 exponent /= 2;
113 }
114 }
115
116 /* r may alias st */
117 #ifdef __MTA__
118 #pragma mta inline
119 #endif
120 #ifdef _MSC_VER
121 static void mrg_apply_transition(const mrg_transition_matrix* __restrict mat, const mrg_state* __restrict st, mrg_state* r) {
122 #else
123 static void mrg_apply_transition(const mrg_transition_matrix* restrict mat, const mrg_state* restrict st, mrg_state* r) {
124 #endif
125 #ifdef __MTA__
126 uint_fast64_t s = mat->s;
127 uint_fast64_t t = mat->t;
128 uint_fast64_t u = mat->u;
129 uint_fast64_t v = mat->v;
130 uint_fast64_t w = mat->w;
131 uint_fast64_t z1 = st->z1;
132 uint_fast64_t z2 = st->z2;
133 uint_fast64_t z3 = st->z3;
134 uint_fast64_t z4 = st->z4;
135 uint_fast64_t z5 = st->z5;
136 uint_fast64_t temp = s * z1 + t * z2 + u * z3 + v * z4;
137 r->z5 = mod_down(mod_down_fast(temp) + w * z5);
138 uint_fast64_t a = mod_down(107374182 * s + t);
139 uint_fast64_t sy = mod_down(104480 * s);
140 r->z4 = mod_down(mod_down_fast(a * z1 + u * z2 + v * z3) + w * z4 + sy * z5);
141 uint_fast64_t b = mod_down(107374182 * a + u);
142 uint_fast64_t ay = mod_down(104480 * a);
143 r->z3 = mod_down(mod_down_fast(b * z1 + v * z2 + w * z3) + sy * z4 + ay * z5);
144 uint_fast64_t c = mod_down(107374182 * b + v);
145 uint_fast64_t by = mod_down(104480 * b);
146 r->z2 = mod_down(mod_down_fast(c * z1 + w * z2 + sy * z3) + ay * z4 + by * z5);
147 uint_fast64_t d = mod_down(107374182 * c + w);
148 uint_fast64_t cy = mod_down(104480 * c);
149 r->z1 = mod_down(mod_down_fast(d * z1 + sy * z2 + ay * z3) + by * z4 + cy * z5);
150 /* A^n = [d s*y a*y b*y c*y] */
151 /* [c w s*y a*y b*y] */
152 /* [b v w s*y a*y] */
153 /* [a u v w s*y] */
154 /* [s t u v w ] */
155 #else
156 uint_fast32_t o1 = mod_mac_y(mod_mul(mat->d, st->z1), mod_mac4(0, mat->s, st->z2, mat->a, st->z3, mat->b, st->z4, mat->c, st->z5));
157 uint_fast32_t o2 = mod_mac_y(mod_mac2(0, mat->c, st->z1, mat->w, st->z2), mod_mac3(0, mat->s, st->z3, mat->a, st->z4, mat->b, st->z5));
158 uint_fast32_t o3 = mod_mac_y(mod_mac3(0, mat->b, st->z1, mat->v, st->z2, mat->w, st->z3), mod_mac2(0, mat->s, st->z4, mat->a, st->z5));
159 uint_fast32_t o4 = mod_mac_y(mod_mac4(0, mat->a, st->z1, mat->u, st->z2, mat->v, st->z3, mat->w, st->z4), mod_mul(mat->s, st->z5));
160 uint_fast32_t o5 = mod_mac2(mod_mac3(0, mat->s, st->z1, mat->t, st->z2, mat->u, st->z3), mat->v, st->z4, mat->w, st->z5);
161 r->z1 = o1;
162 r->z2 = o2;
163 r->z3 = o3;
164 r->z4 = o4;
165 r->z5 = o5;
166 #endif
167 }
168
169 #ifdef __MTA__
170 #pragma mta inline
171 #endif
172 static void mrg_step(const mrg_transition_matrix* mat, mrg_state* state) {
173 mrg_apply_transition(mat, state, state);
174 }
175
176 #ifdef __MTA__
177 #pragma mta inline
178 #endif
179 static void mrg_orig_step(mrg_state* state) { /* Use original A, not fully optimized yet */
180 uint_fast32_t new_elt = mod_mac2(0, 107374182, state->z1, 104480, state->z5);
181 state->z5 = state->z4;
182 state->z4 = state->z3;
183 state->z3 = state->z2;
184 state->z2 = state->z1;
185 state->z1 = new_elt;
186 }
187
188 extern const mrg_transition_matrix mrg_skip_matrices[][256]; /* In generated mrg_transitions.c */
189
190 void mrg_skip(mrg_state* state, uint_least64_t exponent_high, uint_least64_t exponent_middle, uint_least64_t exponent_low) {
191 /* fprintf(stderr, "skip(%016" PRIXLEAST64 "%016" PRIXLEAST64 "%016" PRIXLEAST64 ")\n", exponent_high, exponent_middle, exponent_low); */
192 int byte_index;
193 for (byte_index = 0; exponent_low; ++byte_index, exponent_low >>= 8) {
194 uint_least8_t val = (uint_least8_t)(exponent_low & 0xFF);
195 if (val != 0) mrg_step(&mrg_skip_matrices[byte_index][val], state);
196 }
197 for (byte_index = 8; exponent_middle; ++byte_index, exponent_middle >>= 8) {
198 uint_least8_t val = (uint_least8_t)(exponent_middle & 0xFF);
199 if (val != 0) mrg_step(&mrg_skip_matrices[byte_index][val], state);
200 }
201 for (byte_index = 16; exponent_high; ++byte_index, exponent_high >>= 8) {
202 uint_least8_t val = (uint_least8_t)(exponent_high & 0xFF);
203 if (val != 0) mrg_step(&mrg_skip_matrices[byte_index][val], state);
204 }
205 }
206
207 #ifdef DUMP_TRANSITION_TABLE
208 const mrg_transition_matrix mrg_skip_matrices[][256] = {}; /* Dummy version */
209
210 void dump_mrg(FILE* out, const mrg_transition_matrix* m) {
211 /* This is used as an initializer for the mrg_transition_matrix struct, so
212 * the order of the fields here needs to match the struct
213 * mrg_transition_matrix definition in splittable_mrg.h */
214 fprintf(out, "{%" PRIuFAST32 ", %" PRIuFAST32 ", %" PRIuFAST32 ", %" PRIuFAST32 ", %" PRIuFAST32 ", %" PRIuFAST32 ", %" PRIuFAST32 ", %" PRIuFAST32 ", %" PRIuFAST32 "}\n", m->s, m->t, m->u, m->v, m->w, m->a, m->b, m->c, m->d);
215 }
216
217 void dump_mrg_powers(void) {
218 /* transitions contains A^(256^n) for n in 0 .. 192/8 */
219 int i, j;
220 mrg_transition_matrix transitions[192 / 8];
221 FILE* out = fopen("mrg_transitions.c", "w");
222 if (!out) {
223 fprintf(stderr, "dump_mrg_powers: could not open mrg_transitions.c for output\n");
224 exit (1);
225 }
226 fprintf(out, "/* Copyright (C) 2010 The Trustees of Indiana University. */\n");
227 fprintf(out, "/* */\n");
228 fprintf(out, "/* Use, modification and distribution is subject to the Boost Software */\n");
229 fprintf(out, "/* License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at */\n");
230 fprintf(out, "/* http://www.boost.org/LICENSE_1_0.txt) */\n");
231 fprintf(out, "/* */\n");
232 fprintf(out, "/* Authors: Jeremiah Willcock */\n");
233 fprintf(out, "/* Andrew Lumsdaine */\n");
234 fprintf(out, "\n");
235 fprintf(out, "/* This code was generated by dump_mrg_powers() in splittable_mrg.c;\n * look there for how to rebuild the table. */\n\n");
236 fprintf(out, "#include \"splittable_mrg.h\"\n");
237 fprintf(out, "const mrg_transition_matrix mrg_skip_matrices[][256] = {\n");
238 for (i = 0; i < 192 / 8; ++i) {
239 if (i != 0) fprintf(out, ",");
240 fprintf(out, "/* Byte %d */ {\n", i);
241 mrg_transition_matrix m;
242 mrg_make_identity(&m);
243 dump_mrg(out, &m);
244 if (i == 0) {
245 mrg_make_A(&transitions[i]);
246 } else {
247 mrg_power(&transitions[i - 1], 256, &transitions[i]);
248 }
249 fprintf(out, ",");
250 dump_mrg(out, &transitions[i]);
251 for (j = 2; j < 256; ++j) {
252 fprintf(out, ",");
253 mrg_power(&transitions[i], j, &m);
254 dump_mrg(out, &m);
255 }
256 fprintf(out, "} /* End of byte %d */\n", i);
257 }
258 fprintf(out, "};\n");
259 fclose(out);
260 }
261
262 /* Build this file with -DDUMP_TRANSITION_TABLE on the host system, then build
263 * the output mrg_transitions.c */
264 int main(int argc, char** argv) {
265 dump_mrg_powers();
266 return 0;
267 }
268 #endif
269
270 /* Returns integer value in [0, 2^31-1) */
271 uint_fast32_t mrg_get_uint(const mrg_transition_matrix* mat, mrg_state* state) {
272 mrg_step(mat, state);
273 return state->z1;
274 }
275
276 /* Returns real value in [0, 1) */
277 double mrg_get_double(const mrg_transition_matrix* mat, mrg_state* state) {
278 return (double)mrg_get_uint(mat, state) * .000000000465661287524579692 /* (2^31 - 1)^(-1) */ +
279 (double)mrg_get_uint(mat, state) * .0000000000000000002168404346990492787 /* (2^31 - 1)^(-2) */
280 ;
281 }
282
283 /* Returns integer value in [0, 2^31-1) using original transition matrix. */
284 uint_fast32_t mrg_get_uint_orig(mrg_state* state) {
285 mrg_orig_step(state);
286 return state->z1;
287 }
288
289 /* Returns real value in [0, 1) using original transition matrix. */
290 double mrg_get_double_orig(mrg_state* state) {
291 return (double)mrg_get_uint_orig(state) * .000000000465661287524579692 /* (2^31 - 1)^(-1) */ +
292 (double)mrg_get_uint_orig(state) * .0000000000000000002168404346990492787 /* (2^31 - 1)^(-2) */
293 ;
294 }
295
296 void mrg_init(mrg_transition_matrix* tm, mrg_state* st) {
297 uint_fast32_t seed[5] = {1, 1, 1, 1, 1};
298 mrg_make_A(tm);
299 mrg_seed(st, seed);
300 }
301
302 void mrg_seed(mrg_state* st, const uint_fast32_t seed[5]) {
303 st->z1 = seed[0];
304 st->z2 = seed[1];
305 st->z3 = seed[2];
306 st->z4 = seed[3];
307 st->z5 = seed[4];
308 }
309
310 /* Split a transition matrix; the result of this function is pre-cached so it
311 * does not need to be called for individual splits of the PRNG state. */
312 void mrg_split_matrix(const mrg_transition_matrix* tm_in,
313 mrg_transition_matrix* tm_out,
314 unsigned int n) {
315 if (n == 0) return;
316 mrg_power(tm_in, n, tm_out);
317 }
318
319 /* The variable st_out should be an array of length n; all other parameters are
320 * single elements.
321 *
322 * The state st_in should not be used for random number generation after this
323 * function is called. */
324 void mrg_split_state(const mrg_transition_matrix* tm_in,
325 const mrg_state* st_in,
326 mrg_state* st_out,
327 unsigned int n) {
328 unsigned int i;
329 if (n == 0) return;
330 st_out[0] = *st_in;
331 for (i = 1; i < n; ++i) {
332 st_out[i] = st_out[i - 1];
333 mrg_step(tm_in, &st_out[i]);
334 }
335 }
336