1 /* Copyright (C) 2010 The Trustees of Indiana University.                  */
2 /*                                                                         */
3 /* Use, modification and distribution is subject to the Boost Software     */
4 /* License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at */
5 /* http://www.boost.org/LICENSE_1_0.txt)                                   */
6 /*                                                                         */
7 /*  Authors: Jeremiah Willcock                                             */
8 /*           Andrew Lumsdaine                                              */
9 
10 #include <stdint.h>
11 #include <stdlib.h>
12 #include <stdio.h>
13 #define __STDC_FORMAT_MACROS
14 #include <inttypes.h>
15 #include "mod_arith.h"
16 #include "splittable_mrg.h"
17 
18 /* Multiple recursive generator from L'Ecuyer, P., Blouin, F., and       */
19 /* Couture, R. 1993. A search for good multiple recursive random number  */
20 /* generators. ACM Trans. Model. Comput. Simul. 3, 2 (Apr. 1993), 87-98. */
21 /* DOI= http://doi.acm.org/10.1145/169702.169698 -- particular generator */
22 /* used is from table 3, entry for m = 2^31 - 1, k = 5 (same generator   */
23 /* is used in GNU Scientific Library).                                   */
24 /*                                                                       */
25 /* MRG state is 5 numbers mod 2^31 - 1, and there is a transition matrix */
26 /* A from one state to the next:                                         */
27 /*                                                                       */
28 /* A = [x 0 0 0 y]                                                       */
29 /*     [1 0 0 0 0]                                                       */
30 /*     [0 1 0 0 0]                                                       */
31 /*     [0 0 1 0 0]                                                       */
32 /*     [0 0 0 1 0]                                                       */
33 /* where x = 107374182 and y = 104480                                    */
34 /*                                                                       */
35 /* To do leapfrogging (applying multiple steps at once so that we can    */
36 /* create a tree of generators), we need powers of A.  These (from an    */
37 /* analysis with Maple) all look like:                                   */
38 /*                                                                       */
39 /* let a = x * s + t                                                     */
40 /*     b = x * a + u                                                     */
41 /*     c = x * b + v                                                     */
42 /*     d = x * c + w in                                                  */
43 /* A^n = [d   s*y a*y b*y c*y]                                           */
44 /*       [c   w   s*y a*y b*y]                                           */
45 /*       [b   v   w   s*y a*y]                                           */
46 /*       [a   u   v   w   s*y]                                           */
47 /*       [s   t   u   v   w  ]                                           */
48 /* for some values of s, t, u, v, and w                                  */
49 /* Note that A^n is determined by its bottom row (and x and y, which are */
50 /* fixed), and that it has a large part that is a Toeplitz matrix.  You  */
51 /* can multiply two A-like matrices by:                                  */
52 /* (defining a..d1 and a..d2 for the two matrices)                       */
53 /* s3 = s1 d2 + t1 c2 + u1 b2 + v1 a2 + w1 s2,                           */
54 /* t3 = s1 s2 y + t1 w2 + u1 v2 + v1 u2 + w1 t2,                         */
55 /* u3 = s1 a2 y + t1 s2 y + u1 w2 + v1 v2 + w1 u2,                       */
56 /* v3 = s1 b2 y + t1 a2 y + u1 s2 y + v1 w2 + w1 v2,                     */
57 /* w3 = s1 c2 y + t1 b2 y + u1 a2 y + v1 s2 y + w1 w2                    */
58 
59 #ifdef _MSC_VER
mrg_update_cache(mrg_transition_matrix * __restrict p)60 static void mrg_update_cache(mrg_transition_matrix* __restrict p) { /* Set a, b, c, and d */
61 #else
62 static void mrg_update_cache(mrg_transition_matrix* restrict p) { /* Set a, b, c, and d */
63 #endif
64   p->a = mod_add(mod_mul_x(p->s), p->t);
65   p->b = mod_add(mod_mul_x(p->a), p->u);
66   p->c = mod_add(mod_mul_x(p->b), p->v);
67   p->d = mod_add(mod_mul_x(p->c), p->w);
68 }
69 
70 static void mrg_make_identity(mrg_transition_matrix* result) {
71   result->s = result->t = result->u = result->v = 0;
72   result->w = 1;
73   mrg_update_cache(result);
74 }
75 
76 static void mrg_make_A(mrg_transition_matrix* result) { /* Initial RNG transition matrix */
77   result->s = result->t = result->u = result->w = 0;
78   result->v = 1;
79   mrg_update_cache(result);
80 }
81 
82 /* Multiply two transition matrices; result may alias either/both inputs. */
83 #ifdef _MSC_VER
84 static void mrg_multiply(const mrg_transition_matrix* __restrict m, const mrg_transition_matrix* __restrict n, mrg_transition_matrix* result) {
85 #else
86 static void mrg_multiply(const mrg_transition_matrix* restrict m, const mrg_transition_matrix* restrict n, mrg_transition_matrix* result) {
87 #endif
88   uint_least32_t rs = mod_mac(mod_mac(mod_mac(mod_mac(mod_mul(m->s, n->d), m->t, n->c), m->u, n->b), m->v, n->a), m->w, n->s);
89   uint_least32_t rt = mod_mac(mod_mac(mod_mac(mod_mac(mod_mul_y(mod_mul(m->s, n->s)), m->t, n->w), m->u, n->v), m->v, n->u), m->w, n->t);
90   uint_least32_t ru = mod_mac(mod_mac(mod_mac(mod_mul_y(mod_mac(mod_mul(m->s, n->a), m->t, n->s)), m->u, n->w), m->v, n->v), m->w, n->u);
91   uint_least32_t rv = mod_mac(mod_mac(mod_mul_y(mod_mac(mod_mac(mod_mul(m->s, n->b), m->t, n->a), m->u, n->s)), m->v, n->w), m->w, n->v);
92   uint_least32_t rw = mod_mac(mod_mul_y(mod_mac(mod_mac(mod_mac(mod_mul(m->s, n->c), m->t, n->b), m->u, n->a), m->v, n->s)), m->w, n->w);
93   result->s = rs;
94   result->t = rt;
95   result->u = ru;
96   result->v = rv;
97   result->w = rw;
98   mrg_update_cache(result);
99 }
100 
101 /* No aliasing allowed */
102 #ifdef _MSC_VER
103 static void mrg_power(const mrg_transition_matrix* __restrict m, unsigned int exponent, mrg_transition_matrix* __restrict result) {
104 #else
105 static void mrg_power(const mrg_transition_matrix* restrict m, unsigned int exponent, mrg_transition_matrix* restrict result) {
106 #endif
107   mrg_transition_matrix current_power_of_2 = *m;
108   mrg_make_identity(result);
109   while (exponent > 0) {
110     if (exponent % 2 == 1) mrg_multiply(result, &current_power_of_2, result);
111     mrg_multiply(&current_power_of_2, &current_power_of_2, &current_power_of_2);
112     exponent /= 2;
113   }
114 }
115 
116 /* r may alias st */
117 #ifdef __MTA__
118 #pragma mta inline
119 #endif
120 #ifdef _MSC_VER
121 static void mrg_apply_transition(const mrg_transition_matrix* __restrict mat, const mrg_state* __restrict st, mrg_state* r) {
122 #else
123 static void mrg_apply_transition(const mrg_transition_matrix* restrict mat, const mrg_state* restrict st, mrg_state* r) {
124 #endif
125 #ifdef __MTA__
126   uint_fast64_t s = mat->s;
127   uint_fast64_t t = mat->t;
128   uint_fast64_t u = mat->u;
129   uint_fast64_t v = mat->v;
130   uint_fast64_t w = mat->w;
131   uint_fast64_t z1 = st->z1;
132   uint_fast64_t z2 = st->z2;
133   uint_fast64_t z3 = st->z3;
134   uint_fast64_t z4 = st->z4;
135   uint_fast64_t z5 = st->z5;
136   uint_fast64_t temp = s * z1 + t * z2 + u * z3 + v * z4;
137   r->z5 = mod_down(mod_down_fast(temp) + w * z5);
138   uint_fast64_t a = mod_down(107374182 * s + t);
139   uint_fast64_t sy = mod_down(104480 * s);
140   r->z4 = mod_down(mod_down_fast(a * z1 + u * z2 + v * z3) + w * z4 + sy * z5);
141   uint_fast64_t b = mod_down(107374182 * a + u);
142   uint_fast64_t ay = mod_down(104480 * a);
143   r->z3 = mod_down(mod_down_fast(b * z1 + v * z2 + w * z3) + sy * z4 + ay * z5);
144   uint_fast64_t c = mod_down(107374182 * b + v);
145   uint_fast64_t by = mod_down(104480 * b);
146   r->z2 = mod_down(mod_down_fast(c * z1 + w * z2 + sy * z3) + ay * z4 + by * z5);
147   uint_fast64_t d = mod_down(107374182 * c + w);
148   uint_fast64_t cy = mod_down(104480 * c);
149   r->z1 = mod_down(mod_down_fast(d * z1 + sy * z2 + ay * z3) + by * z4 + cy * z5);
150 /* A^n = [d   s*y a*y b*y c*y]                                           */
151 /*       [c   w   s*y a*y b*y]                                           */
152 /*       [b   v   w   s*y a*y]                                           */
153 /*       [a   u   v   w   s*y]                                           */
154 /*       [s   t   u   v   w  ]                                           */
155 #else
156   uint_fast32_t o1 = mod_mac_y(mod_mul(mat->d, st->z1), mod_mac4(0, mat->s, st->z2, mat->a, st->z3, mat->b, st->z4, mat->c, st->z5));
157   uint_fast32_t o2 = mod_mac_y(mod_mac2(0, mat->c, st->z1, mat->w, st->z2), mod_mac3(0, mat->s, st->z3, mat->a, st->z4, mat->b, st->z5));
158   uint_fast32_t o3 = mod_mac_y(mod_mac3(0, mat->b, st->z1, mat->v, st->z2, mat->w, st->z3), mod_mac2(0, mat->s, st->z4, mat->a, st->z5));
159   uint_fast32_t o4 = mod_mac_y(mod_mac4(0, mat->a, st->z1, mat->u, st->z2, mat->v, st->z3, mat->w, st->z4), mod_mul(mat->s, st->z5));
160   uint_fast32_t o5 = mod_mac2(mod_mac3(0, mat->s, st->z1, mat->t, st->z2, mat->u, st->z3), mat->v, st->z4, mat->w, st->z5);
161   r->z1 = o1;
162   r->z2 = o2;
163   r->z3 = o3;
164   r->z4 = o4;
165   r->z5 = o5;
166 #endif
167 }
168 
169 #ifdef __MTA__
170 #pragma mta inline
171 #endif
172 static void mrg_step(const mrg_transition_matrix* mat, mrg_state* state) {
173   mrg_apply_transition(mat, state, state);
174 }
175 
176 #ifdef __MTA__
177 #pragma mta inline
178 #endif
179 static void mrg_orig_step(mrg_state* state) { /* Use original A, not fully optimized yet */
180   uint_fast32_t new_elt = mod_mac2(0, 107374182, state->z1, 104480, state->z5);
181   state->z5 = state->z4;
182   state->z4 = state->z3;
183   state->z3 = state->z2;
184   state->z2 = state->z1;
185   state->z1 = new_elt;
186 }
187 
188 extern const mrg_transition_matrix mrg_skip_matrices[][256]; /* In generated mrg_transitions.c */
189 
190 void mrg_skip(mrg_state* state, uint_least64_t exponent_high, uint_least64_t exponent_middle, uint_least64_t exponent_low) {
191   /* fprintf(stderr, "skip(%016" PRIXLEAST64 "%016" PRIXLEAST64 "%016" PRIXLEAST64 ")\n", exponent_high, exponent_middle, exponent_low); */
192   int byte_index;
193   for (byte_index = 0; exponent_low; ++byte_index, exponent_low >>= 8) {
194     uint_least8_t val = (uint_least8_t)(exponent_low & 0xFF);
195     if (val != 0) mrg_step(&mrg_skip_matrices[byte_index][val], state);
196   }
197   for (byte_index = 8; exponent_middle; ++byte_index, exponent_middle >>= 8) {
198     uint_least8_t val = (uint_least8_t)(exponent_middle & 0xFF);
199     if (val != 0) mrg_step(&mrg_skip_matrices[byte_index][val], state);
200   }
201   for (byte_index = 16; exponent_high; ++byte_index, exponent_high >>= 8) {
202     uint_least8_t val = (uint_least8_t)(exponent_high & 0xFF);
203     if (val != 0) mrg_step(&mrg_skip_matrices[byte_index][val], state);
204   }
205 }
206 
207 #ifdef DUMP_TRANSITION_TABLE
208 const mrg_transition_matrix mrg_skip_matrices[][256] = {}; /* Dummy version */
209 
210 void dump_mrg(FILE* out, const mrg_transition_matrix* m) {
211   /* This is used as an initializer for the mrg_transition_matrix struct, so
212    * the order of the fields here needs to match the struct
213    * mrg_transition_matrix definition in splittable_mrg.h */
214   fprintf(out, "{%" PRIuFAST32 ", %" PRIuFAST32 ", %" PRIuFAST32 ", %" PRIuFAST32 ", %" PRIuFAST32 ", %" PRIuFAST32 ", %" PRIuFAST32 ", %" PRIuFAST32 ", %" PRIuFAST32 "}\n", m->s, m->t, m->u, m->v, m->w, m->a, m->b, m->c, m->d);
215 }
216 
217 void dump_mrg_powers(void) {
218   /* transitions contains A^(256^n) for n in 0 .. 192/8 */
219   int i, j;
220   mrg_transition_matrix transitions[192 / 8];
221   FILE* out = fopen("mrg_transitions.c", "w");
222   if (!out) {
223     fprintf(stderr, "dump_mrg_powers: could not open mrg_transitions.c for output\n");
224     exit (1);
225   }
226   fprintf(out, "/* Copyright (C) 2010 The Trustees of Indiana University.                  */\n");
227   fprintf(out, "/*                                                                         */\n");
228   fprintf(out, "/* Use, modification and distribution is subject to the Boost Software     */\n");
229   fprintf(out, "/* License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at */\n");
230   fprintf(out, "/* http://www.boost.org/LICENSE_1_0.txt)                                   */\n");
231   fprintf(out, "/*                                                                         */\n");
232   fprintf(out, "/*  Authors: Jeremiah Willcock                                             */\n");
233   fprintf(out, "/*           Andrew Lumsdaine                                              */\n");
234   fprintf(out, "\n");
235   fprintf(out, "/* This code was generated by dump_mrg_powers() in splittable_mrg.c;\n * look there for how to rebuild the table. */\n\n");
236   fprintf(out, "#include \"splittable_mrg.h\"\n");
237   fprintf(out, "const mrg_transition_matrix mrg_skip_matrices[][256] = {\n");
238   for (i = 0; i < 192 / 8; ++i) {
239     if (i != 0) fprintf(out, ",");
240     fprintf(out, "/* Byte %d */ {\n", i);
241     mrg_transition_matrix m;
242     mrg_make_identity(&m);
243     dump_mrg(out, &m);
244     if (i == 0) {
245       mrg_make_A(&transitions[i]);
246     } else {
247       mrg_power(&transitions[i - 1], 256, &transitions[i]);
248     }
249     fprintf(out, ",");
250     dump_mrg(out, &transitions[i]);
251     for (j = 2; j < 256; ++j) {
252       fprintf(out, ",");
253       mrg_power(&transitions[i], j, &m);
254       dump_mrg(out, &m);
255     }
256     fprintf(out, "} /* End of byte %d */\n", i);
257   }
258   fprintf(out, "};\n");
259   fclose(out);
260 }
261 
262 /* Build this file with -DDUMP_TRANSITION_TABLE on the host system, then build
263  * the output mrg_transitions.c */
264 int main(int argc, char** argv) {
265   dump_mrg_powers();
266   return 0;
267 }
268 #endif
269 
270 /* Returns integer value in [0, 2^31-1) */
271 uint_fast32_t mrg_get_uint(const mrg_transition_matrix* mat, mrg_state* state) {
272   mrg_step(mat, state);
273   return state->z1;
274 }
275 
276 /* Returns real value in [0, 1) */
277 double mrg_get_double(const mrg_transition_matrix* mat, mrg_state* state) {
278   return (double)mrg_get_uint(mat, state) * .000000000465661287524579692 /* (2^31 - 1)^(-1) */ +
279          (double)mrg_get_uint(mat, state) * .0000000000000000002168404346990492787 /* (2^31 - 1)^(-2) */
280     ;
281 }
282 
283 /* Returns integer value in [0, 2^31-1) using original transition matrix. */
284 uint_fast32_t mrg_get_uint_orig(mrg_state* state) {
285   mrg_orig_step(state);
286   return state->z1;
287 }
288 
289 /* Returns real value in [0, 1) using original transition matrix. */
290 double mrg_get_double_orig(mrg_state* state) {
291   return (double)mrg_get_uint_orig(state) * .000000000465661287524579692 /* (2^31 - 1)^(-1) */ +
292          (double)mrg_get_uint_orig(state) * .0000000000000000002168404346990492787 /* (2^31 - 1)^(-2) */
293     ;
294 }
295 
296 void mrg_init(mrg_transition_matrix* tm, mrg_state* st) {
297   uint_fast32_t seed[5] = {1, 1, 1, 1, 1};
298   mrg_make_A(tm);
299   mrg_seed(st, seed);
300 }
301 
302 void mrg_seed(mrg_state* st, const uint_fast32_t seed[5]) {
303   st->z1 = seed[0];
304   st->z2 = seed[1];
305   st->z3 = seed[2];
306   st->z4 = seed[3];
307   st->z5 = seed[4];
308 }
309 
310 /* Split a transition matrix; the result of this function is pre-cached so it
311  * does not need to be called for individual splits of the PRNG state. */
312 void mrg_split_matrix(const mrg_transition_matrix* tm_in,
313                       mrg_transition_matrix* tm_out,
314                       unsigned int n) {
315   if (n == 0) return;
316   mrg_power(tm_in, n, tm_out);
317 }
318 
319 /* The variable st_out should be an array of length n; all other parameters are
320  * single elements.
321  *
322  * The state st_in should not be used for random number generation after this
323  * function is called. */
324 void mrg_split_state(const mrg_transition_matrix* tm_in,
325                      const mrg_state* st_in,
326                      mrg_state* st_out,
327                      unsigned int n) {
328   unsigned int i;
329   if (n == 0) return;
330   st_out[0] = *st_in;
331   for (i = 1; i < n; ++i) {
332     st_out[i] = st_out[i - 1];
333     mrg_step(tm_in, &st_out[i]);
334   }
335 }
336