1 /*
2  * PCG Random Number Generation for C++
3  *
4  * Copyright 2014 Melissa O'Neill <oneill@pcg-random.org>
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  * For additional information about the PCG random number generation scheme,
19  * including its license and other licensing options, visit
20  *
21  *     http://www.pcg-random.org
22  */
23 
24 /*
25  * This file provides support code that is useful for random-number generation
26  * but not specific to the PCG generation scheme, including:
27  *      - 128-bit int support for platforms where it isn't available natively
28  *      - bit twiddling operations
29  *      - I/O of 128-bit and 8-bit integers
30  *      - Handling the evilness of SeedSeq
31  *      - Support for efficiently producing random numbers less than a given
32  *        bound
33  */
34 
35 #ifndef PCG_EXTRAS_HPP_INCLUDED
36 #define PCG_EXTRAS_HPP_INCLUDED 1
37 
38 #include <cinttypes>
39 #include <cstddef>
40 #include <cstdlib>
41 #include <cstring>
42 #include <cassert>
43 #include <limits>
44 #include <iostream>
45 #include <type_traits>
46 #include <utility>
47 #include <locale>
48 #include <iterator>
49 #include <utility>
50 
51 #ifdef __GNUC__
52     #include <cxxabi.h>
53 #endif
54 
55 /*
56  * Abstractions for compiler-specific directives
57  */
58 
59 #ifdef __GNUC__
60     #define PCG_NOINLINE __attribute__((noinline))
61 #else
62     #define PCG_NOINLINE
63 #endif
64 
65 /*
66  * Some members of the PCG library use 128-bit math.  When compiling on 64-bit
67  * platforms, both GCC and Clang provide 128-bit integer types that are ideal
68  * for the job.
69  *
70  * On 32-bit platforms (or with other compilers), we fall back to a C++
71  * class that provides 128-bit unsigned integers instead.  It may seem
72  * like we're reinventing the wheel here, because libraries already exist
73  * that support large integers, but most existing libraries provide a very
74  * generic multiprecision code, but here we're operating at a fixed size.
75  * Also, most other libraries are fairly heavyweight.  So we use a direct
76  * implementation.  Sadly, it's much slower than hand-coded assembly or
77  * direct CPU support.
78  *
79  */
80 #if __SIZEOF_INT128__
81     namespace pcg_extras {
82         typedef __uint128_t pcg128_t;
83     }
84     #define PCG_128BIT_CONSTANT(high,low) \
85             ((pcg128_t(high) << 64) + low)
86 #else
87     #include "pcg_uint128.hpp"
88     namespace pcg_extras {
89         typedef pcg_extras::uint_x4<uint32_t,uint64_t> pcg128_t;
90     }
91     #define PCG_128BIT_CONSTANT(high,low) \
92             pcg128_t(high,low)
93     #define PCG_EMULATED_128BIT_MATH 1
94 #endif
95 
96 
97 namespace pcg_extras {
98 
99 /*
100  * We often need to represent a "number of bits".  When used normally, these
101  * numbers are never greater than 128, so an unsigned char is plenty.
102  * If you're using a nonstandard generator of a larger size, you can set
103  * PCG_BITCOUNT_T to have it define it as a larger size.  (Some compilers
104  * might produce faster code if you set it to an unsigned int.)
105  */
106 
107 #ifndef PCG_BITCOUNT_T
108     typedef uint8_t bitcount_t;
109 #else
110     typedef PCG_BITCOUNT_T bitcount_t;
111 #endif
112 
113 /*
114  * C++ requires us to be able to serialize RNG state by printing or reading
115  * it from a stream.  Because we use 128-bit ints, we also need to be able
116  * ot print them, so here is code to do so.
117  *
118  * This code provides enough functionality to print 128-bit ints in decimal
119  * and zero-padded in hex.  It's not a full-featured implementation.
120  */
121 
122 template <typename CharT, typename Traits>
123 std::basic_ostream<CharT,Traits>&
operator <<(std::basic_ostream<CharT,Traits> & out,pcg128_t value)124 operator<<(std::basic_ostream<CharT,Traits>& out, pcg128_t value)
125 {
126     auto desired_base = out.flags() & out.basefield;
127     bool want_hex = desired_base == out.hex;
128 
129     if (want_hex) {
130         uint64_t highpart = uint64_t(value >> 64);
131         uint64_t lowpart  = uint64_t(value);
132         auto desired_width = out.width();
133         if (desired_width > 16) {
134             out.width(desired_width - 16);
135         }
136         if (highpart != 0 || desired_width > 16)
137             out << highpart;
138         CharT oldfill;
139         if (highpart != 0) {
140             out.width(16);
141             oldfill = out.fill('0');
142         }
143         auto oldflags = out.setf(decltype(desired_base){}, out.showbase);
144         out << lowpart;
145         out.setf(oldflags);
146         if (highpart != 0) {
147             out.fill(oldfill);
148         }
149         return out;
150     }
151     constexpr size_t MAX_CHARS_128BIT = 40;
152 
153     char buffer[MAX_CHARS_128BIT];
154     char* pos = buffer+sizeof(buffer);
155     *(--pos) = '\0';
156     constexpr auto BASE = pcg128_t(10ULL);
157     do {
158         auto div = value / BASE;
159         auto mod = uint32_t(value - (div * BASE));
160         *(--pos) = '0' + mod;
161         value = div;
162     } while(value != pcg128_t(0ULL));
163     return out << pos;
164 }
165 
166 template <typename CharT, typename Traits>
167 std::basic_istream<CharT,Traits>&
operator >>(std::basic_istream<CharT,Traits> & in,pcg128_t & value)168 operator>>(std::basic_istream<CharT,Traits>& in, pcg128_t& value)
169 {
170     typename std::basic_istream<CharT,Traits>::sentry s(in);
171 
172     if (!s)
173          return in;
174 
175     constexpr auto BASE = pcg128_t(10ULL);
176     pcg128_t current(0ULL);
177     bool did_nothing = true;
178     bool overflow = false;
179     for(;;) {
180         CharT wide_ch = in.get();
181         if (!in.good())
182             break;
183         auto ch = in.narrow(wide_ch, '\0');
184         if (ch < '0' || ch > '9') {
185             in.unget();
186             break;
187         }
188         did_nothing = false;
189         pcg128_t digit(uint32_t(ch - '0'));
190         pcg128_t timesbase = current*BASE;
191         overflow = overflow || timesbase < current;
192         current = timesbase + digit;
193         overflow = overflow || current < digit;
194     }
195 
196     if (did_nothing || overflow) {
197         in.setstate(std::ios::failbit);
198         if (overflow)
199             current = ~pcg128_t(0ULL);
200     }
201 
202     value = current;
203 
204     return in;
205 }
206 
207 /*
208  * Likewise, if people use tiny rngs, we'll be serializing uint8_t.
209  * If we just used the provided IO operators, they'd read/write chars,
210  * not ints, so we need to define our own.  We *can* redefine this operator
211  * here because we're in our own namespace.
212  */
213 
214 template <typename CharT, typename Traits>
215 std::basic_ostream<CharT,Traits>&
operator <<(std::basic_ostream<CharT,Traits> & out,uint8_t value)216 operator<<(std::basic_ostream<CharT,Traits>&out, uint8_t value)
217 {
218     return out << uint32_t(value);
219 }
220 
221 template <typename CharT, typename Traits>
222 std::basic_istream<CharT,Traits>&
operator >>(std::basic_istream<CharT,Traits> & in,uint8_t target)223 operator>>(std::basic_istream<CharT,Traits>& in, uint8_t target)
224 {
225     uint32_t value = 0xdecea5edU;
226     in >> value;
227     if (!in && value == 0xdecea5edU)
228         return in;
229     if (value > uint8_t(~0)) {
230         in.setstate(std::ios::failbit);
231         value = ~0U;
232     }
233     target = uint8_t(value);
234     return in;
235 }
236 
237 /* Unfortunately, the above functions don't get found in preference to the
238  * built in ones, so we create some more specific overloads that will.
239  * Ugh.
240  */
241 
operator <<(std::ostream & out,uint8_t value)242 inline std::ostream& operator<<(std::ostream& out, uint8_t value)
243 {
244     return pcg_extras::operator<< <char>(out, value);
245 }
246 
operator >>(std::istream & in,uint8_t & value)247 inline std::istream& operator>>(std::istream& in, uint8_t& value)
248 {
249     return pcg_extras::operator>> <char>(in, value);
250 }
251 
252 
253 
254 /*
255  * Useful bitwise operations.
256  */
257 
258 /*
259  * XorShifts are invertable, but they are someting of a pain to invert.
260  * This function backs them out.  It's used by the whacky "inside out"
261  * generator defined later.
262  */
263 
264 template <typename itype>
unxorshift(itype x,bitcount_t bits,bitcount_t shift)265 inline itype unxorshift(itype x, bitcount_t bits, bitcount_t shift)
266 {
267     if (2*shift >= bits) {
268         return x ^ (x >> shift);
269     }
270     itype lowmask1 = (itype(1U) << (bits - shift*2)) - 1;
271     itype highmask1 = ~lowmask1;
272     itype top1 = x;
273     itype bottom1 = x & lowmask1;
274     top1 ^= top1 >> shift;
275     top1 &= highmask1;
276     x = top1 | bottom1;
277     itype lowmask2 = (itype(1U) << (bits - shift)) - 1;
278     itype bottom2 = x & lowmask2;
279     bottom2 = unxorshift(bottom2, bits - shift, shift);
280     bottom2 &= lowmask1;
281     return top1 | bottom2;
282 }
283 
284 /*
285  * Rotate left and right.
286  *
287  * In ideal world, compilers would spot idiomatic rotate code and convert it
288  * to a rotate instruction.  Of course, opinions vary on what the correct
289  * idiom is and how to spot it.  For clang, sometimes it generates better
290  * (but still crappy) code if you define PCG_USE_ZEROCHECK_ROTATE_IDIOM.
291  */
292 
293 template <typename itype>
rotl(itype value,bitcount_t rot)294 inline itype rotl(itype value, bitcount_t rot)
295 {
296     constexpr bitcount_t bits = sizeof(itype) * 8;
297     constexpr bitcount_t mask = bits - 1;
298 #if PCG_USE_ZEROCHECK_ROTATE_IDIOM
299     return rot ? (value << rot) | (value >> (bits - rot)) : value;
300 #else
301     return (value << rot) | (value >> ((- rot) & mask));
302 #endif
303 }
304 
305 template <typename itype>
rotr(itype value,bitcount_t rot)306 inline itype rotr(itype value, bitcount_t rot)
307 {
308     constexpr bitcount_t bits = sizeof(itype) * 8;
309     constexpr bitcount_t mask = bits - 1;
310 #if PCG_USE_ZEROCHECK_ROTATE_IDIOM
311     return rot ? (value >> rot) | (value << (bits - rot)) : value;
312 #else
313     return (value >> rot) | (value << ((- rot) & mask));
314 #endif
315 }
316 
317 /* Unfortunately, both Clang and GCC sometimes perform poorly when it comes
318  * to properly recognizing idiomatic rotate code, so for we also provide
319  * assembler directives (enabled with PCG_USE_INLINE_ASM).  Boo, hiss.
320  * (I hope that these compilers get better so that this code can die.)
321  *
322  * These overloads will be preferred over the general template code above.
323  */
324 #if PCG_USE_INLINE_ASM && __GNUC__ && (__x86_64__  || __i386__)
325 
rotr(uint8_t value,bitcount_t rot)326 inline uint8_t rotr(uint8_t value, bitcount_t rot)
327 {
328     asm ("rorb   %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
329     return value;
330 }
331 
rotr(uint16_t value,bitcount_t rot)332 inline uint16_t rotr(uint16_t value, bitcount_t rot)
333 {
334     asm ("rorw   %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
335     return value;
336 }
337 
rotr(uint32_t value,bitcount_t rot)338 inline uint32_t rotr(uint32_t value, bitcount_t rot)
339 {
340     asm ("rorl   %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
341     return value;
342 }
343 
344 #if __x86_64__
rotr(uint64_t value,bitcount_t rot)345 inline uint64_t rotr(uint64_t value, bitcount_t rot)
346 {
347     asm ("rorq   %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
348     return value;
349 }
350 #endif // __x86_64__
351 
352 #endif // PCG_USE_INLINE_ASM
353 
354 
355 /*
356  * The C++ SeedSeq concept (modelled by seed_seq) can fill an array of
357  * 32-bit integers with seed data, but sometimes we want to produce
358  * larger or smaller integers.
359  *
360  * The following code handles this annoyance.
361  *
362  * uneven_copy will copy an array of 32-bit ints to an array of larger or
363  * smaller ints (actually, the code is general it only needing forward
364  * iterators).  The copy is identical to the one that would be performed if
365  * we just did memcpy on a standard little-endian machine, but works
366  * regardless of the endian of the machine (or the weirdness of the ints
367  * involved).
368  *
369  * generate_to initializes an array of integers using a SeedSeq
370  * object.  It is given the size as a static constant at compile time and
371  * tries to avoid memory allocation.  If we're filling in 32-bit constants
372  * we just do it directly.  If we need a separate buffer and it's small,
373  * we allocate it on the stack.  Otherwise, we fall back to heap allocation.
374  * Ugh.
375  *
376  * generate_one produces a single value of some integral type using a
377  * SeedSeq object.
378  */
379 
380  /* uneven_copy helper, case where destination ints are less than 32 bit. */
381 
382 template<class SrcIter, class DestIter>
383 SrcIter uneven_copy_impl(
384     SrcIter src_first, DestIter dest_first, DestIter dest_last,
385     std::true_type)
386 {
387     typedef typename std::iterator_traits<SrcIter>::value_type  src_t;
388     typedef typename std::iterator_traits<DestIter>::value_type dest_t;
389 
390     constexpr bitcount_t SRC_SIZE  = sizeof(src_t);
391     constexpr bitcount_t DEST_SIZE = sizeof(dest_t);
392     constexpr bitcount_t DEST_BITS = DEST_SIZE * 8;
393     constexpr bitcount_t SCALE     = SRC_SIZE / DEST_SIZE;
394 
395     size_t count = 0;
396     src_t value;
397 
398     while (dest_first != dest_last) {
399         if ((count++ % SCALE) == 0)
400             value = *src_first++;       // Get more bits
401         else
402             value >>= DEST_BITS;        // Move down bits
403 
404         *dest_first++ = dest_t(value);  // Truncates, ignores high bits.
405     }
406     return src_first;
407 }
408 
409  /* uneven_copy helper, case where destination ints are more than 32 bit. */
410 
411 template<class SrcIter, class DestIter>
412 SrcIter uneven_copy_impl(
413     SrcIter src_first, DestIter dest_first, DestIter dest_last,
414     std::false_type)
415 {
416     typedef typename std::iterator_traits<SrcIter>::value_type  src_t;
417     typedef typename std::iterator_traits<DestIter>::value_type dest_t;
418 
419     constexpr auto SRC_SIZE  = sizeof(src_t);
420     constexpr auto SRC_BITS  = SRC_SIZE * 8;
421     constexpr auto DEST_SIZE = sizeof(dest_t);
422     constexpr auto SCALE     = (DEST_SIZE+SRC_SIZE-1) / SRC_SIZE;
423 
424     while (dest_first != dest_last) {
425         dest_t value(0UL);
426         unsigned int shift = 0;
427 
428         for (size_t i = 0; i < SCALE; ++i) {
429             value |= dest_t(*src_first++) << shift;
430             shift += SRC_BITS;
431         }
432 
433         *dest_first++ = value;
434     }
435     return src_first;
436 }
437 
438 /* uneven_copy, call the right code for larger vs. smaller */
439 
440 template<class SrcIter, class DestIter>
uneven_copy(SrcIter src_first,DestIter dest_first,DestIter dest_last)441 inline SrcIter uneven_copy(SrcIter src_first,
442                            DestIter dest_first, DestIter dest_last)
443 {
444     typedef typename std::iterator_traits<SrcIter>::value_type  src_t;
445     typedef typename std::iterator_traits<DestIter>::value_type dest_t;
446 
447     constexpr bool DEST_IS_SMALLER = sizeof(dest_t) < sizeof(src_t);
448 
449     return uneven_copy_impl(src_first, dest_first, dest_last,
450                             std::integral_constant<bool, DEST_IS_SMALLER>{});
451 }
452 
453 /* generate_to, fill in a fixed-size array of integral type using a SeedSeq
454  * (actually works for any random-access iterator)
455  */
456 
457 template <size_t size, typename SeedSeq, typename DestIter>
generate_to_impl(SeedSeq && generator,DestIter dest,std::true_type)458 inline void generate_to_impl(SeedSeq&& generator, DestIter dest,
459                              std::true_type)
460 {
461     generator.generate(dest, dest+size);
462 }
463 
464 template <size_t size, typename SeedSeq, typename DestIter>
generate_to_impl(SeedSeq && generator,DestIter dest,std::false_type)465 void generate_to_impl(SeedSeq&& generator, DestIter dest,
466                       std::false_type)
467 {
468     typedef typename std::iterator_traits<DestIter>::value_type dest_t;
469     constexpr auto DEST_SIZE = sizeof(dest_t);
470     constexpr auto GEN_SIZE  = sizeof(uint32_t);
471 
472     constexpr bool GEN_IS_SMALLER = GEN_SIZE < DEST_SIZE;
473     constexpr size_t FROM_ELEMS =
474         GEN_IS_SMALLER
475             ? size * ((DEST_SIZE+GEN_SIZE-1) / GEN_SIZE)
476             : (size + (GEN_SIZE / DEST_SIZE) - 1)
477                 / ((GEN_SIZE / DEST_SIZE) + GEN_IS_SMALLER);
478                         //  this odd code ^^^^^^^^^^^^^^^^^ is work-around for
479                         //  a bug: http://llvm.org/bugs/show_bug.cgi?id=21287
480 
481     if (FROM_ELEMS <= 1024) {
482         uint32_t buffer[FROM_ELEMS];
483         generator.generate(buffer, buffer+FROM_ELEMS);
484         uneven_copy(buffer, dest, dest+size);
485     } else {
486         uint32_t* buffer = (uint32_t*) malloc(GEN_SIZE * FROM_ELEMS);
487         generator.generate(buffer, buffer+FROM_ELEMS);
488         uneven_copy(buffer, dest, dest+size);
489         free(buffer);
490     }
491 }
492 
493 template <size_t size, typename SeedSeq, typename DestIter>
generate_to(SeedSeq && generator,DestIter dest)494 inline void generate_to(SeedSeq&& generator, DestIter dest)
495 {
496     typedef typename std::iterator_traits<DestIter>::value_type dest_t;
497     constexpr bool IS_32BIT = sizeof(dest_t) == sizeof(uint32_t);
498 
499     generate_to_impl<size>(std::forward<SeedSeq>(generator), dest,
500                            std::integral_constant<bool, IS_32BIT>{});
501 }
502 
503 /* generate_one, produce a value of integral type using a SeedSeq
504  * (optionally, we can have it produce more than one and pick which one
505  * we want)
506  */
507 
508 template <typename UInt, size_t i = 0UL, size_t N = i+1UL, typename SeedSeq>
generate_one(SeedSeq && generator)509 inline UInt generate_one(SeedSeq&& generator)
510 {
511     UInt result[N];
512     generate_to<N>(std::forward<SeedSeq>(generator), result);
513     return result[i];
514 }
515 
516 template <typename RngType>
bounded_rand(RngType & rng,typename RngType::result_type upper_bound)517 auto bounded_rand(RngType& rng, typename RngType::result_type upper_bound)
518         -> typename RngType::result_type
519 {
520     typedef typename RngType::result_type rtype;
521     rtype threshold = (RngType::max() - RngType::min() + rtype(1) - upper_bound)
522                     % upper_bound;
523     for (;;) {
524         rtype r = rng() - RngType::min();
525         if (r >= threshold)
526             return r % upper_bound;
527     }
528 }
529 
530 template <typename Iter, typename RandType>
shuffle(Iter from,Iter to,RandType && rng)531 void shuffle(Iter from, Iter to, RandType&& rng)
532 {
533     typedef typename std::iterator_traits<Iter>::difference_type delta_t;
534     auto count = to - from;
535     while (count > 1) {
536         delta_t chosen(bounded_rand(rng, count));
537         --count;
538         --to;
539         using std::swap;
540         swap(*(from+chosen), *to);
541     }
542 }
543 
544 /*
545  * Although std::seed_seq is useful, it isn't everything.  Often we want to
546  * initialize a random-number generator some other way, such as from a random
547  * device.
548  *
549  * Technically, it does not meet the requirements of a SeedSequence because
550  * it lacks some of the rarely-used member functions (some of which would
551  * be impossible to provide).  However the C++ standard is quite specific
552  * that actual engines only called the generate method, so it ought not to be
553  * a problem in practice.
554  */
555 
556 template <typename RngType>
557 class seed_seq_from {
558 private:
559     RngType rng_;
560 
561     typedef uint_least32_t result_type;
562 
563 public:
564     template<typename... Args>
seed_seq_from(Args &&...args)565     seed_seq_from(Args&&... args) :
566         rng_(std::forward<Args>(args)...)
567     {
568         // Nothing (else) to do...
569     }
570 
571     template<typename Iter>
generate(Iter start,Iter finish)572     void generate(Iter start, Iter finish)
573     {
574         for (auto i = start; i != finish; ++i)
575             *i = result_type(rng_());
576     }
577 
size() const578     constexpr size_t size() const
579     {
580         return (sizeof(typename RngType::result_type) > sizeof(result_type)
581                 && RngType::max() > ~size_t(0UL))
582              ? ~size_t(0UL)
583              : size_t(RngType::max());
584     }
585 };
586 
587 // note(jpab): static_arbitrary_seed used to be defined here.
588 //             I've pulled it out into a separate header
589 //             pcg_static_arbitrary_seed.hpp
590 
591 // Sometimes, when debugging or testing, it's handy to be able print the name
592 // of a (in human-readable form).  This code allows the idiom:
593 //
594 //      cout << printable_typename<my_foo_type_t>()
595 //
596 // to print out my_foo_type_t (or its concrete type if it is a synonym)
597 
598 template <typename T>
599 struct printable_typename {};
600 
601 template <typename T>
operator <<(std::ostream & out,printable_typename<T>)602 std::ostream& operator<<(std::ostream& out, printable_typename<T>) {
603     const char *implementation_typename = typeid(T).name();
604 #ifdef __GNUC__
605     int status;
606     const char* pretty_name =
607         abi::__cxa_demangle(implementation_typename, NULL, NULL, &status);
608     if (status == 0)
609         out << pretty_name;
610     free((void*) pretty_name);
611     if (status == 0)
612         return out;
613 #endif
614     out << implementation_typename;
615     return out;
616 }
617 
618 } // namespace pcg_extras
619 
620 #endif // PCG_EXTRAS_HPP_INCLUDED
621