1 /*
2  * PCG Random Number Generation for C++
3  *
4  * Copyright 2014 Melissa O'Neill <oneill@pcg-random.org>
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  * For additional information about the PCG random number generation scheme,
19  * including its license and other licensing options, visit
20  *
21  *     http://www.pcg-random.org
22  */
23 
24 /*
25  * Modified by The OpenClonk.org Project to improve compatibility with
26  * Microsoft Visual C++.
27  */
28 
29 /*
30  * This file provides support code that is useful for random-number generation
31  * but not specific to the PCG generation scheme, including:
32  *      - 128-bit int support for platforms where it isn't available natively
33  *      - bit twiddling operations
34  *      - I/O of 128-bit and 8-bit integers
35  *      - Handling the evilness of SeedSeq
36  *      - Support for efficiently producing random numbers less than a given
37  *        bound
38  */
39 
40 #ifndef PCG_EXTRAS_HPP_INCLUDED
41 #define PCG_EXTRAS_HPP_INCLUDED 1
42 
43 #include <cinttypes>
44 #include <cstddef>
45 #include <cstdlib>
46 #include <cstring>
47 #include <cassert>
48 #include <limits>
49 #include <iostream>
50 #include <type_traits>
51 #include <utility>
52 #include <locale>
53 #include <iterator>
54 #include <utility>
55 
56 #ifdef __GNUC__
57     #include <cxxabi.h>
58 #endif
59 
60 /*
61  * Abstractions for compiler-specific directives
62  */
63 
64 #ifdef __GNUC__
65     #define PCG_NOINLINE __attribute__((noinline))
66 	#define PCG_ALWAYS_INLINE __attribute__((always_inline))
67 #else
68     #define PCG_NOINLINE
69 	#define PCG_ALWAYS_INLINE
70 #endif
71 
72 /*
73  * Some members of the PCG library use 128-bit math.  When compiling on 64-bit
74  * platforms, both GCC and Clang provide 128-bit integer types that are ideal
75  * for the job.
76  *
77  * On 32-bit platforms (or with other compilers), we fall back to a C++
78  * class that provides 128-bit unsigned integers instead.  It may seem
79  * like we're reinventing the wheel here, because libraries already exist
80  * that support large integers, but most existing libraries provide a very
81  * generic multiprecision code, but here we're operating at a fixed size.
82  * Also, most other libraries are fairly heavyweight.  So we use a direct
83  * implementation.  Sadly, it's much slower than hand-coded assembly or
84  * direct CPU support.
85  *
86  */
87 #if __SIZEOF_INT128__
88     namespace pcg_extras {
89         typedef __uint128_t pcg128_t;
90     }
91     #define PCG_128BIT_CONSTANT(high,low) \
92             ((pcg128_t(high) << 64) + low)
93 #else
94     #include "pcg_uint128.hpp"
95     namespace pcg_extras {
96         typedef pcg_extras::uint_x4<uint32_t,uint64_t> pcg128_t;
97     }
98     #define PCG_128BIT_CONSTANT(high,low) \
99             pcg128_t(high,low)
100     #define PCG_EMULATED_128BIT_MATH 1
101 #endif
102 
103 
104 namespace pcg_extras {
105 
106 /*
107  * We often need to represent a "number of bits".  When used normally, these
108  * numbers are never greater than 128, so an unsigned char is plenty.
109  * If you're using a nonstandard generator of a larger size, you can set
110  * PCG_BITCOUNT_T to have it define it as a larger size.  (Some compilers
111  * might produce faster code if you set it to an unsigned int.)
112  */
113 
114 #ifndef PCG_BITCOUNT_T
115     typedef uint8_t bitcount_t;
116 #else
117     typedef PCG_BITCOUNT_T bitcount_t;
118 #endif
119 
120 /*
121  * C++ requires us to be able to serialize RNG state by printing or reading
122  * it from a stream.  Because we use 128-bit ints, we also need to be able
123  * ot print them, so here is code to do so.
124  *
125  * This code provides enough functionality to print 128-bit ints in decimal
126  * and zero-padded in hex.  It's not a full-featured implementation.
127  */
128 
129 template <typename CharT, typename Traits>
130 std::basic_ostream<CharT,Traits>&
operator <<(std::basic_ostream<CharT,Traits> & out,pcg128_t value)131 operator<<(std::basic_ostream<CharT,Traits>& out, pcg128_t value)
132 {
133     auto desired_base = out.flags() & out.basefield;
134     bool want_hex = desired_base == out.hex;
135 
136     if (want_hex) {
137         uint64_t highpart = uint64_t(value >> 64);
138         uint64_t lowpart  = uint64_t(value);
139         auto desired_width = out.width();
140         if (desired_width > 16) {
141             out.width(desired_width - 16);
142         }
143         if (highpart != 0 || desired_width > 16)
144             out << highpart;
145         CharT oldfill;
146         if (highpart != 0) {
147             out.width(16);
148             oldfill = out.fill('0');
149         }
150         auto oldflags = out.setf(decltype(desired_base){}, out.showbase);
151         out << lowpart;
152         out.setf(oldflags);
153         if (highpart != 0) {
154             out.fill(oldfill);
155         }
156         return out;
157     }
158     constexpr size_t MAX_CHARS_128BIT = 40;
159 
160     char buffer[MAX_CHARS_128BIT];
161     char* pos = buffer+sizeof(buffer);
162     *(--pos) = '\0';
163     constexpr auto BASE = pcg128_t(10ULL);
164     do {
165         auto div = value / BASE;
166         auto mod = uint32_t(value - (div * BASE));
167         *(--pos) = '0' + mod;
168         value = div;
169     } while(value != pcg128_t(0ULL));
170     return out << pos;
171 }
172 
173 template <typename CharT, typename Traits>
174 std::basic_istream<CharT,Traits>&
operator >>(std::basic_istream<CharT,Traits> & in,pcg128_t & value)175 operator>>(std::basic_istream<CharT,Traits>& in, pcg128_t& value)
176 {
177     typename std::basic_istream<CharT,Traits>::sentry s(in);
178 
179     if (!s)
180          return in;
181 
182     constexpr auto BASE = pcg128_t(10ULL);
183     pcg128_t current(0ULL);
184     bool did_nothing = true;
185     bool overflow = false;
186     for(;;) {
187         CharT wide_ch = in.get();
188         if (!in.good())
189             break;
190         auto ch = in.narrow(wide_ch, '\0');
191         if (ch < '0' || ch > '9') {
192             in.unget();
193             break;
194         }
195         did_nothing = false;
196         pcg128_t digit(uint32_t(ch - '0'));
197         pcg128_t timesbase = current*BASE;
198         overflow = overflow || timesbase < current;
199         current = timesbase + digit;
200         overflow = overflow || current < digit;
201     }
202 
203     if (did_nothing || overflow) {
204         in.setstate(std::ios::failbit);
205         if (overflow)
206             current = ~pcg128_t(0ULL);
207     }
208 
209     value = current;
210 
211     return in;
212 }
213 
214 /*
215  * Likewise, if people use tiny rngs, we'll be serializing uint8_t.
216  * If we just used the provided IO operators, they'd read/write chars,
217  * not ints, so we need to define our own.  We *can* redefine this operator
218  * here because we're in our own namespace.
219  */
220 
221 template <typename CharT, typename Traits>
222 std::basic_ostream<CharT,Traits>&
operator <<(std::basic_ostream<CharT,Traits> & out,uint8_t value)223 operator<<(std::basic_ostream<CharT,Traits>&out, uint8_t value)
224 {
225     return out << uint32_t(value);
226 }
227 
228 template <typename CharT, typename Traits>
229 std::basic_istream<CharT,Traits>&
operator >>(std::basic_istream<CharT,Traits> & in,uint8_t target)230 operator>>(std::basic_istream<CharT,Traits>& in, uint8_t target)
231 {
232     uint32_t value = 0xdecea5edU;
233     in >> value;
234     if (!in && value == 0xdecea5edU)
235         return in;
236     if (value > uint8_t(~0)) {
237         in.setstate(std::ios::failbit);
238         value = ~0U;
239     }
240     target = uint8_t(value);
241     return in;
242 }
243 
244 /* Unfortunately, the above functions don't get found in preference to the
245  * built in ones, so we create some more specific overloads that will.
246  * Ugh.
247  */
248 
operator <<(std::ostream & out,uint8_t value)249 inline std::ostream& operator<<(std::ostream& out, uint8_t value)
250 {
251     return pcg_extras::operator<< <char>(out, value);
252 }
253 
operator >>(std::istream & in,uint8_t & value)254 inline std::istream& operator>>(std::istream& in, uint8_t& value)
255 {
256     return pcg_extras::operator>> <char>(in, value);
257 }
258 
259 
260 
261 /*
262  * Useful bitwise operations.
263  */
264 
265 /*
266  * XorShifts are invertable, but they are someting of a pain to invert.
267  * This function backs them out.  It's used by the whacky "inside out"
268  * generator defined later.
269  */
270 
271 template <typename itype>
unxorshift(itype x,bitcount_t bits,bitcount_t shift)272 inline itype unxorshift(itype x, bitcount_t bits, bitcount_t shift)
273 {
274     if (2*shift >= bits) {
275         return x ^ (x >> shift);
276     }
277     itype lowmask1 = (itype(1U) << (bits - shift*2)) - 1;
278     itype highmask1 = ~lowmask1;
279     itype top1 = x;
280     itype bottom1 = x & lowmask1;
281     top1 ^= top1 >> shift;
282     top1 &= highmask1;
283     x = top1 | bottom1;
284     itype lowmask2 = (itype(1U) << (bits - shift)) - 1;
285     itype bottom2 = x & lowmask2;
286     bottom2 = unxorshift(bottom2, bits - shift, shift);
287     bottom2 &= lowmask1;
288     return top1 | bottom2;
289 }
290 
291 /*
292  * Rotate left and right.
293  *
294  * In ideal world, compilers would spot idiomatic rotate code and convert it
295  * to a rotate instruction.  Of course, opinions vary on what the correct
296  * idiom is and how to spot it.  For clang, sometimes it generates better
297  * (but still crappy) code if you define PCG_USE_ZEROCHECK_ROTATE_IDIOM.
298  */
299 
300 template <typename itype>
rotl(itype value,bitcount_t rot)301 inline itype rotl(itype value, bitcount_t rot)
302 {
303     constexpr bitcount_t bits = sizeof(itype) * 8;
304     constexpr bitcount_t mask = bits - 1;
305 #if PCG_USE_ZEROCHECK_ROTATE_IDIOM
306     return rot ? (value << rot) | (value >> (bits - rot)) : value;
307 #else
308     return (value << rot) | (value >> ((- rot) & mask));
309 #endif
310 }
311 
312 template <typename itype>
rotr(itype value,bitcount_t rot)313 inline itype rotr(itype value, bitcount_t rot)
314 {
315     constexpr bitcount_t bits = sizeof(itype) * 8;
316     constexpr bitcount_t mask = bits - 1;
317 #if PCG_USE_ZEROCHECK_ROTATE_IDIOM
318     return rot ? (value >> rot) | (value << (bits - rot)) : value;
319 #else
320     return (value >> rot) | (value << ((- rot) & mask));
321 #endif
322 }
323 
324 /* Unfortunately, both Clang and GCC sometimes perform poorly when it comes
325  * to properly recognizing idiomatic rotate code, so for we also provide
326  * assembler directives (enabled with PCG_USE_INLINE_ASM).  Boo, hiss.
327  * (I hope that these compilers get better so that this code can die.)
328  *
329  * These overloads will be preferred over the general template code above.
330  */
331 #if PCG_USE_INLINE_ASM && __GNUC__ && (__x86_64__  || __i386__)
332 
rotr(uint8_t value,bitcount_t rot)333 inline uint8_t rotr(uint8_t value, bitcount_t rot)
334 {
335     asm ("rorb   %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
336     return value;
337 }
338 
rotr(uint16_t value,bitcount_t rot)339 inline uint16_t rotr(uint16_t value, bitcount_t rot)
340 {
341     asm ("rorw   %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
342     return value;
343 }
344 
rotr(uint32_t value,bitcount_t rot)345 inline uint32_t rotr(uint32_t value, bitcount_t rot)
346 {
347     asm ("rorl   %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
348     return value;
349 }
350 
351 #if __x86_64__
rotr(uint64_t value,bitcount_t rot)352 inline uint64_t rotr(uint64_t value, bitcount_t rot)
353 {
354     asm ("rorq   %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
355     return value;
356 }
357 #endif // __x86_64__
358 
359 #endif // PCG_USE_INLINE_ASM
360 
361 
362 /*
363  * The C++ SeedSeq concept (modelled by seed_seq) can fill an array of
364  * 32-bit integers with seed data, but sometimes we want to produce
365  * larger or smaller integers.
366  *
367  * The following code handles this annoyance.
368  *
369  * uneven_copy will copy an array of 32-bit ints to an array of larger or
370  * smaller ints (actually, the code is general it only needing forward
371  * iterators).  The copy is identical to the one that would be performed if
372  * we just did memcpy on a standard little-endian machine, but works
373  * regardless of the endian of the machine (or the weirdness of the ints
374  * involved).
375  *
376  * generate_to initializes an array of integers using a SeedSeq
377  * object.  It is given the size as a static constant at compile time and
378  * tries to avoid memory allocation.  If we're filling in 32-bit constants
379  * we just do it directly.  If we need a separate buffer and it's small,
380  * we allocate it on the stack.  Otherwise, we fall back to heap allocation.
381  * Ugh.
382  *
383  * generate_one produces a single value of some integral type using a
384  * SeedSeq object.
385  */
386 
387  /* uneven_copy helper, case where destination ints are less than 32 bit. */
388 
389 template<class SrcIter, class DestIter>
390 SrcIter uneven_copy_impl(
391     SrcIter src_first, DestIter dest_first, DestIter dest_last,
392     std::true_type)
393 {
394     typedef typename std::iterator_traits<SrcIter>::value_type  src_t;
395     typedef typename std::iterator_traits<DestIter>::value_type dest_t;
396 
397     constexpr bitcount_t SRC_SIZE  = sizeof(src_t);
398     constexpr bitcount_t DEST_SIZE = sizeof(dest_t);
399     constexpr bitcount_t DEST_BITS = DEST_SIZE * 8;
400     constexpr bitcount_t SCALE     = SRC_SIZE / DEST_SIZE;
401 
402     size_t count = 0;
403     src_t value;
404 
405     while (dest_first != dest_last) {
406         if ((count++ % SCALE) == 0)
407             value = *src_first++;       // Get more bits
408         else
409             value >>= DEST_BITS;        // Move down bits
410 
411         *dest_first++ = dest_t(value);  // Truncates, ignores high bits.
412     }
413     return src_first;
414 }
415 
416  /* uneven_copy helper, case where destination ints are more than 32 bit. */
417 
418 template<class SrcIter, class DestIter>
419 SrcIter uneven_copy_impl(
420     SrcIter src_first, DestIter dest_first, DestIter dest_last,
421     std::false_type)
422 {
423     typedef typename std::iterator_traits<SrcIter>::value_type  src_t;
424     typedef typename std::iterator_traits<DestIter>::value_type dest_t;
425 
426     constexpr auto SRC_SIZE  = sizeof(src_t);
427     constexpr auto SRC_BITS  = SRC_SIZE * 8;
428     constexpr auto DEST_SIZE = sizeof(dest_t);
429     constexpr auto SCALE     = (DEST_SIZE+SRC_SIZE-1) / SRC_SIZE;
430 
431     while (dest_first != dest_last) {
432         dest_t value(0UL);
433         unsigned int shift = 0;
434 
435         for (size_t i = 0; i < SCALE; ++i) {
436             value |= dest_t(*src_first++) << shift;
437             shift += SRC_BITS;
438         }
439 
440         *dest_first++ = value;
441     }
442     return src_first;
443 }
444 
445 /* uneven_copy, call the right code for larger vs. smaller */
446 
447 template<class SrcIter, class DestIter>
uneven_copy(SrcIter src_first,DestIter dest_first,DestIter dest_last)448 inline SrcIter uneven_copy(SrcIter src_first,
449                            DestIter dest_first, DestIter dest_last)
450 {
451     typedef typename std::iterator_traits<SrcIter>::value_type  src_t;
452     typedef typename std::iterator_traits<DestIter>::value_type dest_t;
453 
454     constexpr bool DEST_IS_SMALLER = sizeof(dest_t) < sizeof(src_t);
455 
456     return uneven_copy_impl(src_first, dest_first, dest_last,
457                             std::integral_constant<bool, DEST_IS_SMALLER>{});
458 }
459 
460 /* generate_to, fill in a fixed-size array of integral type using a SeedSeq
461  * (actually works for any random-access iterator)
462  */
463 
464 template <size_t size, typename SeedSeq, typename DestIter>
generate_to_impl(SeedSeq && generator,DestIter dest,std::true_type)465 inline void generate_to_impl(SeedSeq&& generator, DestIter dest,
466                              std::true_type)
467 {
468     generator.generate(dest, dest+size);
469 }
470 
471 template <size_t size, typename SeedSeq, typename DestIter>
generate_to_impl(SeedSeq && generator,DestIter dest,std::false_type)472 void generate_to_impl(SeedSeq&& generator, DestIter dest,
473                       std::false_type)
474 {
475     typedef typename std::iterator_traits<DestIter>::value_type dest_t;
476     constexpr auto DEST_SIZE = sizeof(dest_t);
477     constexpr auto GEN_SIZE  = sizeof(uint32_t);
478 
479     constexpr bool GEN_IS_SMALLER = GEN_SIZE < DEST_SIZE;
480     constexpr size_t FROM_ELEMS =
481         GEN_IS_SMALLER
482             ? size * ((DEST_SIZE+GEN_SIZE-1) / GEN_SIZE)
483             : (size + (GEN_SIZE / DEST_SIZE) - 1)
484                 / ((GEN_SIZE / DEST_SIZE) + GEN_IS_SMALLER);
485                         //  this odd code ^^^^^^^^^^^^^^^^^ is work-around for
486                         //  a bug: http://llvm.org/bugs/show_bug.cgi?id=21287
487 
488     if (FROM_ELEMS <= 1024) {
489         uint32_t buffer[FROM_ELEMS];
490         generator.generate(buffer, buffer+FROM_ELEMS);
491         uneven_copy(buffer, dest, dest+size);
492     } else {
493         uint32_t* buffer = (uint32_t*) malloc(GEN_SIZE * FROM_ELEMS);
494         generator.generate(buffer, buffer+FROM_ELEMS);
495         uneven_copy(buffer, dest, dest+size);
496         free(buffer);
497     }
498 }
499 
500 template <size_t size, typename SeedSeq, typename DestIter>
generate_to(SeedSeq && generator,DestIter dest)501 inline void generate_to(SeedSeq&& generator, DestIter dest)
502 {
503     typedef typename std::iterator_traits<DestIter>::value_type dest_t;
504     constexpr bool IS_32BIT = sizeof(dest_t) == sizeof(uint32_t);
505 
506     generate_to_impl<size>(std::forward<SeedSeq>(generator), dest,
507                            std::integral_constant<bool, IS_32BIT>{});
508 }
509 
510 /* generate_one, produce a value of integral type using a SeedSeq
511  * (optionally, we can have it produce more than one and pick which one
512  * we want)
513  */
514 
515 template <typename UInt, size_t i = 0UL, size_t N = i+1UL, typename SeedSeq>
generate_one(SeedSeq && generator)516 inline UInt generate_one(SeedSeq&& generator)
517 {
518     UInt result[N];
519     generate_to<N>(std::forward<SeedSeq>(generator), result);
520     return result[i];
521 }
522 
523 template <typename RngType>
bounded_rand(RngType & rng,typename RngType::result_type upper_bound)524 auto bounded_rand(RngType& rng, typename RngType::result_type upper_bound)
525         -> typename RngType::result_type
526 {
527     typedef typename RngType::result_type rtype;
528     rtype threshold = (RngType::max() - RngType::min() + rtype(1) - upper_bound)
529                     % upper_bound;
530     for (;;) {
531         rtype r = rng() - RngType::min();
532         if (r >= threshold)
533             return r % upper_bound;
534     }
535 }
536 
537 template <typename Iter, typename RandType>
shuffle(Iter from,Iter to,RandType && rng)538 void shuffle(Iter from, Iter to, RandType&& rng)
539 {
540     typedef typename std::iterator_traits<Iter>::difference_type delta_t;
541     auto count = to - from;
542     while (count > 1) {
543         delta_t chosen(bounded_rand(rng, count));
544         --count;
545         --to;
546         using std::swap;
547         swap(*(from+chosen), *to);
548     }
549 }
550 
551 /*
552  * Although std::seed_seq is useful, it isn't everything.  Often we want to
553  * initialize a random-number generator some other way, such as from a random
554  * device.
555  *
556  * Technically, it does not meet the requirements of a SeedSequence because
557  * it lacks some of the rarely-used member functions (some of which would
558  * be impossible to provide).  However the C++ standard is quite specific
559  * that actual engines only called the generate method, so it ought not to be
560  * a problem in practice.
561  */
562 
563 template <typename RngType>
564 class seed_seq_from {
565 private:
566     RngType rng_;
567 
568     typedef uint_least32_t result_type;
569 
570 public:
571     template<typename... Args>
seed_seq_from(Args &&...args)572     seed_seq_from(Args&&... args) :
573         rng_(std::forward<Args>(args)...)
574     {
575         // Nothing (else) to do...
576     }
577 
578     template<typename Iter>
generate(Iter start,Iter finish)579     void generate(Iter start, Iter finish)
580     {
581         for (auto i = start; i != finish; ++i)
582             *i = result_type(rng_());
583     }
584 
size() const585     constexpr size_t size() const
586     {
587         return (sizeof(typename RngType::result_type) > sizeof(result_type)
588                 && RngType::max() > ~size_t(0UL))
589              ? ~size_t(0UL)
590              : size_t(RngType::max());
591     }
592 };
593 
594 /*
595  * Sometimes you might want a distinct seed based on when the program
596  * was compiled.  That way, a particular instance of the program will
597  * behave the same way, but when recompiled it'll produce a different
598  * value.
599  */
600 
601 template <typename IntType>
602 struct static_arbitrary_seed {
603 private:
fnvpcg_extras::static_arbitrary_seed604     static constexpr IntType fnv(IntType hash, const char* pos) {
605         return *pos == '\0'
606              ? hash
607              : fnv((hash * IntType(16777619U)) ^ *pos, (pos+1));
608     }
609 
610 public:
611     static constexpr IntType value = fnv(IntType(2166136261U ^ sizeof(IntType)),
612                         __DATE__ __TIME__ __FILE__);
613 };
614 
615 // Sometimes, when debugging or testing, it's handy to be able print the name
616 // of a (in human-readable form).  This code allows the idiom:
617 //
618 //      cout << printable_typename<my_foo_type_t>()
619 //
620 // to print out my_foo_type_t (or its concrete type if it is a synonym)
621 
622 template <typename T>
623 struct printable_typename {};
624 
625 template <typename T>
operator <<(std::ostream & out,printable_typename<T>)626 std::ostream& operator<<(std::ostream& out, printable_typename<T>) {
627     const char *implementation_typename = typeid(T).name();
628 #ifdef __GNUC__
629     int status;
630     const char* pretty_name =
631         abi::__cxa_demangle(implementation_typename, NULL, NULL, &status);
632     if (status == 0)
633         out << pretty_name;
634     free((void*) pretty_name);
635     if (status == 0)
636         return out;
637 #endif
638     out << implementation_typename;
639     return out;
640 }
641 
642 } // namespace pcg_extras
643 
644 #endif // PCG_EXTRAS_HPP_INCLUDED
645