1 /*
2 * PCG Random Number Generation for C++
3 *
4 * Copyright 2014 Melissa O'Neill <oneill@pcg-random.org>
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 * For additional information about the PCG random number generation scheme,
19 * including its license and other licensing options, visit
20 *
21 * http://www.pcg-random.org
22 */
23
24 /*
25 * Modified by The OpenClonk.org Project to improve compatibility with
26 * Microsoft Visual C++.
27 */
28
29 /*
30 * This file provides support code that is useful for random-number generation
31 * but not specific to the PCG generation scheme, including:
32 * - 128-bit int support for platforms where it isn't available natively
33 * - bit twiddling operations
34 * - I/O of 128-bit and 8-bit integers
35 * - Handling the evilness of SeedSeq
36 * - Support for efficiently producing random numbers less than a given
37 * bound
38 */
39
40 #ifndef PCG_EXTRAS_HPP_INCLUDED
41 #define PCG_EXTRAS_HPP_INCLUDED 1
42
43 #include <cinttypes>
44 #include <cstddef>
45 #include <cstdlib>
46 #include <cstring>
47 #include <cassert>
48 #include <limits>
49 #include <iostream>
50 #include <type_traits>
51 #include <utility>
52 #include <locale>
53 #include <iterator>
54 #include <utility>
55
56 #ifdef __GNUC__
57 #include <cxxabi.h>
58 #endif
59
60 /*
61 * Abstractions for compiler-specific directives
62 */
63
64 #ifdef __GNUC__
65 #define PCG_NOINLINE __attribute__((noinline))
66 #define PCG_ALWAYS_INLINE __attribute__((always_inline))
67 #else
68 #define PCG_NOINLINE
69 #define PCG_ALWAYS_INLINE
70 #endif
71
72 /*
73 * Some members of the PCG library use 128-bit math. When compiling on 64-bit
74 * platforms, both GCC and Clang provide 128-bit integer types that are ideal
75 * for the job.
76 *
77 * On 32-bit platforms (or with other compilers), we fall back to a C++
78 * class that provides 128-bit unsigned integers instead. It may seem
79 * like we're reinventing the wheel here, because libraries already exist
80 * that support large integers, but most existing libraries provide a very
81 * generic multiprecision code, but here we're operating at a fixed size.
82 * Also, most other libraries are fairly heavyweight. So we use a direct
83 * implementation. Sadly, it's much slower than hand-coded assembly or
84 * direct CPU support.
85 *
86 */
87 #if __SIZEOF_INT128__
88 namespace pcg_extras {
89 typedef __uint128_t pcg128_t;
90 }
91 #define PCG_128BIT_CONSTANT(high,low) \
92 ((pcg128_t(high) << 64) + low)
93 #else
94 #include "pcg_uint128.hpp"
95 namespace pcg_extras {
96 typedef pcg_extras::uint_x4<uint32_t,uint64_t> pcg128_t;
97 }
98 #define PCG_128BIT_CONSTANT(high,low) \
99 pcg128_t(high,low)
100 #define PCG_EMULATED_128BIT_MATH 1
101 #endif
102
103
104 namespace pcg_extras {
105
106 /*
107 * We often need to represent a "number of bits". When used normally, these
108 * numbers are never greater than 128, so an unsigned char is plenty.
109 * If you're using a nonstandard generator of a larger size, you can set
110 * PCG_BITCOUNT_T to have it define it as a larger size. (Some compilers
111 * might produce faster code if you set it to an unsigned int.)
112 */
113
114 #ifndef PCG_BITCOUNT_T
115 typedef uint8_t bitcount_t;
116 #else
117 typedef PCG_BITCOUNT_T bitcount_t;
118 #endif
119
120 /*
121 * C++ requires us to be able to serialize RNG state by printing or reading
122 * it from a stream. Because we use 128-bit ints, we also need to be able
123 * ot print them, so here is code to do so.
124 *
125 * This code provides enough functionality to print 128-bit ints in decimal
126 * and zero-padded in hex. It's not a full-featured implementation.
127 */
128
129 template <typename CharT, typename Traits>
130 std::basic_ostream<CharT,Traits>&
operator <<(std::basic_ostream<CharT,Traits> & out,pcg128_t value)131 operator<<(std::basic_ostream<CharT,Traits>& out, pcg128_t value)
132 {
133 auto desired_base = out.flags() & out.basefield;
134 bool want_hex = desired_base == out.hex;
135
136 if (want_hex) {
137 uint64_t highpart = uint64_t(value >> 64);
138 uint64_t lowpart = uint64_t(value);
139 auto desired_width = out.width();
140 if (desired_width > 16) {
141 out.width(desired_width - 16);
142 }
143 if (highpart != 0 || desired_width > 16)
144 out << highpart;
145 CharT oldfill;
146 if (highpart != 0) {
147 out.width(16);
148 oldfill = out.fill('0');
149 }
150 auto oldflags = out.setf(decltype(desired_base){}, out.showbase);
151 out << lowpart;
152 out.setf(oldflags);
153 if (highpart != 0) {
154 out.fill(oldfill);
155 }
156 return out;
157 }
158 constexpr size_t MAX_CHARS_128BIT = 40;
159
160 char buffer[MAX_CHARS_128BIT];
161 char* pos = buffer+sizeof(buffer);
162 *(--pos) = '\0';
163 constexpr auto BASE = pcg128_t(10ULL);
164 do {
165 auto div = value / BASE;
166 auto mod = uint32_t(value - (div * BASE));
167 *(--pos) = '0' + mod;
168 value = div;
169 } while(value != pcg128_t(0ULL));
170 return out << pos;
171 }
172
173 template <typename CharT, typename Traits>
174 std::basic_istream<CharT,Traits>&
operator >>(std::basic_istream<CharT,Traits> & in,pcg128_t & value)175 operator>>(std::basic_istream<CharT,Traits>& in, pcg128_t& value)
176 {
177 typename std::basic_istream<CharT,Traits>::sentry s(in);
178
179 if (!s)
180 return in;
181
182 constexpr auto BASE = pcg128_t(10ULL);
183 pcg128_t current(0ULL);
184 bool did_nothing = true;
185 bool overflow = false;
186 for(;;) {
187 CharT wide_ch = in.get();
188 if (!in.good())
189 break;
190 auto ch = in.narrow(wide_ch, '\0');
191 if (ch < '0' || ch > '9') {
192 in.unget();
193 break;
194 }
195 did_nothing = false;
196 pcg128_t digit(uint32_t(ch - '0'));
197 pcg128_t timesbase = current*BASE;
198 overflow = overflow || timesbase < current;
199 current = timesbase + digit;
200 overflow = overflow || current < digit;
201 }
202
203 if (did_nothing || overflow) {
204 in.setstate(std::ios::failbit);
205 if (overflow)
206 current = ~pcg128_t(0ULL);
207 }
208
209 value = current;
210
211 return in;
212 }
213
214 /*
215 * Likewise, if people use tiny rngs, we'll be serializing uint8_t.
216 * If we just used the provided IO operators, they'd read/write chars,
217 * not ints, so we need to define our own. We *can* redefine this operator
218 * here because we're in our own namespace.
219 */
220
221 template <typename CharT, typename Traits>
222 std::basic_ostream<CharT,Traits>&
operator <<(std::basic_ostream<CharT,Traits> & out,uint8_t value)223 operator<<(std::basic_ostream<CharT,Traits>&out, uint8_t value)
224 {
225 return out << uint32_t(value);
226 }
227
228 template <typename CharT, typename Traits>
229 std::basic_istream<CharT,Traits>&
operator >>(std::basic_istream<CharT,Traits> & in,uint8_t target)230 operator>>(std::basic_istream<CharT,Traits>& in, uint8_t target)
231 {
232 uint32_t value = 0xdecea5edU;
233 in >> value;
234 if (!in && value == 0xdecea5edU)
235 return in;
236 if (value > uint8_t(~0)) {
237 in.setstate(std::ios::failbit);
238 value = ~0U;
239 }
240 target = uint8_t(value);
241 return in;
242 }
243
244 /* Unfortunately, the above functions don't get found in preference to the
245 * built in ones, so we create some more specific overloads that will.
246 * Ugh.
247 */
248
operator <<(std::ostream & out,uint8_t value)249 inline std::ostream& operator<<(std::ostream& out, uint8_t value)
250 {
251 return pcg_extras::operator<< <char>(out, value);
252 }
253
operator >>(std::istream & in,uint8_t & value)254 inline std::istream& operator>>(std::istream& in, uint8_t& value)
255 {
256 return pcg_extras::operator>> <char>(in, value);
257 }
258
259
260
261 /*
262 * Useful bitwise operations.
263 */
264
265 /*
266 * XorShifts are invertable, but they are someting of a pain to invert.
267 * This function backs them out. It's used by the whacky "inside out"
268 * generator defined later.
269 */
270
271 template <typename itype>
unxorshift(itype x,bitcount_t bits,bitcount_t shift)272 inline itype unxorshift(itype x, bitcount_t bits, bitcount_t shift)
273 {
274 if (2*shift >= bits) {
275 return x ^ (x >> shift);
276 }
277 itype lowmask1 = (itype(1U) << (bits - shift*2)) - 1;
278 itype highmask1 = ~lowmask1;
279 itype top1 = x;
280 itype bottom1 = x & lowmask1;
281 top1 ^= top1 >> shift;
282 top1 &= highmask1;
283 x = top1 | bottom1;
284 itype lowmask2 = (itype(1U) << (bits - shift)) - 1;
285 itype bottom2 = x & lowmask2;
286 bottom2 = unxorshift(bottom2, bits - shift, shift);
287 bottom2 &= lowmask1;
288 return top1 | bottom2;
289 }
290
291 /*
292 * Rotate left and right.
293 *
294 * In ideal world, compilers would spot idiomatic rotate code and convert it
295 * to a rotate instruction. Of course, opinions vary on what the correct
296 * idiom is and how to spot it. For clang, sometimes it generates better
297 * (but still crappy) code if you define PCG_USE_ZEROCHECK_ROTATE_IDIOM.
298 */
299
300 template <typename itype>
rotl(itype value,bitcount_t rot)301 inline itype rotl(itype value, bitcount_t rot)
302 {
303 constexpr bitcount_t bits = sizeof(itype) * 8;
304 constexpr bitcount_t mask = bits - 1;
305 #if PCG_USE_ZEROCHECK_ROTATE_IDIOM
306 return rot ? (value << rot) | (value >> (bits - rot)) : value;
307 #else
308 return (value << rot) | (value >> ((- rot) & mask));
309 #endif
310 }
311
312 template <typename itype>
rotr(itype value,bitcount_t rot)313 inline itype rotr(itype value, bitcount_t rot)
314 {
315 constexpr bitcount_t bits = sizeof(itype) * 8;
316 constexpr bitcount_t mask = bits - 1;
317 #if PCG_USE_ZEROCHECK_ROTATE_IDIOM
318 return rot ? (value >> rot) | (value << (bits - rot)) : value;
319 #else
320 return (value >> rot) | (value << ((- rot) & mask));
321 #endif
322 }
323
324 /* Unfortunately, both Clang and GCC sometimes perform poorly when it comes
325 * to properly recognizing idiomatic rotate code, so for we also provide
326 * assembler directives (enabled with PCG_USE_INLINE_ASM). Boo, hiss.
327 * (I hope that these compilers get better so that this code can die.)
328 *
329 * These overloads will be preferred over the general template code above.
330 */
331 #if PCG_USE_INLINE_ASM && __GNUC__ && (__x86_64__ || __i386__)
332
rotr(uint8_t value,bitcount_t rot)333 inline uint8_t rotr(uint8_t value, bitcount_t rot)
334 {
335 asm ("rorb %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
336 return value;
337 }
338
rotr(uint16_t value,bitcount_t rot)339 inline uint16_t rotr(uint16_t value, bitcount_t rot)
340 {
341 asm ("rorw %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
342 return value;
343 }
344
rotr(uint32_t value,bitcount_t rot)345 inline uint32_t rotr(uint32_t value, bitcount_t rot)
346 {
347 asm ("rorl %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
348 return value;
349 }
350
351 #if __x86_64__
rotr(uint64_t value,bitcount_t rot)352 inline uint64_t rotr(uint64_t value, bitcount_t rot)
353 {
354 asm ("rorq %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
355 return value;
356 }
357 #endif // __x86_64__
358
359 #endif // PCG_USE_INLINE_ASM
360
361
362 /*
363 * The C++ SeedSeq concept (modelled by seed_seq) can fill an array of
364 * 32-bit integers with seed data, but sometimes we want to produce
365 * larger or smaller integers.
366 *
367 * The following code handles this annoyance.
368 *
369 * uneven_copy will copy an array of 32-bit ints to an array of larger or
370 * smaller ints (actually, the code is general it only needing forward
371 * iterators). The copy is identical to the one that would be performed if
372 * we just did memcpy on a standard little-endian machine, but works
373 * regardless of the endian of the machine (or the weirdness of the ints
374 * involved).
375 *
376 * generate_to initializes an array of integers using a SeedSeq
377 * object. It is given the size as a static constant at compile time and
378 * tries to avoid memory allocation. If we're filling in 32-bit constants
379 * we just do it directly. If we need a separate buffer and it's small,
380 * we allocate it on the stack. Otherwise, we fall back to heap allocation.
381 * Ugh.
382 *
383 * generate_one produces a single value of some integral type using a
384 * SeedSeq object.
385 */
386
387 /* uneven_copy helper, case where destination ints are less than 32 bit. */
388
389 template<class SrcIter, class DestIter>
390 SrcIter uneven_copy_impl(
391 SrcIter src_first, DestIter dest_first, DestIter dest_last,
392 std::true_type)
393 {
394 typedef typename std::iterator_traits<SrcIter>::value_type src_t;
395 typedef typename std::iterator_traits<DestIter>::value_type dest_t;
396
397 constexpr bitcount_t SRC_SIZE = sizeof(src_t);
398 constexpr bitcount_t DEST_SIZE = sizeof(dest_t);
399 constexpr bitcount_t DEST_BITS = DEST_SIZE * 8;
400 constexpr bitcount_t SCALE = SRC_SIZE / DEST_SIZE;
401
402 size_t count = 0;
403 src_t value;
404
405 while (dest_first != dest_last) {
406 if ((count++ % SCALE) == 0)
407 value = *src_first++; // Get more bits
408 else
409 value >>= DEST_BITS; // Move down bits
410
411 *dest_first++ = dest_t(value); // Truncates, ignores high bits.
412 }
413 return src_first;
414 }
415
416 /* uneven_copy helper, case where destination ints are more than 32 bit. */
417
418 template<class SrcIter, class DestIter>
419 SrcIter uneven_copy_impl(
420 SrcIter src_first, DestIter dest_first, DestIter dest_last,
421 std::false_type)
422 {
423 typedef typename std::iterator_traits<SrcIter>::value_type src_t;
424 typedef typename std::iterator_traits<DestIter>::value_type dest_t;
425
426 constexpr auto SRC_SIZE = sizeof(src_t);
427 constexpr auto SRC_BITS = SRC_SIZE * 8;
428 constexpr auto DEST_SIZE = sizeof(dest_t);
429 constexpr auto SCALE = (DEST_SIZE+SRC_SIZE-1) / SRC_SIZE;
430
431 while (dest_first != dest_last) {
432 dest_t value(0UL);
433 unsigned int shift = 0;
434
435 for (size_t i = 0; i < SCALE; ++i) {
436 value |= dest_t(*src_first++) << shift;
437 shift += SRC_BITS;
438 }
439
440 *dest_first++ = value;
441 }
442 return src_first;
443 }
444
445 /* uneven_copy, call the right code for larger vs. smaller */
446
447 template<class SrcIter, class DestIter>
uneven_copy(SrcIter src_first,DestIter dest_first,DestIter dest_last)448 inline SrcIter uneven_copy(SrcIter src_first,
449 DestIter dest_first, DestIter dest_last)
450 {
451 typedef typename std::iterator_traits<SrcIter>::value_type src_t;
452 typedef typename std::iterator_traits<DestIter>::value_type dest_t;
453
454 constexpr bool DEST_IS_SMALLER = sizeof(dest_t) < sizeof(src_t);
455
456 return uneven_copy_impl(src_first, dest_first, dest_last,
457 std::integral_constant<bool, DEST_IS_SMALLER>{});
458 }
459
460 /* generate_to, fill in a fixed-size array of integral type using a SeedSeq
461 * (actually works for any random-access iterator)
462 */
463
464 template <size_t size, typename SeedSeq, typename DestIter>
generate_to_impl(SeedSeq && generator,DestIter dest,std::true_type)465 inline void generate_to_impl(SeedSeq&& generator, DestIter dest,
466 std::true_type)
467 {
468 generator.generate(dest, dest+size);
469 }
470
471 template <size_t size, typename SeedSeq, typename DestIter>
generate_to_impl(SeedSeq && generator,DestIter dest,std::false_type)472 void generate_to_impl(SeedSeq&& generator, DestIter dest,
473 std::false_type)
474 {
475 typedef typename std::iterator_traits<DestIter>::value_type dest_t;
476 constexpr auto DEST_SIZE = sizeof(dest_t);
477 constexpr auto GEN_SIZE = sizeof(uint32_t);
478
479 constexpr bool GEN_IS_SMALLER = GEN_SIZE < DEST_SIZE;
480 constexpr size_t FROM_ELEMS =
481 GEN_IS_SMALLER
482 ? size * ((DEST_SIZE+GEN_SIZE-1) / GEN_SIZE)
483 : (size + (GEN_SIZE / DEST_SIZE) - 1)
484 / ((GEN_SIZE / DEST_SIZE) + GEN_IS_SMALLER);
485 // this odd code ^^^^^^^^^^^^^^^^^ is work-around for
486 // a bug: http://llvm.org/bugs/show_bug.cgi?id=21287
487
488 if (FROM_ELEMS <= 1024) {
489 uint32_t buffer[FROM_ELEMS];
490 generator.generate(buffer, buffer+FROM_ELEMS);
491 uneven_copy(buffer, dest, dest+size);
492 } else {
493 uint32_t* buffer = (uint32_t*) malloc(GEN_SIZE * FROM_ELEMS);
494 generator.generate(buffer, buffer+FROM_ELEMS);
495 uneven_copy(buffer, dest, dest+size);
496 free(buffer);
497 }
498 }
499
500 template <size_t size, typename SeedSeq, typename DestIter>
generate_to(SeedSeq && generator,DestIter dest)501 inline void generate_to(SeedSeq&& generator, DestIter dest)
502 {
503 typedef typename std::iterator_traits<DestIter>::value_type dest_t;
504 constexpr bool IS_32BIT = sizeof(dest_t) == sizeof(uint32_t);
505
506 generate_to_impl<size>(std::forward<SeedSeq>(generator), dest,
507 std::integral_constant<bool, IS_32BIT>{});
508 }
509
510 /* generate_one, produce a value of integral type using a SeedSeq
511 * (optionally, we can have it produce more than one and pick which one
512 * we want)
513 */
514
515 template <typename UInt, size_t i = 0UL, size_t N = i+1UL, typename SeedSeq>
generate_one(SeedSeq && generator)516 inline UInt generate_one(SeedSeq&& generator)
517 {
518 UInt result[N];
519 generate_to<N>(std::forward<SeedSeq>(generator), result);
520 return result[i];
521 }
522
523 template <typename RngType>
bounded_rand(RngType & rng,typename RngType::result_type upper_bound)524 auto bounded_rand(RngType& rng, typename RngType::result_type upper_bound)
525 -> typename RngType::result_type
526 {
527 typedef typename RngType::result_type rtype;
528 rtype threshold = (RngType::max() - RngType::min() + rtype(1) - upper_bound)
529 % upper_bound;
530 for (;;) {
531 rtype r = rng() - RngType::min();
532 if (r >= threshold)
533 return r % upper_bound;
534 }
535 }
536
537 template <typename Iter, typename RandType>
shuffle(Iter from,Iter to,RandType && rng)538 void shuffle(Iter from, Iter to, RandType&& rng)
539 {
540 typedef typename std::iterator_traits<Iter>::difference_type delta_t;
541 auto count = to - from;
542 while (count > 1) {
543 delta_t chosen(bounded_rand(rng, count));
544 --count;
545 --to;
546 using std::swap;
547 swap(*(from+chosen), *to);
548 }
549 }
550
551 /*
552 * Although std::seed_seq is useful, it isn't everything. Often we want to
553 * initialize a random-number generator some other way, such as from a random
554 * device.
555 *
556 * Technically, it does not meet the requirements of a SeedSequence because
557 * it lacks some of the rarely-used member functions (some of which would
558 * be impossible to provide). However the C++ standard is quite specific
559 * that actual engines only called the generate method, so it ought not to be
560 * a problem in practice.
561 */
562
563 template <typename RngType>
564 class seed_seq_from {
565 private:
566 RngType rng_;
567
568 typedef uint_least32_t result_type;
569
570 public:
571 template<typename... Args>
seed_seq_from(Args &&...args)572 seed_seq_from(Args&&... args) :
573 rng_(std::forward<Args>(args)...)
574 {
575 // Nothing (else) to do...
576 }
577
578 template<typename Iter>
generate(Iter start,Iter finish)579 void generate(Iter start, Iter finish)
580 {
581 for (auto i = start; i != finish; ++i)
582 *i = result_type(rng_());
583 }
584
size() const585 constexpr size_t size() const
586 {
587 return (sizeof(typename RngType::result_type) > sizeof(result_type)
588 && RngType::max() > ~size_t(0UL))
589 ? ~size_t(0UL)
590 : size_t(RngType::max());
591 }
592 };
593
594 /*
595 * Sometimes you might want a distinct seed based on when the program
596 * was compiled. That way, a particular instance of the program will
597 * behave the same way, but when recompiled it'll produce a different
598 * value.
599 */
600
601 template <typename IntType>
602 struct static_arbitrary_seed {
603 private:
fnvpcg_extras::static_arbitrary_seed604 static constexpr IntType fnv(IntType hash, const char* pos) {
605 return *pos == '\0'
606 ? hash
607 : fnv((hash * IntType(16777619U)) ^ *pos, (pos+1));
608 }
609
610 public:
611 static constexpr IntType value = fnv(IntType(2166136261U ^ sizeof(IntType)),
612 __DATE__ __TIME__ __FILE__);
613 };
614
615 // Sometimes, when debugging or testing, it's handy to be able print the name
616 // of a (in human-readable form). This code allows the idiom:
617 //
618 // cout << printable_typename<my_foo_type_t>()
619 //
620 // to print out my_foo_type_t (or its concrete type if it is a synonym)
621
622 template <typename T>
623 struct printable_typename {};
624
625 template <typename T>
operator <<(std::ostream & out,printable_typename<T>)626 std::ostream& operator<<(std::ostream& out, printable_typename<T>) {
627 const char *implementation_typename = typeid(T).name();
628 #ifdef __GNUC__
629 int status;
630 const char* pretty_name =
631 abi::__cxa_demangle(implementation_typename, NULL, NULL, &status);
632 if (status == 0)
633 out << pretty_name;
634 free((void*) pretty_name);
635 if (status == 0)
636 return out;
637 #endif
638 out << implementation_typename;
639 return out;
640 }
641
642 } // namespace pcg_extras
643
644 #endif // PCG_EXTRAS_HPP_INCLUDED
645