1 /*
2  * PCG Random Number Generation for C++
3  *
4  * Copyright 2014-2017 Melissa O'Neill <oneill@pcg-random.org>,
5  *                     and the PCG Project contributors.
6  *
7  * SPDX-License-Identifier: (Apache-2.0 OR MIT)
8  *
9  * Licensed under the Apache License, Version 2.0 (provided in
10  * LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0)
11  * or under the MIT license (provided in LICENSE-MIT.txt and at
12  * http://opensource.org/licenses/MIT), at your option. This file may not
13  * be copied, modified, or distributed except according to those terms.
14  *
15  * Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either
16  * express or implied.  See your chosen license for details.
17  *
18  * For additional information about the PCG random number generation scheme,
19  * visit http://www.pcg-random.org/.
20  */
21 
22 /*
23  * This file provides support code that is useful for random-number generation
24  * but not specific to the PCG generation scheme, including:
25  *      - 128-bit int support for platforms where it isn't available natively
26  *      - bit twiddling operations
27  *      - I/O of 128-bit and 8-bit integers
28  *      - Handling the evilness of SeedSeq
29  *      - Support for efficiently producing random numbers less than a given
30  *        bound
31  */
32 
33 #ifndef PCG_EXTRAS_HPP_INCLUDED
34 #define PCG_EXTRAS_HPP_INCLUDED 1
35 
36 #include <cinttypes>
37 #include <cstddef>
38 #include <cstdlib>
39 #include <cstring>
40 #include <cassert>
41 #include <limits>
42 #include <iostream>
43 #include <type_traits>
44 #include <utility>
45 #include <locale>
46 #include <iterator>
47 
48 #ifdef __GNUC__
49     #include <cxxabi.h>
50 #endif
51 
52 /*
53  * Abstractions for compiler-specific directives
54  */
55 
56 #ifdef __GNUC__
57     #define PCG_NOINLINE __attribute__((noinline))
58 #else
59     #define PCG_NOINLINE
60 #endif
61 
62 /*
63  * Some members of the PCG library use 128-bit math.  When compiling on 64-bit
64  * platforms, both GCC and Clang provide 128-bit integer types that are ideal
65  * for the job.
66  *
67  * On 32-bit platforms (or with other compilers), we fall back to a C++
68  * class that provides 128-bit unsigned integers instead.  It may seem
69  * like we're reinventing the wheel here, because libraries already exist
70  * that support large integers, but most existing libraries provide a very
71  * generic multiprecision code, but here we're operating at a fixed size.
72  * Also, most other libraries are fairly heavyweight.  So we use a direct
73  * implementation.  Sadly, it's much slower than hand-coded assembly or
74  * direct CPU support.
75  *
76  */
77 #if __SIZEOF_INT128__
78     namespace pcg_extras {
79         typedef __uint128_t pcg128_t;
80     }
81     #define PCG_128BIT_CONSTANT(high,low) \
82             ((pcg_extras::pcg128_t(high) << 64) + low)
83 #else
84     #include "pcg_uint128.hpp"
85     namespace pcg_extras {
86         typedef pcg_extras::uint_x4<uint32_t,uint64_t> pcg128_t;
87     }
88     #define PCG_128BIT_CONSTANT(high,low) \
89             pcg_extras::pcg128_t(high,low)
90     #define PCG_EMULATED_128BIT_MATH 1
91 #endif
92 
93 
94 namespace pcg_extras {
95 
96 /*
97  * We often need to represent a "number of bits".  When used normally, these
98  * numbers are never greater than 128, so an unsigned char is plenty.
99  * If you're using a nonstandard generator of a larger size, you can set
100  * PCG_BITCOUNT_T to have it define it as a larger size.  (Some compilers
101  * might produce faster code if you set it to an unsigned int.)
102  */
103 
104 #ifndef PCG_BITCOUNT_T
105     typedef uint8_t bitcount_t;
106 #else
107     typedef PCG_BITCOUNT_T bitcount_t;
108 #endif
109 
110 /*
111  * C++ requires us to be able to serialize RNG state by printing or reading
112  * it from a stream.  Because we use 128-bit ints, we also need to be able
113  * ot print them, so here is code to do so.
114  *
115  * This code provides enough functionality to print 128-bit ints in decimal
116  * and zero-padded in hex.  It's not a full-featured implementation.
117  */
118 
119 template <typename CharT, typename Traits>
120 std::basic_ostream<CharT,Traits>&
operator <<(std::basic_ostream<CharT,Traits> & out,pcg128_t value)121 operator<<(std::basic_ostream<CharT,Traits>& out, pcg128_t value)
122 {
123     auto desired_base = out.flags() & out.basefield;
124     bool want_hex = desired_base == out.hex;
125 
126     if (want_hex) {
127         uint64_t highpart = uint64_t(value >> 64);
128         uint64_t lowpart  = uint64_t(value);
129         auto desired_width = out.width();
130         if (desired_width > 16) {
131             out.width(desired_width - 16);
132         }
133         if (highpart != 0 || desired_width > 16)
134             out << highpart;
135         CharT oldfill = '\0';
136         if (highpart != 0) {
137             out.width(16);
138             oldfill = out.fill('0');
139         }
140         auto oldflags = out.setf(decltype(desired_base){}, out.showbase);
141         out << lowpart;
142         out.setf(oldflags);
143         if (highpart != 0) {
144             out.fill(oldfill);
145         }
146         return out;
147     }
148     constexpr size_t MAX_CHARS_128BIT = 40;
149 
150     char buffer[MAX_CHARS_128BIT];
151     char* pos = buffer+sizeof(buffer);
152     *(--pos) = '\0';
153     constexpr auto BASE = pcg128_t(10ULL);
154     do {
155         auto div = value / BASE;
156         auto mod = uint32_t(value - (div * BASE));
157         *(--pos) = '0' + char(mod);
158         value = div;
159     } while(value != pcg128_t(0ULL));
160     return out << pos;
161 }
162 
163 template <typename CharT, typename Traits>
164 std::basic_istream<CharT,Traits>&
operator >>(std::basic_istream<CharT,Traits> & in,pcg128_t & value)165 operator>>(std::basic_istream<CharT,Traits>& in, pcg128_t& value)
166 {
167     typename std::basic_istream<CharT,Traits>::sentry s(in);
168 
169     if (!s)
170          return in;
171 
172     constexpr auto BASE = pcg128_t(10ULL);
173     pcg128_t current(0ULL);
174     bool did_nothing = true;
175     bool overflow = false;
176     for(;;) {
177         CharT wide_ch = in.get();
178         if (!in.good())
179             break;
180         auto ch = in.narrow(wide_ch, '\0');
181         if (ch < '0' || ch > '9') {
182             in.unget();
183             break;
184         }
185         did_nothing = false;
186         pcg128_t digit(uint32_t(ch - '0'));
187         pcg128_t timesbase = current*BASE;
188         overflow = overflow || timesbase < current;
189         current = timesbase + digit;
190         overflow = overflow || current < digit;
191     }
192 
193     if (did_nothing || overflow) {
194         in.setstate(std::ios::failbit);
195         if (overflow)
196             current = ~pcg128_t(0ULL);
197     }
198 
199     value = current;
200 
201     return in;
202 }
203 
204 /*
205  * Likewise, if people use tiny rngs, we'll be serializing uint8_t.
206  * If we just used the provided IO operators, they'd read/write chars,
207  * not ints, so we need to define our own.  We *can* redefine this operator
208  * here because we're in our own namespace.
209  */
210 
211 template <typename CharT, typename Traits>
212 std::basic_ostream<CharT,Traits>&
operator <<(std::basic_ostream<CharT,Traits> & out,uint8_t value)213 operator<<(std::basic_ostream<CharT,Traits>&out, uint8_t value)
214 {
215     return out << uint32_t(value);
216 }
217 
218 template <typename CharT, typename Traits>
219 std::basic_istream<CharT,Traits>&
operator >>(std::basic_istream<CharT,Traits> & in,uint8_t & target)220 operator>>(std::basic_istream<CharT,Traits>& in, uint8_t& target)
221 {
222     uint32_t value = 0xdecea5edU;
223     in >> value;
224     if (!in && value == 0xdecea5edU)
225         return in;
226     if (value > uint8_t(~0)) {
227         in.setstate(std::ios::failbit);
228         value = ~0U;
229     }
230     target = uint8_t(value);
231     return in;
232 }
233 
234 /* Unfortunately, the above functions don't get found in preference to the
235  * built in ones, so we create some more specific overloads that will.
236  * Ugh.
237  */
238 
operator <<(std::ostream & out,uint8_t value)239 inline std::ostream& operator<<(std::ostream& out, uint8_t value)
240 {
241     return pcg_extras::operator<< <char>(out, value);
242 }
243 
operator >>(std::istream & in,uint8_t & value)244 inline std::istream& operator>>(std::istream& in, uint8_t& value)
245 {
246     return pcg_extras::operator>> <char>(in, value);
247 }
248 
249 
250 
251 /*
252  * Useful bitwise operations.
253  */
254 
255 /*
256  * XorShifts are invertable, but they are someting of a pain to invert.
257  * This function backs them out.  It's used by the whacky "inside out"
258  * generator defined later.
259  */
260 
261 template <typename itype>
unxorshift(itype x,bitcount_t bits,bitcount_t shift)262 inline itype unxorshift(itype x, bitcount_t bits, bitcount_t shift)
263 {
264     if (2*shift >= bits) {
265         return x ^ (x >> shift);
266     }
267     itype lowmask1 = (itype(1U) << (bits - shift*2)) - 1;
268     itype highmask1 = ~lowmask1;
269     itype top1 = x;
270     itype bottom1 = x & lowmask1;
271     top1 ^= top1 >> shift;
272     top1 &= highmask1;
273     x = top1 | bottom1;
274     itype lowmask2 = (itype(1U) << (bits - shift)) - 1;
275     itype bottom2 = x & lowmask2;
276     bottom2 = unxorshift(bottom2, bits - shift, shift);
277     bottom2 &= lowmask1;
278     return top1 | bottom2;
279 }
280 
281 /*
282  * Rotate left and right.
283  *
284  * In ideal world, compilers would spot idiomatic rotate code and convert it
285  * to a rotate instruction.  Of course, opinions vary on what the correct
286  * idiom is and how to spot it.  For clang, sometimes it generates better
287  * (but still crappy) code if you define PCG_USE_ZEROCHECK_ROTATE_IDIOM.
288  */
289 
290 template <typename itype>
rotl(itype value,bitcount_t rot)291 inline itype rotl(itype value, bitcount_t rot)
292 {
293     constexpr bitcount_t bits = sizeof(itype) * 8;
294     constexpr bitcount_t mask = bits - 1;
295 #if PCG_USE_ZEROCHECK_ROTATE_IDIOM
296     return rot ? (value << rot) | (value >> (bits - rot)) : value;
297 #else
298     return (value << rot) | (value >> ((- rot) & mask));
299 #endif
300 }
301 
302 template <typename itype>
rotr(itype value,bitcount_t rot)303 inline itype rotr(itype value, bitcount_t rot)
304 {
305     constexpr bitcount_t bits = sizeof(itype) * 8;
306     constexpr bitcount_t mask = bits - 1;
307 #if PCG_USE_ZEROCHECK_ROTATE_IDIOM
308     return rot ? (value >> rot) | (value << (bits - rot)) : value;
309 #else
310     return (value >> rot) | (value << ((- rot) & mask));
311 #endif
312 }
313 
314 /* Unfortunately, both Clang and GCC sometimes perform poorly when it comes
315  * to properly recognizing idiomatic rotate code, so for we also provide
316  * assembler directives (enabled with PCG_USE_INLINE_ASM).  Boo, hiss.
317  * (I hope that these compilers get better so that this code can die.)
318  *
319  * These overloads will be preferred over the general template code above.
320  */
321 #if PCG_USE_INLINE_ASM && __GNUC__ && (__x86_64__  || __i386__)
322 
rotr(uint8_t value,bitcount_t rot)323 inline uint8_t rotr(uint8_t value, bitcount_t rot)
324 {
325     asm ("rorb   %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
326     return value;
327 }
328 
rotr(uint16_t value,bitcount_t rot)329 inline uint16_t rotr(uint16_t value, bitcount_t rot)
330 {
331     asm ("rorw   %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
332     return value;
333 }
334 
rotr(uint32_t value,bitcount_t rot)335 inline uint32_t rotr(uint32_t value, bitcount_t rot)
336 {
337     asm ("rorl   %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
338     return value;
339 }
340 
341 #if __x86_64__
rotr(uint64_t value,bitcount_t rot)342 inline uint64_t rotr(uint64_t value, bitcount_t rot)
343 {
344     asm ("rorq   %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
345     return value;
346 }
347 #endif // __x86_64__
348 
349 #elif defined(_MSC_VER)
350   // Use MSVC++ bit rotation intrinsics
351 
352 #pragma intrinsic(_rotr, _rotr64, _rotr8, _rotr16)
353 
rotr(uint8_t value,bitcount_t rot)354 inline uint8_t rotr(uint8_t value, bitcount_t rot)
355 {
356     return _rotr8(value, rot);
357 }
358 
rotr(uint16_t value,bitcount_t rot)359 inline uint16_t rotr(uint16_t value, bitcount_t rot)
360 {
361     return _rotr16(value, rot);
362 }
363 
rotr(uint32_t value,bitcount_t rot)364 inline uint32_t rotr(uint32_t value, bitcount_t rot)
365 {
366     return _rotr(value, rot);
367 }
368 
rotr(uint64_t value,bitcount_t rot)369 inline uint64_t rotr(uint64_t value, bitcount_t rot)
370 {
371     return _rotr64(value, rot);
372 }
373 
374 #endif // PCG_USE_INLINE_ASM
375 
376 
377 /*
378  * The C++ SeedSeq concept (modelled by seed_seq) can fill an array of
379  * 32-bit integers with seed data, but sometimes we want to produce
380  * larger or smaller integers.
381  *
382  * The following code handles this annoyance.
383  *
384  * uneven_copy will copy an array of 32-bit ints to an array of larger or
385  * smaller ints (actually, the code is general it only needing forward
386  * iterators).  The copy is identical to the one that would be performed if
387  * we just did memcpy on a standard little-endian machine, but works
388  * regardless of the endian of the machine (or the weirdness of the ints
389  * involved).
390  *
391  * generate_to initializes an array of integers using a SeedSeq
392  * object.  It is given the size as a static constant at compile time and
393  * tries to avoid memory allocation.  If we're filling in 32-bit constants
394  * we just do it directly.  If we need a separate buffer and it's small,
395  * we allocate it on the stack.  Otherwise, we fall back to heap allocation.
396  * Ugh.
397  *
398  * generate_one produces a single value of some integral type using a
399  * SeedSeq object.
400  */
401 
402  /* uneven_copy helper, case where destination ints are less than 32 bit. */
403 
404 template<class SrcIter, class DestIter>
405 SrcIter uneven_copy_impl(
406     SrcIter src_first, DestIter dest_first, DestIter dest_last,
407     std::true_type)
408 {
409     typedef typename std::iterator_traits<SrcIter>::value_type  src_t;
410     typedef typename std::iterator_traits<DestIter>::value_type dest_t;
411 
412     constexpr bitcount_t SRC_SIZE  = sizeof(src_t);
413     constexpr bitcount_t DEST_SIZE = sizeof(dest_t);
414     constexpr bitcount_t DEST_BITS = DEST_SIZE * 8;
415     constexpr bitcount_t SCALE     = SRC_SIZE / DEST_SIZE;
416 
417     size_t count = 0;
418     src_t value = 0;
419 
420     while (dest_first != dest_last) {
421         if ((count++ % SCALE) == 0)
422             value = *src_first++;       // Get more bits
423         else
424             value >>= DEST_BITS;        // Move down bits
425 
426         *dest_first++ = dest_t(value);  // Truncates, ignores high bits.
427     }
428     return src_first;
429 }
430 
431  /* uneven_copy helper, case where destination ints are more than 32 bit. */
432 
433 template<class SrcIter, class DestIter>
434 SrcIter uneven_copy_impl(
435     SrcIter src_first, DestIter dest_first, DestIter dest_last,
436     std::false_type)
437 {
438     typedef typename std::iterator_traits<SrcIter>::value_type  src_t;
439     typedef typename std::iterator_traits<DestIter>::value_type dest_t;
440 
441     constexpr auto SRC_SIZE  = sizeof(src_t);
442     constexpr auto SRC_BITS  = SRC_SIZE * 8;
443     constexpr auto DEST_SIZE = sizeof(dest_t);
444     constexpr auto SCALE     = (DEST_SIZE+SRC_SIZE-1) / SRC_SIZE;
445 
446     while (dest_first != dest_last) {
447         dest_t value(0UL);
448         unsigned int shift = 0;
449 
450         for (size_t i = 0; i < SCALE; ++i) {
451             value |= dest_t(*src_first++) << shift;
452             shift += SRC_BITS;
453         }
454 
455         *dest_first++ = value;
456     }
457     return src_first;
458 }
459 
460 /* uneven_copy, call the right code for larger vs. smaller */
461 
462 template<class SrcIter, class DestIter>
uneven_copy(SrcIter src_first,DestIter dest_first,DestIter dest_last)463 inline SrcIter uneven_copy(SrcIter src_first,
464                            DestIter dest_first, DestIter dest_last)
465 {
466     typedef typename std::iterator_traits<SrcIter>::value_type  src_t;
467     typedef typename std::iterator_traits<DestIter>::value_type dest_t;
468 
469     constexpr bool DEST_IS_SMALLER = sizeof(dest_t) < sizeof(src_t);
470 
471     return uneven_copy_impl(src_first, dest_first, dest_last,
472                             std::integral_constant<bool, DEST_IS_SMALLER>{});
473 }
474 
475 /* generate_to, fill in a fixed-size array of integral type using a SeedSeq
476  * (actually works for any random-access iterator)
477  */
478 
479 template <size_t size, typename SeedSeq, typename DestIter>
generate_to_impl(SeedSeq && generator,DestIter dest,std::true_type)480 inline void generate_to_impl(SeedSeq&& generator, DestIter dest,
481                              std::true_type)
482 {
483     generator.generate(dest, dest+size);
484 }
485 
486 template <size_t size, typename SeedSeq, typename DestIter>
generate_to_impl(SeedSeq && generator,DestIter dest,std::false_type)487 void generate_to_impl(SeedSeq&& generator, DestIter dest,
488                       std::false_type)
489 {
490     typedef typename std::iterator_traits<DestIter>::value_type dest_t;
491     constexpr auto DEST_SIZE = sizeof(dest_t);
492     constexpr auto GEN_SIZE  = sizeof(uint32_t);
493 
494     constexpr bool GEN_IS_SMALLER = GEN_SIZE < DEST_SIZE;
495     constexpr size_t FROM_ELEMS =
496         GEN_IS_SMALLER
497             ? size * ((DEST_SIZE+GEN_SIZE-1) / GEN_SIZE)
498             : (size + (GEN_SIZE / DEST_SIZE) - 1)
499                 / ((GEN_SIZE / DEST_SIZE) + GEN_IS_SMALLER);
500                         //  this odd code ^^^^^^^^^^^^^^^^^ is work-around for
501                         //  a bug: http://llvm.org/bugs/show_bug.cgi?id=21287
502 
503     if (FROM_ELEMS <= 1024) {
504         uint32_t buffer[FROM_ELEMS];
505         generator.generate(buffer, buffer+FROM_ELEMS);
506         uneven_copy(buffer, dest, dest+size);
507     } else {
508         uint32_t* buffer = static_cast<uint32_t*>(malloc(GEN_SIZE * FROM_ELEMS));
509         generator.generate(buffer, buffer+FROM_ELEMS);
510         uneven_copy(buffer, dest, dest+size);
511         free(static_cast<void*>(buffer));
512     }
513 }
514 
515 template <size_t size, typename SeedSeq, typename DestIter>
generate_to(SeedSeq && generator,DestIter dest)516 inline void generate_to(SeedSeq&& generator, DestIter dest)
517 {
518     typedef typename std::iterator_traits<DestIter>::value_type dest_t;
519     constexpr bool IS_32BIT = sizeof(dest_t) == sizeof(uint32_t);
520 
521     generate_to_impl<size>(std::forward<SeedSeq>(generator), dest,
522                            std::integral_constant<bool, IS_32BIT>{});
523 }
524 
525 /* generate_one, produce a value of integral type using a SeedSeq
526  * (optionally, we can have it produce more than one and pick which one
527  * we want)
528  */
529 
530 template <typename UInt, size_t i = 0UL, size_t N = i+1UL, typename SeedSeq>
generate_one(SeedSeq && generator)531 inline UInt generate_one(SeedSeq&& generator)
532 {
533     UInt result[N];
534     generate_to<N>(std::forward<SeedSeq>(generator), result);
535     return result[i];
536 }
537 
538 template <typename RngType>
bounded_rand(RngType & rng,typename RngType::result_type upper_bound)539 auto bounded_rand(RngType& rng, typename RngType::result_type upper_bound)
540         -> typename RngType::result_type
541 {
542     typedef typename RngType::result_type rtype;
543     rtype threshold = (RngType::max() - RngType::min() + rtype(1) - upper_bound)
544                     % upper_bound;
545     for (;;) {
546         rtype r = rng() - RngType::min();
547         if (r >= threshold)
548             return r % upper_bound;
549     }
550 }
551 
552 template <typename Iter, typename RandType>
shuffle(Iter from,Iter to,RandType && rng)553 void shuffle(Iter from, Iter to, RandType&& rng)
554 {
555     typedef typename std::iterator_traits<Iter>::difference_type delta_t;
556     typedef typename std::remove_reference<RandType>::type::result_type result_t;
557     auto count = to - from;
558     while (count > 1) {
559         delta_t chosen = delta_t(bounded_rand(rng, result_t(count)));
560         --count;
561         --to;
562         using std::swap;
563         swap(*(from + chosen), *to);
564     }
565 }
566 
567 /*
568  * Although std::seed_seq is useful, it isn't everything.  Often we want to
569  * initialize a random-number generator some other way, such as from a random
570  * device.
571  *
572  * Technically, it does not meet the requirements of a SeedSequence because
573  * it lacks some of the rarely-used member functions (some of which would
574  * be impossible to provide).  However the C++ standard is quite specific
575  * that actual engines only called the generate method, so it ought not to be
576  * a problem in practice.
577  */
578 
579 template <typename RngType>
580 class seed_seq_from {
581 private:
582     RngType rng_;
583 
584     typedef uint_least32_t result_type;
585 
586 public:
587     template<typename... Args>
seed_seq_from(Args &&...args)588     seed_seq_from(Args&&... args) :
589         rng_(std::forward<Args>(args)...)
590     {
591         // Nothing (else) to do...
592     }
593 
594     template<typename Iter>
generate(Iter start,Iter finish)595     void generate(Iter start, Iter finish)
596     {
597         for (auto i = start; i != finish; ++i)
598             *i = result_type(rng_());
599     }
600 
size() const601     constexpr size_t size() const
602     {
603         return (sizeof(typename RngType::result_type) > sizeof(result_type)
604                 && RngType::max() > ~size_t(0UL))
605              ? ~size_t(0UL)
606              : size_t(RngType::max());
607     }
608 };
609 
610 /*
611  * Sometimes you might want a distinct seed based on when the program
612  * was compiled.  That way, a particular instance of the program will
613  * behave the same way, but when recompiled it'll produce a different
614  * value.
615  */
616 
617 template <typename IntType>
618 struct static_arbitrary_seed {
619 private:
fnvpcg_extras::static_arbitrary_seed620     static constexpr IntType fnv(IntType hash, const char* pos) {
621         return *pos == '\0'
622              ? hash
623              : fnv((hash * IntType(16777619U)) ^ *pos, (pos+1));
624     }
625 
626 public:
627     static constexpr IntType value = fnv(IntType(2166136261U ^ sizeof(IntType)),
628                         __DATE__ __TIME__ __FILE__);
629 };
630 
631 // Sometimes, when debugging or testing, it's handy to be able print the name
632 // of a (in human-readable form).  This code allows the idiom:
633 //
634 //      cout << printable_typename<my_foo_type_t>()
635 //
636 // to print out my_foo_type_t (or its concrete type if it is a synonym)
637 
638 #if __cpp_rtti || __GXX_RTTI
639 
640 template <typename T>
641 struct printable_typename {};
642 
643 template <typename T>
operator <<(std::ostream & out,printable_typename<T>)644 std::ostream& operator<<(std::ostream& out, printable_typename<T>) {
645     const char *implementation_typename = typeid(T).name();
646 #ifdef __GNUC__
647     int status;
648     char* pretty_name =
649         abi::__cxa_demangle(implementation_typename, nullptr, nullptr, &status);
650     if (status == 0)
651         out << pretty_name;
652     free(static_cast<void*>(pretty_name));
653     if (status == 0)
654         return out;
655 #endif
656     out << implementation_typename;
657     return out;
658 }
659 
660 #endif  // __cpp_rtti || __GXX_RTTI
661 
662 } // namespace pcg_extras
663 
664 #endif // PCG_EXTRAS_HPP_INCLUDED
665