1"""
2BitGenerator base class and SeedSequence used to seed the BitGenerators.
3
4SeedSequence is derived from Melissa E. O'Neill's C++11 `std::seed_seq`
5implementation, as it has a lot of nice properties that we want.
6
7https://gist.github.com/imneme/540829265469e673d045
8http://www.pcg-random.org/posts/developing-a-seed_seq-alternative.html
9
10The MIT License (MIT)
11
12Copyright (c) 2015 Melissa E. O'Neill
13Copyright (c) 2019 NumPy Developers
14
15Permission is hereby granted, free of charge, to any person obtaining a copy
16of this software and associated documentation files (the "Software"), to deal
17in the Software without restriction, including without limitation the rights
18to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
19copies of the Software, and to permit persons to whom the Software is
20furnished to do so, subject to the following conditions:
21
22The above copyright notice and this permission notice shall be included in
23all copies or substantial portions of the Software.
24
25THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
30OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31SOFTWARE.
32"""
33
34import abc
35import sys
36from itertools import cycle
37import re
38
39try:
40    from secrets import randbits
41except ImportError:
42    # secrets unavailable on python 3.5 and before
43    from random import SystemRandom
44    randbits = SystemRandom().getrandbits
45
46try:
47    from threading import Lock
48except ImportError:
49    from dummy_threading import Lock
50
51from cpython.pycapsule cimport PyCapsule_New
52
53import numpy as np
54cimport numpy as np
55
56from ._common cimport (random_raw, benchmark, prepare_ctypes, prepare_cffi)
57
58__all__ = ['SeedSequence', 'BitGenerator']
59
60np.import_array()
61
62DECIMAL_RE = re.compile(r'[0-9]+')
63
64cdef uint32_t DEFAULT_POOL_SIZE = 4  # Appears also in docstring for pool_size
65cdef uint32_t INIT_A = 0x43b0d7e5
66cdef uint32_t MULT_A = 0x931e8875
67cdef uint32_t INIT_B = 0x8b51f9dd
68cdef uint32_t MULT_B = 0x58f38ded
69cdef uint32_t MIX_MULT_L = 0xca01f9dd
70cdef uint32_t MIX_MULT_R = 0x4973f715
71cdef uint32_t XSHIFT = np.dtype(np.uint32).itemsize * 8 // 2
72cdef uint32_t MASK32 = 0xFFFFFFFF
73
74def _int_to_uint32_array(n):
75    arr = []
76    if n < 0:
77        raise ValueError("expected non-negative integer")
78    if n == 0:
79        arr.append(np.uint32(n))
80    if isinstance(n, np.unsignedinteger):
81        # Cannot do n & MASK32, convert to python int
82        n = int(n)
83    while n > 0:
84        arr.append(np.uint32(n & MASK32))
85        n //= (2**32)
86    return np.array(arr, dtype=np.uint32)
87
88def _coerce_to_uint32_array(x):
89    """ Coerce an input to a uint32 array.
90
91    If a `uint32` array, pass it through directly.
92    If a non-negative integer, then break it up into `uint32` words, lowest
93    bits first.
94    If a string starting with "0x", then interpret as a hex integer, as above.
95    If a string of decimal digits, interpret as a decimal integer, as above.
96    If a sequence of ints or strings, interpret each element as above and
97    concatenate.
98
99    Note that the handling of `int64` or `uint64` arrays are not just
100    straightforward views as `uint32` arrays. If an element is small enough to
101    fit into a `uint32`, then it will only take up one `uint32` element in the
102    output. This is to make sure that the interpretation of a sequence of
103    integers is the same regardless of numpy's default integer type, which
104    differs on different platforms.
105
106    Parameters
107    ----------
108    x : int, str, sequence of int or str
109
110    Returns
111    -------
112    seed_array : uint32 array
113
114    Examples
115    --------
116    >>> import numpy as np
117    >>> from numpy.random.bit_generator import _coerce_to_uint32_array
118    >>> _coerce_to_uint32_array(12345)
119    array([12345], dtype=uint32)
120    >>> _coerce_to_uint32_array('12345')
121    array([12345], dtype=uint32)
122    >>> _coerce_to_uint32_array('0x12345')
123    array([74565], dtype=uint32)
124    >>> _coerce_to_uint32_array([12345, '67890'])
125    array([12345, 67890], dtype=uint32)
126    >>> _coerce_to_uint32_array(np.array([12345, 67890], dtype=np.uint32))
127    array([12345, 67890], dtype=uint32)
128    >>> _coerce_to_uint32_array(np.array([12345, 67890], dtype=np.int64))
129    array([12345, 67890], dtype=uint32)
130    >>> _coerce_to_uint32_array([12345, 0x10deadbeef, 67890, 0xdeadbeef])
131    array([     12345, 3735928559,         16,      67890, 3735928559],
132          dtype=uint32)
133    >>> _coerce_to_uint32_array(1234567890123456789012345678901234567890)
134    array([3460238034, 2898026390, 3235640248, 2697535605,          3],
135          dtype=uint32)
136    """
137    if isinstance(x, np.ndarray) and x.dtype == np.dtype(np.uint32):
138        return x.copy()
139    elif isinstance(x, str):
140        if x.startswith('0x'):
141            x = int(x, base=16)
142        elif DECIMAL_RE.match(x):
143            x = int(x)
144        else:
145            raise ValueError("unrecognized seed string")
146    if isinstance(x, (int, np.integer)):
147        return _int_to_uint32_array(x)
148    elif isinstance(x, (float, np.inexact)):
149        raise TypeError('seed must be integer')
150    else:
151        if len(x) == 0:
152            return np.array([], dtype=np.uint32)
153        # Should be a sequence of interpretable-as-ints. Convert each one to
154        # a uint32 array and concatenate.
155        subseqs = [_coerce_to_uint32_array(v) for v in x]
156        return np.concatenate(subseqs)
157
158
159cdef uint32_t hashmix(uint32_t value, uint32_t * hash_const):
160    # We are modifying the multiplier as we go along, so it is input-output
161    value ^= hash_const[0]
162    hash_const[0] *= MULT_A
163    value *=  hash_const[0]
164    value ^= value >> XSHIFT
165    return value
166
167cdef uint32_t mix(uint32_t x, uint32_t y):
168    cdef uint32_t result = (MIX_MULT_L * x - MIX_MULT_R * y)
169    result ^= result >> XSHIFT
170    return result
171
172
173class ISeedSequence(abc.ABC):
174    """
175    Abstract base class for seed sequences.
176
177    ``BitGenerator`` implementations should treat any object that adheres to
178    this interface as a seed sequence.
179
180    See Also
181    --------
182    SeedSequence, SeedlessSeedSequence
183    """
184
185    @abc.abstractmethod
186    def generate_state(self, n_words, dtype=np.uint32):
187        """
188        generate_state(n_words, dtype=np.uint32)
189
190        Return the requested number of words for PRNG seeding.
191
192        A BitGenerator should call this method in its constructor with
193        an appropriate `n_words` parameter to properly seed itself.
194
195        Parameters
196        ----------
197        n_words : int
198        dtype : np.uint32 or np.uint64, optional
199            The size of each word. This should only be either `uint32` or
200            `uint64`. Strings (`'uint32'`, `'uint64'`) are fine. Note that
201            requesting `uint64` will draw twice as many bits as `uint32` for
202            the same `n_words`. This is a convenience for `BitGenerator`s that
203            express their states as `uint64` arrays.
204
205        Returns
206        -------
207        state : uint32 or uint64 array, shape=(n_words,)
208        """
209
210
211class ISpawnableSeedSequence(ISeedSequence):
212    """
213    Abstract base class for seed sequences that can spawn.
214    """
215
216    @abc.abstractmethod
217    def spawn(self, n_children):
218        """
219        spawn(n_children)
220
221        Spawn a number of child `SeedSequence` s by extending the
222        `spawn_key`.
223
224        Parameters
225        ----------
226        n_children : int
227
228        Returns
229        -------
230        seqs : list of `SeedSequence` s
231        """
232
233
234cdef class SeedlessSeedSequence():
235    """
236    A seed sequence for BitGenerators with no need for seed state.
237
238    See Also
239    --------
240    SeedSequence, ISeedSequence
241    """
242
243    def generate_state(self, n_words, dtype=np.uint32):
244        raise NotImplementedError('seedless SeedSequences cannot generate state')
245
246    def spawn(self, n_children):
247        return [self] * n_children
248
249
250# We cannot directly subclass a `cdef class` type from an `ABC` in Cython, so
251# we must register it after the fact.
252ISpawnableSeedSequence.register(SeedlessSeedSequence)
253
254
255cdef class SeedSequence():
256    """
257    SeedSequence(entropy=None, *, spawn_key=(), pool_size=4)
258
259    SeedSequence mixes sources of entropy in a reproducible way to set the
260    initial state for independent and very probably non-overlapping
261    BitGenerators.
262
263    Once the SeedSequence is instantiated, you can call the `generate_state`
264    method to get an appropriately sized seed. Calling `spawn(n) <spawn>` will
265    create ``n`` SeedSequences that can be used to seed independent
266    BitGenerators, i.e. for different threads.
267
268    Parameters
269    ----------
270    entropy : {None, int, sequence[int]}, optional
271        The entropy for creating a `SeedSequence`.
272    spawn_key : {(), sequence[int]}, optional
273        A third source of entropy, used internally when calling
274        `SeedSequence.spawn`
275    pool_size : {int}, optional
276        Size of the pooled entropy to store. Default is 4 to give a 128-bit
277        entropy pool. 8 (for 256 bits) is another reasonable choice if working
278        with larger PRNGs, but there is very little to be gained by selecting
279        another value.
280    n_children_spawned : {int}, optional
281        The number of children already spawned. Only pass this if
282        reconstructing a `SeedSequence` from a serialized form.
283
284    Notes
285    -----
286
287    Best practice for achieving reproducible bit streams is to use
288    the default ``None`` for the initial entropy, and then use
289    `SeedSequence.entropy` to log/pickle the `entropy` for reproducibility:
290
291    >>> sq1 = np.random.SeedSequence()
292    >>> sq1.entropy
293    243799254704924441050048792905230269161  # random
294    >>> sq2 = np.random.SeedSequence(sq1.entropy)
295    >>> np.all(sq1.generate_state(10) == sq2.generate_state(10))
296    True
297    """
298
299    def __init__(self, entropy=None, *, spawn_key=(),
300                 pool_size=DEFAULT_POOL_SIZE, n_children_spawned=0):
301        if pool_size < DEFAULT_POOL_SIZE:
302            raise ValueError("The size of the entropy pool should be at least "
303                             f"{DEFAULT_POOL_SIZE}")
304        if entropy is None:
305            entropy = randbits(pool_size * 32)
306        elif not isinstance(entropy, (int, np.integer, list, tuple, range,
307                                      np.ndarray)):
308            raise TypeError('SeedSequence expects int or sequence of ints for '
309                            'entropy not {}'.format(entropy))
310        self.entropy = entropy
311        self.spawn_key = tuple(spawn_key)
312        self.pool_size = pool_size
313        self.n_children_spawned = n_children_spawned
314
315        self.pool = np.zeros(pool_size, dtype=np.uint32)
316        self.mix_entropy(self.pool, self.get_assembled_entropy())
317
318    def __repr__(self):
319        lines = [
320            f'{type(self).__name__}(',
321            f'    entropy={self.entropy!r},',
322        ]
323        # Omit some entries if they are left as the defaults in order to
324        # simplify things.
325        if self.spawn_key:
326            lines.append(f'    spawn_key={self.spawn_key!r},')
327        if self.pool_size != DEFAULT_POOL_SIZE:
328            lines.append(f'    pool_size={self.pool_size!r},')
329        if self.n_children_spawned != 0:
330            lines.append(f'    n_children_spawned={self.n_children_spawned!r},')
331        lines.append(')')
332        text = '\n'.join(lines)
333        return text
334
335    @property
336    def state(self):
337        return {k:getattr(self, k) for k in
338                ['entropy', 'spawn_key', 'pool_size',
339                 'n_children_spawned']
340                if getattr(self, k) is not None}
341
342    cdef mix_entropy(self, np.ndarray[np.npy_uint32, ndim=1] mixer,
343                     np.ndarray[np.npy_uint32, ndim=1] entropy_array):
344        """ Mix in the given entropy to mixer.
345
346        Parameters
347        ----------
348        mixer : 1D uint32 array, modified in-place
349        entropy_array : 1D uint32 array
350        """
351        cdef uint32_t hash_const[1]
352        hash_const[0] = INIT_A
353
354        # Add in the entropy up to the pool size.
355        for i in range(len(mixer)):
356            if i < len(entropy_array):
357                mixer[i] = hashmix(entropy_array[i], hash_const)
358            else:
359                # Our pool size is bigger than our entropy, so just keep
360                # running the hash out.
361                mixer[i] = hashmix(0, hash_const)
362
363        # Mix all bits together so late bits can affect earlier bits.
364        for i_src in range(len(mixer)):
365            for i_dst in range(len(mixer)):
366                if i_src != i_dst:
367                    mixer[i_dst] = mix(mixer[i_dst],
368                                       hashmix(mixer[i_src], hash_const))
369
370        # Add any remaining entropy, mixing each new entropy word with each
371        # pool word.
372        for i_src in range(len(mixer), len(entropy_array)):
373            for i_dst in range(len(mixer)):
374                mixer[i_dst] = mix(mixer[i_dst],
375                                   hashmix(entropy_array[i_src], hash_const))
376
377    cdef get_assembled_entropy(self):
378        """ Convert and assemble all entropy sources into a uniform uint32
379        array.
380
381        Returns
382        -------
383        entropy_array : 1D uint32 array
384        """
385        # Convert run-entropy and the spawn key into uint32
386        # arrays and concatenate them.
387
388        # We MUST have at least some run-entropy. The others are optional.
389        assert self.entropy is not None
390        run_entropy = _coerce_to_uint32_array(self.entropy)
391        spawn_entropy = _coerce_to_uint32_array(self.spawn_key)
392        if len(spawn_entropy) > 0 and len(run_entropy) < self.pool_size:
393            # Explicitly fill out the entropy with 0s to the pool size to avoid
394            # conflict with spawn keys. We changed this in 1.19.0 to fix
395            # gh-16539. In order to preserve stream-compatibility with
396            # unspawned SeedSequences with small entropy inputs, we only do
397            # this when a spawn_key is specified.
398            diff = self.pool_size - len(run_entropy)
399            run_entropy = np.concatenate(
400                [run_entropy, np.zeros(diff, dtype=np.uint32)])
401        entropy_array = np.concatenate([run_entropy, spawn_entropy])
402        return entropy_array
403
404    @np.errstate(over='ignore')
405    def generate_state(self, n_words, dtype=np.uint32):
406        """
407        generate_state(n_words, dtype=np.uint32)
408
409        Return the requested number of words for PRNG seeding.
410
411        A BitGenerator should call this method in its constructor with
412        an appropriate `n_words` parameter to properly seed itself.
413
414        Parameters
415        ----------
416        n_words : int
417        dtype : np.uint32 or np.uint64, optional
418            The size of each word. This should only be either `uint32` or
419            `uint64`. Strings (`'uint32'`, `'uint64'`) are fine. Note that
420            requesting `uint64` will draw twice as many bits as `uint32` for
421            the same `n_words`. This is a convenience for `BitGenerator`s that
422            express their states as `uint64` arrays.
423
424        Returns
425        -------
426        state : uint32 or uint64 array, shape=(n_words,)
427        """
428        cdef uint32_t hash_const = INIT_B
429        cdef uint32_t data_val
430
431        out_dtype = np.dtype(dtype)
432        if out_dtype == np.dtype(np.uint32):
433            pass
434        elif out_dtype == np.dtype(np.uint64):
435            n_words *= 2
436        else:
437            raise ValueError("only support uint32 or uint64")
438        state = np.zeros(n_words, dtype=np.uint32)
439        src_cycle = cycle(self.pool)
440        for i_dst in range(n_words):
441            data_val = next(src_cycle)
442            data_val ^= hash_const
443            hash_const *= MULT_B
444            data_val *= hash_const
445            data_val ^= data_val >> XSHIFT
446            state[i_dst] = data_val
447        if out_dtype == np.dtype(np.uint64):
448            # For consistency across different endiannesses, view first as
449            # little-endian then convert the values to the native endianness.
450            state = state.astype('<u4').view('<u8').astype(np.uint64)
451        return state
452
453    def spawn(self, n_children):
454        """
455        spawn(n_children)
456
457        Spawn a number of child `SeedSequence` s by extending the
458        `spawn_key`.
459
460        Parameters
461        ----------
462        n_children : int
463
464        Returns
465        -------
466        seqs : list of `SeedSequence` s
467        """
468        cdef uint32_t i
469
470        seqs = []
471        for i in range(self.n_children_spawned,
472                       self.n_children_spawned + n_children):
473            seqs.append(type(self)(
474                self.entropy,
475                spawn_key=self.spawn_key + (i,),
476                pool_size=self.pool_size,
477            ))
478        self.n_children_spawned += n_children
479        return seqs
480
481
482ISpawnableSeedSequence.register(SeedSequence)
483
484
485cdef class BitGenerator():
486    """
487    BitGenerator(seed=None)
488
489    Base Class for generic BitGenerators, which provide a stream
490    of random bits based on different algorithms. Must be overridden.
491
492    Parameters
493    ----------
494    seed : {None, int, array_like[ints], SeedSequence}, optional
495        A seed to initialize the `BitGenerator`. If None, then fresh,
496        unpredictable entropy will be pulled from the OS. If an ``int`` or
497        ``array_like[ints]`` is passed, then it will be passed to
498        ~`numpy.random.SeedSequence` to derive the initial `BitGenerator` state.
499        One may also pass in a `SeedSequence` instance.
500
501    Attributes
502    ----------
503    lock : threading.Lock
504        Lock instance that is shared so that the same BitGenerator can
505        be used in multiple Generators without corrupting the state. Code that
506        generates values from a bit generator should hold the bit generator's
507        lock.
508
509    See Also
510    --------
511    SeedSequence
512    """
513
514    def __init__(self, seed=None):
515        self.lock = Lock()
516        self._bitgen.state = <void *>0
517        if type(self) is BitGenerator:
518            raise NotImplementedError('BitGenerator is a base class and cannot be instantized')
519
520        self._ctypes = None
521        self._cffi = None
522
523        cdef const char *name = "BitGenerator"
524        self.capsule = PyCapsule_New(<void *>&self._bitgen, name, NULL)
525        if not isinstance(seed, ISeedSequence):
526            seed = SeedSequence(seed)
527        self._seed_seq = seed
528
529    # Pickling support:
530    def __getstate__(self):
531        return self.state
532
533    def __setstate__(self, state):
534        self.state = state
535
536    def __reduce__(self):
537        from ._pickle import __bit_generator_ctor
538        return __bit_generator_ctor, (self.state['bit_generator'],), self.state
539
540    @property
541    def state(self):
542        """
543        Get or set the PRNG state
544
545        The base BitGenerator.state must be overridden by a subclass
546
547        Returns
548        -------
549        state : dict
550            Dictionary containing the information required to describe the
551            state of the PRNG
552        """
553        raise NotImplementedError('Not implemented in base BitGenerator')
554
555    @state.setter
556    def state(self, value):
557        raise NotImplementedError('Not implemented in base BitGenerator')
558
559    def random_raw(self, size=None, output=True):
560        """
561        random_raw(self, size=None)
562
563        Return randoms as generated by the underlying BitGenerator
564
565        Parameters
566        ----------
567        size : int or tuple of ints, optional
568            Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
569            ``m * n * k`` samples are drawn.  Default is None, in which case a
570            single value is returned.
571        output : bool, optional
572            Output values.  Used for performance testing since the generated
573            values are not returned.
574
575        Returns
576        -------
577        out : uint or ndarray
578            Drawn samples.
579
580        Notes
581        -----
582        This method directly exposes the the raw underlying pseudo-random
583        number generator. All values are returned as unsigned 64-bit
584        values irrespective of the number of bits produced by the PRNG.
585
586        See the class docstring for the number of bits returned.
587        """
588        return random_raw(&self._bitgen, self.lock, size, output)
589
590    def _benchmark(self, Py_ssize_t cnt, method=u'uint64'):
591        '''Used in tests'''
592        return benchmark(&self._bitgen, self.lock, cnt, method)
593
594    @property
595    def ctypes(self):
596        """
597        ctypes interface
598
599        Returns
600        -------
601        interface : namedtuple
602            Named tuple containing ctypes wrapper
603
604            * state_address - Memory address of the state struct
605            * state - pointer to the state struct
606            * next_uint64 - function pointer to produce 64 bit integers
607            * next_uint32 - function pointer to produce 32 bit integers
608            * next_double - function pointer to produce doubles
609            * bitgen - pointer to the bit generator struct
610        """
611        if self._ctypes is None:
612            self._ctypes = prepare_ctypes(&self._bitgen)
613
614        return self._ctypes
615
616    @property
617    def cffi(self):
618        """
619        CFFI interface
620
621        Returns
622        -------
623        interface : namedtuple
624            Named tuple containing CFFI wrapper
625
626            * state_address - Memory address of the state struct
627            * state - pointer to the state struct
628            * next_uint64 - function pointer to produce 64 bit integers
629            * next_uint32 - function pointer to produce 32 bit integers
630            * next_double - function pointer to produce doubles
631            * bitgen - pointer to the bit generator struct
632        """
633        if self._cffi is None:
634            self._cffi = prepare_cffi(&self._bitgen)
635        return self._cffi
636