1""" 2BitGenerator base class and SeedSequence used to seed the BitGenerators. 3 4SeedSequence is derived from Melissa E. O'Neill's C++11 `std::seed_seq` 5implementation, as it has a lot of nice properties that we want. 6 7https://gist.github.com/imneme/540829265469e673d045 8http://www.pcg-random.org/posts/developing-a-seed_seq-alternative.html 9 10The MIT License (MIT) 11 12Copyright (c) 2015 Melissa E. O'Neill 13Copyright (c) 2019 NumPy Developers 14 15Permission is hereby granted, free of charge, to any person obtaining a copy 16of this software and associated documentation files (the "Software"), to deal 17in the Software without restriction, including without limitation the rights 18to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 19copies of the Software, and to permit persons to whom the Software is 20furnished to do so, subject to the following conditions: 21 22The above copyright notice and this permission notice shall be included in 23all copies or substantial portions of the Software. 24 25THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 26IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 27FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 28AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 29LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 30OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31SOFTWARE. 32""" 33 34import abc 35import sys 36from itertools import cycle 37import re 38 39try: 40 from secrets import randbits 41except ImportError: 42 # secrets unavailable on python 3.5 and before 43 from random import SystemRandom 44 randbits = SystemRandom().getrandbits 45 46try: 47 from threading import Lock 48except ImportError: 49 from dummy_threading import Lock 50 51from cpython.pycapsule cimport PyCapsule_New 52 53import numpy as np 54cimport numpy as np 55 56from ._common cimport (random_raw, benchmark, prepare_ctypes, prepare_cffi) 57 58__all__ = ['SeedSequence', 'BitGenerator'] 59 60np.import_array() 61 62DECIMAL_RE = re.compile(r'[0-9]+') 63 64cdef uint32_t DEFAULT_POOL_SIZE = 4 # Appears also in docstring for pool_size 65cdef uint32_t INIT_A = 0x43b0d7e5 66cdef uint32_t MULT_A = 0x931e8875 67cdef uint32_t INIT_B = 0x8b51f9dd 68cdef uint32_t MULT_B = 0x58f38ded 69cdef uint32_t MIX_MULT_L = 0xca01f9dd 70cdef uint32_t MIX_MULT_R = 0x4973f715 71cdef uint32_t XSHIFT = np.dtype(np.uint32).itemsize * 8 // 2 72cdef uint32_t MASK32 = 0xFFFFFFFF 73 74def _int_to_uint32_array(n): 75 arr = [] 76 if n < 0: 77 raise ValueError("expected non-negative integer") 78 if n == 0: 79 arr.append(np.uint32(n)) 80 if isinstance(n, np.unsignedinteger): 81 # Cannot do n & MASK32, convert to python int 82 n = int(n) 83 while n > 0: 84 arr.append(np.uint32(n & MASK32)) 85 n //= (2**32) 86 return np.array(arr, dtype=np.uint32) 87 88def _coerce_to_uint32_array(x): 89 """ Coerce an input to a uint32 array. 90 91 If a `uint32` array, pass it through directly. 92 If a non-negative integer, then break it up into `uint32` words, lowest 93 bits first. 94 If a string starting with "0x", then interpret as a hex integer, as above. 95 If a string of decimal digits, interpret as a decimal integer, as above. 96 If a sequence of ints or strings, interpret each element as above and 97 concatenate. 98 99 Note that the handling of `int64` or `uint64` arrays are not just 100 straightforward views as `uint32` arrays. If an element is small enough to 101 fit into a `uint32`, then it will only take up one `uint32` element in the 102 output. This is to make sure that the interpretation of a sequence of 103 integers is the same regardless of numpy's default integer type, which 104 differs on different platforms. 105 106 Parameters 107 ---------- 108 x : int, str, sequence of int or str 109 110 Returns 111 ------- 112 seed_array : uint32 array 113 114 Examples 115 -------- 116 >>> import numpy as np 117 >>> from numpy.random.bit_generator import _coerce_to_uint32_array 118 >>> _coerce_to_uint32_array(12345) 119 array([12345], dtype=uint32) 120 >>> _coerce_to_uint32_array('12345') 121 array([12345], dtype=uint32) 122 >>> _coerce_to_uint32_array('0x12345') 123 array([74565], dtype=uint32) 124 >>> _coerce_to_uint32_array([12345, '67890']) 125 array([12345, 67890], dtype=uint32) 126 >>> _coerce_to_uint32_array(np.array([12345, 67890], dtype=np.uint32)) 127 array([12345, 67890], dtype=uint32) 128 >>> _coerce_to_uint32_array(np.array([12345, 67890], dtype=np.int64)) 129 array([12345, 67890], dtype=uint32) 130 >>> _coerce_to_uint32_array([12345, 0x10deadbeef, 67890, 0xdeadbeef]) 131 array([ 12345, 3735928559, 16, 67890, 3735928559], 132 dtype=uint32) 133 >>> _coerce_to_uint32_array(1234567890123456789012345678901234567890) 134 array([3460238034, 2898026390, 3235640248, 2697535605, 3], 135 dtype=uint32) 136 """ 137 if isinstance(x, np.ndarray) and x.dtype == np.dtype(np.uint32): 138 return x.copy() 139 elif isinstance(x, str): 140 if x.startswith('0x'): 141 x = int(x, base=16) 142 elif DECIMAL_RE.match(x): 143 x = int(x) 144 else: 145 raise ValueError("unrecognized seed string") 146 if isinstance(x, (int, np.integer)): 147 return _int_to_uint32_array(x) 148 elif isinstance(x, (float, np.inexact)): 149 raise TypeError('seed must be integer') 150 else: 151 if len(x) == 0: 152 return np.array([], dtype=np.uint32) 153 # Should be a sequence of interpretable-as-ints. Convert each one to 154 # a uint32 array and concatenate. 155 subseqs = [_coerce_to_uint32_array(v) for v in x] 156 return np.concatenate(subseqs) 157 158 159cdef uint32_t hashmix(uint32_t value, uint32_t * hash_const): 160 # We are modifying the multiplier as we go along, so it is input-output 161 value ^= hash_const[0] 162 hash_const[0] *= MULT_A 163 value *= hash_const[0] 164 value ^= value >> XSHIFT 165 return value 166 167cdef uint32_t mix(uint32_t x, uint32_t y): 168 cdef uint32_t result = (MIX_MULT_L * x - MIX_MULT_R * y) 169 result ^= result >> XSHIFT 170 return result 171 172 173class ISeedSequence(abc.ABC): 174 """ 175 Abstract base class for seed sequences. 176 177 ``BitGenerator`` implementations should treat any object that adheres to 178 this interface as a seed sequence. 179 180 See Also 181 -------- 182 SeedSequence, SeedlessSeedSequence 183 """ 184 185 @abc.abstractmethod 186 def generate_state(self, n_words, dtype=np.uint32): 187 """ 188 generate_state(n_words, dtype=np.uint32) 189 190 Return the requested number of words for PRNG seeding. 191 192 A BitGenerator should call this method in its constructor with 193 an appropriate `n_words` parameter to properly seed itself. 194 195 Parameters 196 ---------- 197 n_words : int 198 dtype : np.uint32 or np.uint64, optional 199 The size of each word. This should only be either `uint32` or 200 `uint64`. Strings (`'uint32'`, `'uint64'`) are fine. Note that 201 requesting `uint64` will draw twice as many bits as `uint32` for 202 the same `n_words`. This is a convenience for `BitGenerator`s that 203 express their states as `uint64` arrays. 204 205 Returns 206 ------- 207 state : uint32 or uint64 array, shape=(n_words,) 208 """ 209 210 211class ISpawnableSeedSequence(ISeedSequence): 212 """ 213 Abstract base class for seed sequences that can spawn. 214 """ 215 216 @abc.abstractmethod 217 def spawn(self, n_children): 218 """ 219 spawn(n_children) 220 221 Spawn a number of child `SeedSequence` s by extending the 222 `spawn_key`. 223 224 Parameters 225 ---------- 226 n_children : int 227 228 Returns 229 ------- 230 seqs : list of `SeedSequence` s 231 """ 232 233 234cdef class SeedlessSeedSequence(): 235 """ 236 A seed sequence for BitGenerators with no need for seed state. 237 238 See Also 239 -------- 240 SeedSequence, ISeedSequence 241 """ 242 243 def generate_state(self, n_words, dtype=np.uint32): 244 raise NotImplementedError('seedless SeedSequences cannot generate state') 245 246 def spawn(self, n_children): 247 return [self] * n_children 248 249 250# We cannot directly subclass a `cdef class` type from an `ABC` in Cython, so 251# we must register it after the fact. 252ISpawnableSeedSequence.register(SeedlessSeedSequence) 253 254 255cdef class SeedSequence(): 256 """ 257 SeedSequence(entropy=None, *, spawn_key=(), pool_size=4) 258 259 SeedSequence mixes sources of entropy in a reproducible way to set the 260 initial state for independent and very probably non-overlapping 261 BitGenerators. 262 263 Once the SeedSequence is instantiated, you can call the `generate_state` 264 method to get an appropriately sized seed. Calling `spawn(n) <spawn>` will 265 create ``n`` SeedSequences that can be used to seed independent 266 BitGenerators, i.e. for different threads. 267 268 Parameters 269 ---------- 270 entropy : {None, int, sequence[int]}, optional 271 The entropy for creating a `SeedSequence`. 272 spawn_key : {(), sequence[int]}, optional 273 A third source of entropy, used internally when calling 274 `SeedSequence.spawn` 275 pool_size : {int}, optional 276 Size of the pooled entropy to store. Default is 4 to give a 128-bit 277 entropy pool. 8 (for 256 bits) is another reasonable choice if working 278 with larger PRNGs, but there is very little to be gained by selecting 279 another value. 280 n_children_spawned : {int}, optional 281 The number of children already spawned. Only pass this if 282 reconstructing a `SeedSequence` from a serialized form. 283 284 Notes 285 ----- 286 287 Best practice for achieving reproducible bit streams is to use 288 the default ``None`` for the initial entropy, and then use 289 `SeedSequence.entropy` to log/pickle the `entropy` for reproducibility: 290 291 >>> sq1 = np.random.SeedSequence() 292 >>> sq1.entropy 293 243799254704924441050048792905230269161 # random 294 >>> sq2 = np.random.SeedSequence(sq1.entropy) 295 >>> np.all(sq1.generate_state(10) == sq2.generate_state(10)) 296 True 297 """ 298 299 def __init__(self, entropy=None, *, spawn_key=(), 300 pool_size=DEFAULT_POOL_SIZE, n_children_spawned=0): 301 if pool_size < DEFAULT_POOL_SIZE: 302 raise ValueError("The size of the entropy pool should be at least " 303 f"{DEFAULT_POOL_SIZE}") 304 if entropy is None: 305 entropy = randbits(pool_size * 32) 306 elif not isinstance(entropy, (int, np.integer, list, tuple, range, 307 np.ndarray)): 308 raise TypeError('SeedSequence expects int or sequence of ints for ' 309 'entropy not {}'.format(entropy)) 310 self.entropy = entropy 311 self.spawn_key = tuple(spawn_key) 312 self.pool_size = pool_size 313 self.n_children_spawned = n_children_spawned 314 315 self.pool = np.zeros(pool_size, dtype=np.uint32) 316 self.mix_entropy(self.pool, self.get_assembled_entropy()) 317 318 def __repr__(self): 319 lines = [ 320 f'{type(self).__name__}(', 321 f' entropy={self.entropy!r},', 322 ] 323 # Omit some entries if they are left as the defaults in order to 324 # simplify things. 325 if self.spawn_key: 326 lines.append(f' spawn_key={self.spawn_key!r},') 327 if self.pool_size != DEFAULT_POOL_SIZE: 328 lines.append(f' pool_size={self.pool_size!r},') 329 if self.n_children_spawned != 0: 330 lines.append(f' n_children_spawned={self.n_children_spawned!r},') 331 lines.append(')') 332 text = '\n'.join(lines) 333 return text 334 335 @property 336 def state(self): 337 return {k:getattr(self, k) for k in 338 ['entropy', 'spawn_key', 'pool_size', 339 'n_children_spawned'] 340 if getattr(self, k) is not None} 341 342 cdef mix_entropy(self, np.ndarray[np.npy_uint32, ndim=1] mixer, 343 np.ndarray[np.npy_uint32, ndim=1] entropy_array): 344 """ Mix in the given entropy to mixer. 345 346 Parameters 347 ---------- 348 mixer : 1D uint32 array, modified in-place 349 entropy_array : 1D uint32 array 350 """ 351 cdef uint32_t hash_const[1] 352 hash_const[0] = INIT_A 353 354 # Add in the entropy up to the pool size. 355 for i in range(len(mixer)): 356 if i < len(entropy_array): 357 mixer[i] = hashmix(entropy_array[i], hash_const) 358 else: 359 # Our pool size is bigger than our entropy, so just keep 360 # running the hash out. 361 mixer[i] = hashmix(0, hash_const) 362 363 # Mix all bits together so late bits can affect earlier bits. 364 for i_src in range(len(mixer)): 365 for i_dst in range(len(mixer)): 366 if i_src != i_dst: 367 mixer[i_dst] = mix(mixer[i_dst], 368 hashmix(mixer[i_src], hash_const)) 369 370 # Add any remaining entropy, mixing each new entropy word with each 371 # pool word. 372 for i_src in range(len(mixer), len(entropy_array)): 373 for i_dst in range(len(mixer)): 374 mixer[i_dst] = mix(mixer[i_dst], 375 hashmix(entropy_array[i_src], hash_const)) 376 377 cdef get_assembled_entropy(self): 378 """ Convert and assemble all entropy sources into a uniform uint32 379 array. 380 381 Returns 382 ------- 383 entropy_array : 1D uint32 array 384 """ 385 # Convert run-entropy and the spawn key into uint32 386 # arrays and concatenate them. 387 388 # We MUST have at least some run-entropy. The others are optional. 389 assert self.entropy is not None 390 run_entropy = _coerce_to_uint32_array(self.entropy) 391 spawn_entropy = _coerce_to_uint32_array(self.spawn_key) 392 if len(spawn_entropy) > 0 and len(run_entropy) < self.pool_size: 393 # Explicitly fill out the entropy with 0s to the pool size to avoid 394 # conflict with spawn keys. We changed this in 1.19.0 to fix 395 # gh-16539. In order to preserve stream-compatibility with 396 # unspawned SeedSequences with small entropy inputs, we only do 397 # this when a spawn_key is specified. 398 diff = self.pool_size - len(run_entropy) 399 run_entropy = np.concatenate( 400 [run_entropy, np.zeros(diff, dtype=np.uint32)]) 401 entropy_array = np.concatenate([run_entropy, spawn_entropy]) 402 return entropy_array 403 404 @np.errstate(over='ignore') 405 def generate_state(self, n_words, dtype=np.uint32): 406 """ 407 generate_state(n_words, dtype=np.uint32) 408 409 Return the requested number of words for PRNG seeding. 410 411 A BitGenerator should call this method in its constructor with 412 an appropriate `n_words` parameter to properly seed itself. 413 414 Parameters 415 ---------- 416 n_words : int 417 dtype : np.uint32 or np.uint64, optional 418 The size of each word. This should only be either `uint32` or 419 `uint64`. Strings (`'uint32'`, `'uint64'`) are fine. Note that 420 requesting `uint64` will draw twice as many bits as `uint32` for 421 the same `n_words`. This is a convenience for `BitGenerator`s that 422 express their states as `uint64` arrays. 423 424 Returns 425 ------- 426 state : uint32 or uint64 array, shape=(n_words,) 427 """ 428 cdef uint32_t hash_const = INIT_B 429 cdef uint32_t data_val 430 431 out_dtype = np.dtype(dtype) 432 if out_dtype == np.dtype(np.uint32): 433 pass 434 elif out_dtype == np.dtype(np.uint64): 435 n_words *= 2 436 else: 437 raise ValueError("only support uint32 or uint64") 438 state = np.zeros(n_words, dtype=np.uint32) 439 src_cycle = cycle(self.pool) 440 for i_dst in range(n_words): 441 data_val = next(src_cycle) 442 data_val ^= hash_const 443 hash_const *= MULT_B 444 data_val *= hash_const 445 data_val ^= data_val >> XSHIFT 446 state[i_dst] = data_val 447 if out_dtype == np.dtype(np.uint64): 448 # For consistency across different endiannesses, view first as 449 # little-endian then convert the values to the native endianness. 450 state = state.astype('<u4').view('<u8').astype(np.uint64) 451 return state 452 453 def spawn(self, n_children): 454 """ 455 spawn(n_children) 456 457 Spawn a number of child `SeedSequence` s by extending the 458 `spawn_key`. 459 460 Parameters 461 ---------- 462 n_children : int 463 464 Returns 465 ------- 466 seqs : list of `SeedSequence` s 467 """ 468 cdef uint32_t i 469 470 seqs = [] 471 for i in range(self.n_children_spawned, 472 self.n_children_spawned + n_children): 473 seqs.append(type(self)( 474 self.entropy, 475 spawn_key=self.spawn_key + (i,), 476 pool_size=self.pool_size, 477 )) 478 self.n_children_spawned += n_children 479 return seqs 480 481 482ISpawnableSeedSequence.register(SeedSequence) 483 484 485cdef class BitGenerator(): 486 """ 487 BitGenerator(seed=None) 488 489 Base Class for generic BitGenerators, which provide a stream 490 of random bits based on different algorithms. Must be overridden. 491 492 Parameters 493 ---------- 494 seed : {None, int, array_like[ints], SeedSequence}, optional 495 A seed to initialize the `BitGenerator`. If None, then fresh, 496 unpredictable entropy will be pulled from the OS. If an ``int`` or 497 ``array_like[ints]`` is passed, then it will be passed to 498 ~`numpy.random.SeedSequence` to derive the initial `BitGenerator` state. 499 One may also pass in a `SeedSequence` instance. 500 501 Attributes 502 ---------- 503 lock : threading.Lock 504 Lock instance that is shared so that the same BitGenerator can 505 be used in multiple Generators without corrupting the state. Code that 506 generates values from a bit generator should hold the bit generator's 507 lock. 508 509 See Also 510 -------- 511 SeedSequence 512 """ 513 514 def __init__(self, seed=None): 515 self.lock = Lock() 516 self._bitgen.state = <void *>0 517 if type(self) is BitGenerator: 518 raise NotImplementedError('BitGenerator is a base class and cannot be instantized') 519 520 self._ctypes = None 521 self._cffi = None 522 523 cdef const char *name = "BitGenerator" 524 self.capsule = PyCapsule_New(<void *>&self._bitgen, name, NULL) 525 if not isinstance(seed, ISeedSequence): 526 seed = SeedSequence(seed) 527 self._seed_seq = seed 528 529 # Pickling support: 530 def __getstate__(self): 531 return self.state 532 533 def __setstate__(self, state): 534 self.state = state 535 536 def __reduce__(self): 537 from ._pickle import __bit_generator_ctor 538 return __bit_generator_ctor, (self.state['bit_generator'],), self.state 539 540 @property 541 def state(self): 542 """ 543 Get or set the PRNG state 544 545 The base BitGenerator.state must be overridden by a subclass 546 547 Returns 548 ------- 549 state : dict 550 Dictionary containing the information required to describe the 551 state of the PRNG 552 """ 553 raise NotImplementedError('Not implemented in base BitGenerator') 554 555 @state.setter 556 def state(self, value): 557 raise NotImplementedError('Not implemented in base BitGenerator') 558 559 def random_raw(self, size=None, output=True): 560 """ 561 random_raw(self, size=None) 562 563 Return randoms as generated by the underlying BitGenerator 564 565 Parameters 566 ---------- 567 size : int or tuple of ints, optional 568 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 569 ``m * n * k`` samples are drawn. Default is None, in which case a 570 single value is returned. 571 output : bool, optional 572 Output values. Used for performance testing since the generated 573 values are not returned. 574 575 Returns 576 ------- 577 out : uint or ndarray 578 Drawn samples. 579 580 Notes 581 ----- 582 This method directly exposes the the raw underlying pseudo-random 583 number generator. All values are returned as unsigned 64-bit 584 values irrespective of the number of bits produced by the PRNG. 585 586 See the class docstring for the number of bits returned. 587 """ 588 return random_raw(&self._bitgen, self.lock, size, output) 589 590 def _benchmark(self, Py_ssize_t cnt, method=u'uint64'): 591 '''Used in tests''' 592 return benchmark(&self._bitgen, self.lock, cnt, method) 593 594 @property 595 def ctypes(self): 596 """ 597 ctypes interface 598 599 Returns 600 ------- 601 interface : namedtuple 602 Named tuple containing ctypes wrapper 603 604 * state_address - Memory address of the state struct 605 * state - pointer to the state struct 606 * next_uint64 - function pointer to produce 64 bit integers 607 * next_uint32 - function pointer to produce 32 bit integers 608 * next_double - function pointer to produce doubles 609 * bitgen - pointer to the bit generator struct 610 """ 611 if self._ctypes is None: 612 self._ctypes = prepare_ctypes(&self._bitgen) 613 614 return self._ctypes 615 616 @property 617 def cffi(self): 618 """ 619 CFFI interface 620 621 Returns 622 ------- 623 interface : namedtuple 624 Named tuple containing CFFI wrapper 625 626 * state_address - Memory address of the state struct 627 * state - pointer to the state struct 628 * next_uint64 - function pointer to produce 64 bit integers 629 * next_uint32 - function pointer to produce 32 bit integers 630 * next_double - function pointer to produce doubles 631 * bitgen - pointer to the bit generator struct 632 """ 633 if self._cffi is None: 634 self._cffi = prepare_cffi(&self._bitgen) 635 return self._cffi 636