1#!python 2#cython: wraparound=False, nonecheck=False, boundscheck=False, cdivision=True, language_level=3 3import operator 4import warnings 5from collections.abc import Sequence 6 7from cpython.pycapsule cimport PyCapsule_IsValid, PyCapsule_GetPointer 8from cpython cimport (Py_INCREF, PyFloat_AsDouble) 9from cpython.mem cimport PyMem_Malloc, PyMem_Free 10 11cimport cython 12import numpy as np 13cimport numpy as np 14from numpy.core.multiarray import normalize_axis_index 15 16from .c_distributions cimport * 17from libc cimport string 18from libc.stdint cimport (uint8_t, uint16_t, uint32_t, uint64_t, 19 int32_t, int64_t, INT64_MAX, SIZE_MAX) 20from ._bounded_integers cimport (_rand_bool, _rand_int32, _rand_int64, 21 _rand_int16, _rand_int8, _rand_uint64, _rand_uint32, _rand_uint16, 22 _rand_uint8, _gen_mask) 23from ._pcg64 import PCG64 24from numpy.random cimport bitgen_t 25from ._common cimport (POISSON_LAM_MAX, CONS_POSITIVE, CONS_NONE, 26 CONS_NON_NEGATIVE, CONS_BOUNDED_0_1, CONS_BOUNDED_GT_0_1, 27 CONS_GT_1, CONS_POSITIVE_NOT_NAN, CONS_POISSON, 28 double_fill, cont, kahan_sum, cont_broadcast_3, float_fill, cont_f, 29 check_array_constraint, check_constraint, disc, discrete_broadcast_iii, 30 validate_output_shape 31 ) 32 33cdef extern from "numpy/arrayobject.h": 34 int PyArray_ResolveWritebackIfCopy(np.ndarray) 35 object PyArray_FromArray(np.PyArrayObject *, np.PyArray_Descr *, int) 36 37 enum: 38 NPY_ARRAY_WRITEBACKIFCOPY 39 40np.import_array() 41 42cdef int64_t _safe_sum_nonneg_int64(size_t num_colors, int64_t *colors): 43 """ 44 Sum the values in the array `colors`. 45 46 Return -1 if an overflow occurs. 47 The values in *colors are assumed to be nonnegative. 48 """ 49 cdef size_t i 50 cdef int64_t sum 51 52 sum = 0 53 for i in range(num_colors): 54 if colors[i] > INT64_MAX - sum: 55 return -1 56 sum += colors[i] 57 return sum 58 59 60cdef inline void _shuffle_raw_wrap(bitgen_t *bitgen, np.npy_intp n, 61 np.npy_intp first, np.npy_intp itemsize, 62 np.npy_intp stride, 63 char* data, char* buf) nogil: 64 # We trick gcc into providing a specialized implementation for 65 # the most common case, yielding a ~33% performance improvement. 66 # Note that apparently, only one branch can ever be specialized. 67 if itemsize == sizeof(np.npy_intp): 68 _shuffle_raw(bitgen, n, first, sizeof(np.npy_intp), stride, data, buf) 69 else: 70 _shuffle_raw(bitgen, n, first, itemsize, stride, data, buf) 71 72 73cdef inline void _shuffle_raw(bitgen_t *bitgen, np.npy_intp n, 74 np.npy_intp first, np.npy_intp itemsize, 75 np.npy_intp stride, 76 char* data, char* buf) nogil: 77 """ 78 Parameters 79 ---------- 80 bitgen 81 Pointer to a bitgen_t instance. 82 n 83 Number of elements in data 84 first 85 First observation to shuffle. Shuffles n-1, 86 n-2, ..., first, so that when first=1 the entire 87 array is shuffled 88 itemsize 89 Size in bytes of item 90 stride 91 Array stride 92 data 93 Location of data 94 buf 95 Location of buffer (itemsize) 96 """ 97 cdef np.npy_intp i, j 98 99 for i in reversed(range(first, n)): 100 j = random_interval(bitgen, i) 101 string.memcpy(buf, data + j * stride, itemsize) 102 string.memcpy(data + j * stride, data + i * stride, itemsize) 103 string.memcpy(data + i * stride, buf, itemsize) 104 105 106cdef inline void _shuffle_int(bitgen_t *bitgen, np.npy_intp n, 107 np.npy_intp first, int64_t* data) nogil: 108 """ 109 Parameters 110 ---------- 111 bitgen 112 Pointer to a bitgen_t instance. 113 n 114 Number of elements in data 115 first 116 First observation to shuffle. Shuffles n-1, 117 n-2, ..., first, so that when first=1 the entire 118 array is shuffled 119 data 120 Location of data 121 """ 122 cdef np.npy_intp i, j 123 cdef int64_t temp 124 for i in reversed(range(first, n)): 125 j = random_bounded_uint64(bitgen, 0, i, 0, 0) 126 temp = data[j] 127 data[j] = data[i] 128 data[i] = temp 129 130 131cdef bint _check_bit_generator(object bitgen): 132 """Check if an object satisfies the BitGenerator interface. 133 """ 134 if not hasattr(bitgen, "capsule"): 135 return False 136 cdef const char *name = "BitGenerator" 137 return PyCapsule_IsValid(bitgen.capsule, name) 138 139 140cdef class Generator: 141 """ 142 Generator(bit_generator) 143 144 Container for the BitGenerators. 145 146 ``Generator`` exposes a number of methods for generating random 147 numbers drawn from a variety of probability distributions. In addition to 148 the distribution-specific arguments, each method takes a keyword argument 149 `size` that defaults to ``None``. If `size` is ``None``, then a single 150 value is generated and returned. If `size` is an integer, then a 1-D 151 array filled with generated values is returned. If `size` is a tuple, 152 then an array with that shape is filled and returned. 153 154 The function :func:`numpy.random.default_rng` will instantiate 155 a `Generator` with numpy's default `BitGenerator`. 156 157 **No Compatibility Guarantee** 158 159 ``Generator`` does not provide a version compatibility guarantee. In 160 particular, as better algorithms evolve the bit stream may change. 161 162 Parameters 163 ---------- 164 bit_generator : BitGenerator 165 BitGenerator to use as the core generator. 166 167 Notes 168 ----- 169 The Python stdlib module `random` contains pseudo-random number generator 170 with a number of methods that are similar to the ones available in 171 ``Generator``. It uses Mersenne Twister, and this bit generator can 172 be accessed using ``MT19937``. ``Generator``, besides being 173 NumPy-aware, has the advantage that it provides a much larger number 174 of probability distributions to choose from. 175 176 Examples 177 -------- 178 >>> from numpy.random import Generator, PCG64 179 >>> rg = Generator(PCG64()) 180 >>> rg.standard_normal() 181 -0.203 # random 182 183 See Also 184 -------- 185 default_rng : Recommended constructor for `Generator`. 186 """ 187 cdef public object _bit_generator 188 cdef bitgen_t _bitgen 189 cdef binomial_t _binomial 190 cdef object lock 191 _poisson_lam_max = POISSON_LAM_MAX 192 193 def __init__(self, bit_generator): 194 self._bit_generator = bit_generator 195 196 capsule = bit_generator.capsule 197 cdef const char *name = "BitGenerator" 198 if not PyCapsule_IsValid(capsule, name): 199 raise ValueError("Invalid bit generator. The bit generator must " 200 "be instantiated.") 201 self._bitgen = (<bitgen_t *> PyCapsule_GetPointer(capsule, name))[0] 202 self.lock = bit_generator.lock 203 204 def __repr__(self): 205 return self.__str__() + ' at 0x{:X}'.format(id(self)) 206 207 def __str__(self): 208 _str = self.__class__.__name__ 209 _str += '(' + self.bit_generator.__class__.__name__ + ')' 210 return _str 211 212 # Pickling support: 213 def __getstate__(self): 214 return self.bit_generator.state 215 216 def __setstate__(self, state): 217 self.bit_generator.state = state 218 219 def __reduce__(self): 220 from ._pickle import __generator_ctor 221 return __generator_ctor, (self.bit_generator.state['bit_generator'],), self.bit_generator.state 222 223 @property 224 def bit_generator(self): 225 """ 226 Gets the bit generator instance used by the generator 227 228 Returns 229 ------- 230 bit_generator : BitGenerator 231 The bit generator instance used by the generator 232 """ 233 return self._bit_generator 234 235 def random(self, size=None, dtype=np.float64, out=None): 236 """ 237 random(size=None, dtype=np.float64, out=None) 238 239 Return random floats in the half-open interval [0.0, 1.0). 240 241 Results are from the "continuous uniform" distribution over the 242 stated interval. To sample :math:`Unif[a, b), b > a` multiply 243 the output of `random` by `(b-a)` and add `a`:: 244 245 (b - a) * random() + a 246 247 Parameters 248 ---------- 249 size : int or tuple of ints, optional 250 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 251 ``m * n * k`` samples are drawn. Default is None, in which case a 252 single value is returned. 253 dtype : dtype, optional 254 Desired dtype of the result, only `float64` and `float32` are supported. 255 Byteorder must be native. The default value is np.float64. 256 out : ndarray, optional 257 Alternative output array in which to place the result. If size is not None, 258 it must have the same shape as the provided size and must match the type of 259 the output values. 260 261 Returns 262 ------- 263 out : float or ndarray of floats 264 Array of random floats of shape `size` (unless ``size=None``, in which 265 case a single float is returned). 266 267 Examples 268 -------- 269 >>> rng = np.random.default_rng() 270 >>> rng.random() 271 0.47108547995356098 # random 272 >>> type(rng.random()) 273 <class 'float'> 274 >>> rng.random((5,)) 275 array([ 0.30220482, 0.86820401, 0.1654503 , 0.11659149, 0.54323428]) # random 276 277 Three-by-two array of random numbers from [-5, 0): 278 279 >>> 5 * rng.random((3, 2)) - 5 280 array([[-3.99149989, -0.52338984], # random 281 [-2.99091858, -0.79479508], 282 [-1.23204345, -1.75224494]]) 283 284 """ 285 cdef double temp 286 _dtype = np.dtype(dtype) 287 if _dtype == np.float64: 288 return double_fill(&random_standard_uniform_fill, &self._bitgen, size, self.lock, out) 289 elif _dtype == np.float32: 290 return float_fill(&random_standard_uniform_fill_f, &self._bitgen, size, self.lock, out) 291 else: 292 raise TypeError('Unsupported dtype %r for random' % _dtype) 293 294 def beta(self, a, b, size=None): 295 """ 296 beta(a, b, size=None) 297 298 Draw samples from a Beta distribution. 299 300 The Beta distribution is a special case of the Dirichlet distribution, 301 and is related to the Gamma distribution. It has the probability 302 distribution function 303 304 .. math:: f(x; a,b) = \\frac{1}{B(\\alpha, \\beta)} x^{\\alpha - 1} 305 (1 - x)^{\\beta - 1}, 306 307 where the normalization, B, is the beta function, 308 309 .. math:: B(\\alpha, \\beta) = \\int_0^1 t^{\\alpha - 1} 310 (1 - t)^{\\beta - 1} dt. 311 312 It is often seen in Bayesian inference and order statistics. 313 314 Parameters 315 ---------- 316 a : float or array_like of floats 317 Alpha, positive (>0). 318 b : float or array_like of floats 319 Beta, positive (>0). 320 size : int or tuple of ints, optional 321 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 322 ``m * n * k`` samples are drawn. If size is ``None`` (default), 323 a single value is returned if ``a`` and ``b`` are both scalars. 324 Otherwise, ``np.broadcast(a, b).size`` samples are drawn. 325 326 Returns 327 ------- 328 out : ndarray or scalar 329 Drawn samples from the parameterized beta distribution. 330 331 """ 332 return cont(&random_beta, &self._bitgen, size, self.lock, 2, 333 a, 'a', CONS_POSITIVE, 334 b, 'b', CONS_POSITIVE, 335 0.0, '', CONS_NONE, None) 336 337 def exponential(self, scale=1.0, size=None): 338 """ 339 exponential(scale=1.0, size=None) 340 341 Draw samples from an exponential distribution. 342 343 Its probability density function is 344 345 .. math:: f(x; \\frac{1}{\\beta}) = \\frac{1}{\\beta} \\exp(-\\frac{x}{\\beta}), 346 347 for ``x > 0`` and 0 elsewhere. :math:`\\beta` is the scale parameter, 348 which is the inverse of the rate parameter :math:`\\lambda = 1/\\beta`. 349 The rate parameter is an alternative, widely used parameterization 350 of the exponential distribution [3]_. 351 352 The exponential distribution is a continuous analogue of the 353 geometric distribution. It describes many common situations, such as 354 the size of raindrops measured over many rainstorms [1]_, or the time 355 between page requests to Wikipedia [2]_. 356 357 Parameters 358 ---------- 359 scale : float or array_like of floats 360 The scale parameter, :math:`\\beta = 1/\\lambda`. Must be 361 non-negative. 362 size : int or tuple of ints, optional 363 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 364 ``m * n * k`` samples are drawn. If size is ``None`` (default), 365 a single value is returned if ``scale`` is a scalar. Otherwise, 366 ``np.array(scale).size`` samples are drawn. 367 368 Returns 369 ------- 370 out : ndarray or scalar 371 Drawn samples from the parameterized exponential distribution. 372 373 References 374 ---------- 375 .. [1] Peyton Z. Peebles Jr., "Probability, Random Variables and 376 Random Signal Principles", 4th ed, 2001, p. 57. 377 .. [2] Wikipedia, "Poisson process", 378 https://en.wikipedia.org/wiki/Poisson_process 379 .. [3] Wikipedia, "Exponential distribution", 380 https://en.wikipedia.org/wiki/Exponential_distribution 381 382 """ 383 return cont(&random_exponential, &self._bitgen, size, self.lock, 1, 384 scale, 'scale', CONS_NON_NEGATIVE, 385 0.0, '', CONS_NONE, 386 0.0, '', CONS_NONE, 387 None) 388 389 def standard_exponential(self, size=None, dtype=np.float64, method=u'zig', out=None): 390 """ 391 standard_exponential(size=None, dtype=np.float64, method='zig', out=None) 392 393 Draw samples from the standard exponential distribution. 394 395 `standard_exponential` is identical to the exponential distribution 396 with a scale parameter of 1. 397 398 Parameters 399 ---------- 400 size : int or tuple of ints, optional 401 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 402 ``m * n * k`` samples are drawn. Default is None, in which case a 403 single value is returned. 404 dtype : dtype, optional 405 Desired dtype of the result, only `float64` and `float32` are supported. 406 Byteorder must be native. The default value is np.float64. 407 method : str, optional 408 Either 'inv' or 'zig'. 'inv' uses the default inverse CDF method. 409 'zig' uses the much faster Ziggurat method of Marsaglia and Tsang. 410 out : ndarray, optional 411 Alternative output array in which to place the result. If size is not None, 412 it must have the same shape as the provided size and must match the type of 413 the output values. 414 415 Returns 416 ------- 417 out : float or ndarray 418 Drawn samples. 419 420 Examples 421 -------- 422 Output a 3x8000 array: 423 424 >>> n = np.random.default_rng().standard_exponential((3, 8000)) 425 426 """ 427 _dtype = np.dtype(dtype) 428 if _dtype == np.float64: 429 if method == u'zig': 430 return double_fill(&random_standard_exponential_fill, &self._bitgen, size, self.lock, out) 431 else: 432 return double_fill(&random_standard_exponential_inv_fill, &self._bitgen, size, self.lock, out) 433 elif _dtype == np.float32: 434 if method == u'zig': 435 return float_fill(&random_standard_exponential_fill_f, &self._bitgen, size, self.lock, out) 436 else: 437 return float_fill(&random_standard_exponential_inv_fill_f, &self._bitgen, size, self.lock, out) 438 else: 439 raise TypeError('Unsupported dtype %r for standard_exponential' 440 % _dtype) 441 442 def integers(self, low, high=None, size=None, dtype=np.int64, endpoint=False): 443 """ 444 integers(low, high=None, size=None, dtype=np.int64, endpoint=False) 445 446 Return random integers from `low` (inclusive) to `high` (exclusive), or 447 if endpoint=True, `low` (inclusive) to `high` (inclusive). Replaces 448 `RandomState.randint` (with endpoint=False) and 449 `RandomState.random_integers` (with endpoint=True) 450 451 Return random integers from the "discrete uniform" distribution of 452 the specified dtype. If `high` is None (the default), then results are 453 from 0 to `low`. 454 455 Parameters 456 ---------- 457 low : int or array-like of ints 458 Lowest (signed) integers to be drawn from the distribution (unless 459 ``high=None``, in which case this parameter is 0 and this value is 460 used for `high`). 461 high : int or array-like of ints, optional 462 If provided, one above the largest (signed) integer to be drawn 463 from the distribution (see above for behavior if ``high=None``). 464 If array-like, must contain integer values 465 size : int or tuple of ints, optional 466 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 467 ``m * n * k`` samples are drawn. Default is None, in which case a 468 single value is returned. 469 dtype : dtype, optional 470 Desired dtype of the result. Byteorder must be native. 471 The default value is np.int64. 472 endpoint : bool, optional 473 If true, sample from the interval [low, high] instead of the 474 default [low, high) 475 Defaults to False 476 477 Returns 478 ------- 479 out : int or ndarray of ints 480 `size`-shaped array of random integers from the appropriate 481 distribution, or a single such random int if `size` not provided. 482 483 Notes 484 ----- 485 When using broadcasting with uint64 dtypes, the maximum value (2**64) 486 cannot be represented as a standard integer type. The high array (or 487 low if high is None) must have object dtype, e.g., array([2**64]). 488 489 Examples 490 -------- 491 >>> rng = np.random.default_rng() 492 >>> rng.integers(2, size=10) 493 array([1, 0, 0, 0, 1, 1, 0, 0, 1, 0]) # random 494 >>> rng.integers(1, size=10) 495 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) 496 497 Generate a 2 x 4 array of ints between 0 and 4, inclusive: 498 499 >>> rng.integers(5, size=(2, 4)) 500 array([[4, 0, 2, 1], 501 [3, 2, 2, 0]]) # random 502 503 Generate a 1 x 3 array with 3 different upper bounds 504 505 >>> rng.integers(1, [3, 5, 10]) 506 array([2, 2, 9]) # random 507 508 Generate a 1 by 3 array with 3 different lower bounds 509 510 >>> rng.integers([1, 5, 7], 10) 511 array([9, 8, 7]) # random 512 513 Generate a 2 by 4 array using broadcasting with dtype of uint8 514 515 >>> rng.integers([1, 3, 5, 7], [[10], [20]], dtype=np.uint8) 516 array([[ 8, 6, 9, 7], 517 [ 1, 16, 9, 12]], dtype=uint8) # random 518 519 References 520 ---------- 521 .. [1] Daniel Lemire., "Fast Random Integer Generation in an Interval", 522 ACM Transactions on Modeling and Computer Simulation 29 (1), 2019, 523 http://arxiv.org/abs/1805.10941. 524 525 """ 526 if high is None: 527 high = low 528 low = 0 529 530 _dtype = np.dtype(dtype) 531 532 # Implementation detail: the old API used a masked method to generate 533 # bounded uniform integers. Lemire's method is preferable since it is 534 # faster. randomgen allows a choice, we will always use the faster one. 535 cdef bint _masked = False 536 537 if _dtype == np.int32: 538 ret = _rand_int32(low, high, size, _masked, endpoint, &self._bitgen, self.lock) 539 elif _dtype == np.int64: 540 ret = _rand_int64(low, high, size, _masked, endpoint, &self._bitgen, self.lock) 541 elif _dtype == np.int16: 542 ret = _rand_int16(low, high, size, _masked, endpoint, &self._bitgen, self.lock) 543 elif _dtype == np.int8: 544 ret = _rand_int8(low, high, size, _masked, endpoint, &self._bitgen, self.lock) 545 elif _dtype == np.uint64: 546 ret = _rand_uint64(low, high, size, _masked, endpoint, &self._bitgen, self.lock) 547 elif _dtype == np.uint32: 548 ret = _rand_uint32(low, high, size, _masked, endpoint, &self._bitgen, self.lock) 549 elif _dtype == np.uint16: 550 ret = _rand_uint16(low, high, size, _masked, endpoint, &self._bitgen, self.lock) 551 elif _dtype == np.uint8: 552 ret = _rand_uint8(low, high, size, _masked, endpoint, &self._bitgen, self.lock) 553 elif _dtype == np.bool_: 554 ret = _rand_bool(low, high, size, _masked, endpoint, &self._bitgen, self.lock) 555 elif not _dtype.isnative: 556 raise ValueError('Providing a dtype with a non-native byteorder ' 557 'is not supported. If you require ' 558 'platform-independent byteorder, call byteswap ' 559 'when required.') 560 else: 561 raise TypeError('Unsupported dtype %r for integers' % _dtype) 562 563 564 if size is None and dtype in (bool, int, np.compat.long): 565 if np.array(ret).shape == (): 566 return dtype(ret) 567 return ret 568 569 def bytes(self, np.npy_intp length): 570 """ 571 bytes(length) 572 573 Return random bytes. 574 575 Parameters 576 ---------- 577 length : int 578 Number of random bytes. 579 580 Returns 581 ------- 582 out : str 583 String of length `length`. 584 585 Examples 586 -------- 587 >>> np.random.default_rng().bytes(10) 588 ' eh\\x85\\x022SZ\\xbf\\xa4' #random 589 590 """ 591 cdef Py_ssize_t n_uint32 = ((length - 1) // 4 + 1) 592 # Interpret the uint32s as little-endian to convert them to bytes 593 # consistently. 594 return self.integers(0, 4294967296, size=n_uint32, 595 dtype=np.uint32).astype('<u4').tobytes()[:length] 596 597 @cython.wraparound(True) 598 def choice(self, a, size=None, replace=True, p=None, axis=0, bint shuffle=True): 599 """ 600 choice(a, size=None, replace=True, p=None, axis=0, shuffle=True) 601 602 Generates a random sample from a given 1-D array 603 604 Parameters 605 ---------- 606 a : {array_like, int} 607 If an ndarray, a random sample is generated from its elements. 608 If an int, the random sample is generated from np.arange(a). 609 size : {int, tuple[int]}, optional 610 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 611 ``m * n * k`` samples are drawn from the 1-d `a`. If `a` has more 612 than one dimension, the `size` shape will be inserted into the 613 `axis` dimension, so the output ``ndim`` will be ``a.ndim - 1 + 614 len(size)``. Default is None, in which case a single value is 615 returned. 616 replace : bool, optional 617 Whether the sample is with or without replacement 618 p : 1-D array_like, optional 619 The probabilities associated with each entry in a. 620 If not given the sample assumes a uniform distribution over all 621 entries in a. 622 axis : int, optional 623 The axis along which the selection is performed. The default, 0, 624 selects by row. 625 shuffle : bool, optional 626 Whether the sample is shuffled when sampling without replacement. 627 Default is True, False provides a speedup. 628 629 Returns 630 ------- 631 samples : single item or ndarray 632 The generated random samples 633 634 Raises 635 ------ 636 ValueError 637 If a is an int and less than zero, if p is not 1-dimensional, if 638 a is array-like with a size 0, if p is not a vector of 639 probabilities, if a and p have different lengths, or if 640 replace=False and the sample size is greater than the population 641 size. 642 643 See Also 644 -------- 645 integers, shuffle, permutation 646 647 Examples 648 -------- 649 Generate a uniform random sample from np.arange(5) of size 3: 650 651 >>> rng = np.random.default_rng() 652 >>> rng.choice(5, 3) 653 array([0, 3, 4]) # random 654 >>> #This is equivalent to rng.integers(0,5,3) 655 656 Generate a non-uniform random sample from np.arange(5) of size 3: 657 658 >>> rng.choice(5, 3, p=[0.1, 0, 0.3, 0.6, 0]) 659 array([3, 3, 0]) # random 660 661 Generate a uniform random sample from np.arange(5) of size 3 without 662 replacement: 663 664 >>> rng.choice(5, 3, replace=False) 665 array([3,1,0]) # random 666 >>> #This is equivalent to rng.permutation(np.arange(5))[:3] 667 668 Generate a non-uniform random sample from np.arange(5) of size 669 3 without replacement: 670 671 >>> rng.choice(5, 3, replace=False, p=[0.1, 0, 0.3, 0.6, 0]) 672 array([2, 3, 0]) # random 673 674 Any of the above can be repeated with an arbitrary array-like 675 instead of just integers. For instance: 676 677 >>> aa_milne_arr = ['pooh', 'rabbit', 'piglet', 'Christopher'] 678 >>> rng.choice(aa_milne_arr, 5, p=[0.5, 0.1, 0.1, 0.3]) 679 array(['pooh', 'pooh', 'pooh', 'Christopher', 'piglet'], # random 680 dtype='<U11') 681 682 """ 683 684 cdef int64_t val, t, loc, size_i, pop_size_i 685 cdef int64_t *idx_data 686 cdef np.npy_intp j 687 cdef uint64_t set_size, mask 688 cdef uint64_t[::1] hash_set 689 # Format and Verify input 690 a = np.array(a, copy=False) 691 if a.ndim == 0: 692 try: 693 # __index__ must return an integer by python rules. 694 pop_size = operator.index(a.item()) 695 except TypeError: 696 raise ValueError("a must an array or an integer") 697 if pop_size <= 0 and np.prod(size) != 0: 698 raise ValueError("a must be a positive integer unless no" 699 "samples are taken") 700 else: 701 pop_size = a.shape[axis] 702 if pop_size == 0 and np.prod(size) != 0: 703 raise ValueError("a cannot be empty unless no samples are" 704 "taken") 705 706 if p is not None: 707 d = len(p) 708 709 atol = np.sqrt(np.finfo(np.float64).eps) 710 if isinstance(p, np.ndarray): 711 if np.issubdtype(p.dtype, np.floating): 712 atol = max(atol, np.sqrt(np.finfo(p.dtype).eps)) 713 714 p = <np.ndarray>np.PyArray_FROM_OTF( 715 p, np.NPY_DOUBLE, np.NPY_ALIGNED | np.NPY_ARRAY_C_CONTIGUOUS) 716 pix = <double*>np.PyArray_DATA(p) 717 718 if p.ndim != 1: 719 raise ValueError("p must be 1-dimensional") 720 if p.size != pop_size: 721 raise ValueError("a and p must have same size") 722 p_sum = kahan_sum(pix, d) 723 if np.isnan(p_sum): 724 raise ValueError("probabilities contain NaN") 725 if np.logical_or.reduce(p < 0): 726 raise ValueError("probabilities are not non-negative") 727 if abs(p_sum - 1.) > atol: 728 raise ValueError("probabilities do not sum to 1") 729 730 # `shape == None` means `shape == ()`, but with scalar unpacking at the 731 # end 732 is_scalar = size is None 733 if not is_scalar: 734 shape = size 735 size = np.prod(shape, dtype=np.intp) 736 else: 737 shape = () 738 size = 1 739 740 # Actual sampling 741 if replace: 742 if p is not None: 743 cdf = p.cumsum() 744 cdf /= cdf[-1] 745 uniform_samples = self.random(shape) 746 idx = cdf.searchsorted(uniform_samples, side='right') 747 # searchsorted returns a scalar 748 idx = np.array(idx, copy=False, dtype=np.int64) 749 else: 750 idx = self.integers(0, pop_size, size=shape, dtype=np.int64) 751 else: 752 if size > pop_size: 753 raise ValueError("Cannot take a larger sample than " 754 "population when replace is False") 755 elif size < 0: 756 raise ValueError("negative dimensions are not allowed") 757 758 if p is not None: 759 if np.count_nonzero(p > 0) < size: 760 raise ValueError("Fewer non-zero entries in p than size") 761 n_uniq = 0 762 p = p.copy() 763 found = np.zeros(shape, dtype=np.int64) 764 flat_found = found.ravel() 765 while n_uniq < size: 766 x = self.random((size - n_uniq,)) 767 if n_uniq > 0: 768 p[flat_found[0:n_uniq]] = 0 769 cdf = np.cumsum(p) 770 cdf /= cdf[-1] 771 new = cdf.searchsorted(x, side='right') 772 _, unique_indices = np.unique(new, return_index=True) 773 unique_indices.sort() 774 new = new.take(unique_indices) 775 flat_found[n_uniq:n_uniq + new.size] = new 776 n_uniq += new.size 777 idx = found 778 else: 779 size_i = size 780 pop_size_i = pop_size 781 # This is a heuristic tuning. should be improvable 782 if shuffle: 783 cutoff = 50 784 else: 785 cutoff = 20 786 if pop_size_i > 10000 and (size_i > (pop_size_i // cutoff)): 787 # Tail shuffle size elements 788 idx = np.PyArray_Arange(0, pop_size_i, 1, np.NPY_INT64) 789 idx_data = <int64_t*>(<np.ndarray>idx).data 790 with self.lock, nogil: 791 _shuffle_int(&self._bitgen, pop_size_i, 792 max(pop_size_i - size_i, 1), idx_data) 793 # Copy to allow potentially large array backing idx to be gc 794 idx = idx[(pop_size - size):].copy() 795 else: 796 # Floyd's algorithm 797 idx = np.empty(size, dtype=np.int64) 798 idx_data = <int64_t*>np.PyArray_DATA(<np.ndarray>idx) 799 # smallest power of 2 larger than 1.2 * size 800 set_size = <uint64_t>(1.2 * size_i) 801 mask = _gen_mask(set_size) 802 set_size = 1 + mask 803 hash_set = np.full(set_size, <uint64_t>-1, np.uint64) 804 with self.lock, cython.wraparound(False), nogil: 805 for j in range(pop_size_i - size_i, pop_size_i): 806 val = random_bounded_uint64(&self._bitgen, 0, j, 0, 0) 807 loc = val & mask 808 while hash_set[loc] != <uint64_t>-1 and hash_set[loc] != <uint64_t>val: 809 loc = (loc + 1) & mask 810 if hash_set[loc] == <uint64_t>-1: # then val not in hash_set 811 hash_set[loc] = val 812 idx_data[j - pop_size_i + size_i] = val 813 else: # we need to insert j instead 814 loc = j & mask 815 while hash_set[loc] != <uint64_t>-1: 816 loc = (loc + 1) & mask 817 hash_set[loc] = j 818 idx_data[j - pop_size_i + size_i] = j 819 if shuffle: 820 _shuffle_int(&self._bitgen, size_i, 1, idx_data) 821 idx.shape = shape 822 823 if is_scalar and isinstance(idx, np.ndarray): 824 # In most cases a scalar will have been made an array 825 idx = idx.item(0) 826 827 # Use samples as indices for a if a is array-like 828 if a.ndim == 0: 829 return idx 830 831 if not is_scalar and idx.ndim == 0: 832 # If size == () then the user requested a 0-d array as opposed to 833 # a scalar object when size is None. However a[idx] is always a 834 # scalar and not an array. So this makes sure the result is an 835 # array, taking into account that np.array(item) may not work 836 # for object arrays. 837 res = np.empty((), dtype=a.dtype) 838 res[()] = a[idx] 839 return res 840 841 # asarray downcasts on 32-bit platforms, always safe 842 # no-op on 64-bit platforms 843 return a.take(np.asarray(idx, dtype=np.intp), axis=axis) 844 845 def uniform(self, low=0.0, high=1.0, size=None): 846 """ 847 uniform(low=0.0, high=1.0, size=None) 848 849 Draw samples from a uniform distribution. 850 851 Samples are uniformly distributed over the half-open interval 852 ``[low, high)`` (includes low, but excludes high). In other words, 853 any value within the given interval is equally likely to be drawn 854 by `uniform`. 855 856 Parameters 857 ---------- 858 low : float or array_like of floats, optional 859 Lower boundary of the output interval. All values generated will be 860 greater than or equal to low. The default value is 0. 861 high : float or array_like of floats 862 Upper boundary of the output interval. All values generated will be 863 less than high. The default value is 1.0. 864 size : int or tuple of ints, optional 865 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 866 ``m * n * k`` samples are drawn. If size is ``None`` (default), 867 a single value is returned if ``low`` and ``high`` are both scalars. 868 Otherwise, ``np.broadcast(low, high).size`` samples are drawn. 869 870 Returns 871 ------- 872 out : ndarray or scalar 873 Drawn samples from the parameterized uniform distribution. 874 875 See Also 876 -------- 877 integers : Discrete uniform distribution, yielding integers. 878 random : Floats uniformly distributed over ``[0, 1)``. 879 880 Notes 881 ----- 882 The probability density function of the uniform distribution is 883 884 .. math:: p(x) = \\frac{1}{b - a} 885 886 anywhere within the interval ``[a, b)``, and zero elsewhere. 887 888 When ``high`` == ``low``, values of ``low`` will be returned. 889 If ``high`` < ``low``, the results are officially undefined 890 and may eventually raise an error, i.e. do not rely on this 891 function to behave when passed arguments satisfying that 892 inequality condition. 893 894 Examples 895 -------- 896 Draw samples from the distribution: 897 898 >>> s = np.random.default_rng().uniform(-1,0,1000) 899 900 All values are within the given interval: 901 902 >>> np.all(s >= -1) 903 True 904 >>> np.all(s < 0) 905 True 906 907 Display the histogram of the samples, along with the 908 probability density function: 909 910 >>> import matplotlib.pyplot as plt 911 >>> count, bins, ignored = plt.hist(s, 15, density=True) 912 >>> plt.plot(bins, np.ones_like(bins), linewidth=2, color='r') 913 >>> plt.show() 914 915 """ 916 cdef bint is_scalar = True 917 cdef np.ndarray alow, ahigh, arange 918 cdef double _low, _high, range 919 cdef object temp 920 921 alow = <np.ndarray>np.PyArray_FROM_OTF(low, np.NPY_DOUBLE, np.NPY_ALIGNED) 922 ahigh = <np.ndarray>np.PyArray_FROM_OTF(high, np.NPY_DOUBLE, np.NPY_ALIGNED) 923 924 if np.PyArray_NDIM(alow) == np.PyArray_NDIM(ahigh) == 0: 925 _low = PyFloat_AsDouble(low) 926 _high = PyFloat_AsDouble(high) 927 range = _high - _low 928 if not np.isfinite(range): 929 raise OverflowError('Range exceeds valid bounds') 930 931 return cont(&random_uniform, &self._bitgen, size, self.lock, 2, 932 _low, '', CONS_NONE, 933 range, '', CONS_NONE, 934 0.0, '', CONS_NONE, 935 None) 936 937 temp = np.subtract(ahigh, alow) 938 # needed to get around Pyrex's automatic reference-counting 939 # rules because EnsureArray steals a reference 940 Py_INCREF(temp) 941 942 arange = <np.ndarray>np.PyArray_EnsureArray(temp) 943 if not np.all(np.isfinite(arange)): 944 raise OverflowError('Range exceeds valid bounds') 945 return cont(&random_uniform, &self._bitgen, size, self.lock, 2, 946 alow, '', CONS_NONE, 947 arange, '', CONS_NONE, 948 0.0, '', CONS_NONE, 949 None) 950 951 # Complicated, continuous distributions: 952 def standard_normal(self, size=None, dtype=np.float64, out=None): 953 """ 954 standard_normal(size=None, dtype=np.float64, out=None) 955 956 Draw samples from a standard Normal distribution (mean=0, stdev=1). 957 958 Parameters 959 ---------- 960 size : int or tuple of ints, optional 961 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 962 ``m * n * k`` samples are drawn. Default is None, in which case a 963 single value is returned. 964 dtype : dtype, optional 965 Desired dtype of the result, only `float64` and `float32` are supported. 966 Byteorder must be native. The default value is np.float64. 967 out : ndarray, optional 968 Alternative output array in which to place the result. If size is not None, 969 it must have the same shape as the provided size and must match the type of 970 the output values. 971 972 Returns 973 ------- 974 out : float or ndarray 975 A floating-point array of shape ``size`` of drawn samples, or a 976 single sample if ``size`` was not specified. 977 978 See Also 979 -------- 980 normal : 981 Equivalent function with additional ``loc`` and ``scale`` arguments 982 for setting the mean and standard deviation. 983 984 Notes 985 ----- 986 For random samples from :math:`N(\\mu, \\sigma^2)`, use one of:: 987 988 mu + sigma * gen.standard_normal(size=...) 989 gen.normal(mu, sigma, size=...) 990 991 Examples 992 -------- 993 >>> rng = np.random.default_rng() 994 >>> rng.standard_normal() 995 2.1923875335537315 #random 996 997 >>> s = rng.standard_normal(8000) 998 >>> s 999 array([ 0.6888893 , 0.78096262, -0.89086505, ..., 0.49876311, # random 1000 -0.38672696, -0.4685006 ]) # random 1001 >>> s.shape 1002 (8000,) 1003 >>> s = rng.standard_normal(size=(3, 4, 2)) 1004 >>> s.shape 1005 (3, 4, 2) 1006 1007 Two-by-four array of samples from :math:`N(3, 6.25)`: 1008 1009 >>> 3 + 2.5 * rng.standard_normal(size=(2, 4)) 1010 array([[-4.49401501, 4.00950034, -1.81814867, 7.29718677], # random 1011 [ 0.39924804, 4.68456316, 4.99394529, 4.84057254]]) # random 1012 1013 """ 1014 _dtype = np.dtype(dtype) 1015 if _dtype == np.float64: 1016 return double_fill(&random_standard_normal_fill, &self._bitgen, size, self.lock, out) 1017 elif _dtype == np.float32: 1018 return float_fill(&random_standard_normal_fill_f, &self._bitgen, size, self.lock, out) 1019 else: 1020 raise TypeError('Unsupported dtype %r for standard_normal' % _dtype) 1021 1022 def normal(self, loc=0.0, scale=1.0, size=None): 1023 """ 1024 normal(loc=0.0, scale=1.0, size=None) 1025 1026 Draw random samples from a normal (Gaussian) distribution. 1027 1028 The probability density function of the normal distribution, first 1029 derived by De Moivre and 200 years later by both Gauss and Laplace 1030 independently [2]_, is often called the bell curve because of 1031 its characteristic shape (see the example below). 1032 1033 The normal distributions occurs often in nature. For example, it 1034 describes the commonly occurring distribution of samples influenced 1035 by a large number of tiny, random disturbances, each with its own 1036 unique distribution [2]_. 1037 1038 Parameters 1039 ---------- 1040 loc : float or array_like of floats 1041 Mean ("centre") of the distribution. 1042 scale : float or array_like of floats 1043 Standard deviation (spread or "width") of the distribution. Must be 1044 non-negative. 1045 size : int or tuple of ints, optional 1046 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 1047 ``m * n * k`` samples are drawn. If size is ``None`` (default), 1048 a single value is returned if ``loc`` and ``scale`` are both scalars. 1049 Otherwise, ``np.broadcast(loc, scale).size`` samples are drawn. 1050 1051 Returns 1052 ------- 1053 out : ndarray or scalar 1054 Drawn samples from the parameterized normal distribution. 1055 1056 See Also 1057 -------- 1058 scipy.stats.norm : probability density function, distribution or 1059 cumulative density function, etc. 1060 1061 Notes 1062 ----- 1063 The probability density for the Gaussian distribution is 1064 1065 .. math:: p(x) = \\frac{1}{\\sqrt{ 2 \\pi \\sigma^2 }} 1066 e^{ - \\frac{ (x - \\mu)^2 } {2 \\sigma^2} }, 1067 1068 where :math:`\\mu` is the mean and :math:`\\sigma` the standard 1069 deviation. The square of the standard deviation, :math:`\\sigma^2`, 1070 is called the variance. 1071 1072 The function has its peak at the mean, and its "spread" increases with 1073 the standard deviation (the function reaches 0.607 times its maximum at 1074 :math:`x + \\sigma` and :math:`x - \\sigma` [2]_). This implies that 1075 :meth:`normal` is more likely to return samples lying close to the 1076 mean, rather than those far away. 1077 1078 References 1079 ---------- 1080 .. [1] Wikipedia, "Normal distribution", 1081 https://en.wikipedia.org/wiki/Normal_distribution 1082 .. [2] P. R. Peebles Jr., "Central Limit Theorem" in "Probability, 1083 Random Variables and Random Signal Principles", 4th ed., 2001, 1084 pp. 51, 51, 125. 1085 1086 Examples 1087 -------- 1088 Draw samples from the distribution: 1089 1090 >>> mu, sigma = 0, 0.1 # mean and standard deviation 1091 >>> s = np.random.default_rng().normal(mu, sigma, 1000) 1092 1093 Verify the mean and the variance: 1094 1095 >>> abs(mu - np.mean(s)) 1096 0.0 # may vary 1097 1098 >>> abs(sigma - np.std(s, ddof=1)) 1099 0.1 # may vary 1100 1101 Display the histogram of the samples, along with 1102 the probability density function: 1103 1104 >>> import matplotlib.pyplot as plt 1105 >>> count, bins, ignored = plt.hist(s, 30, density=True) 1106 >>> plt.plot(bins, 1/(sigma * np.sqrt(2 * np.pi)) * 1107 ... np.exp( - (bins - mu)**2 / (2 * sigma**2) ), 1108 ... linewidth=2, color='r') 1109 >>> plt.show() 1110 1111 Two-by-four array of samples from N(3, 6.25): 1112 1113 >>> np.random.default_rng().normal(3, 2.5, size=(2, 4)) 1114 array([[-4.49401501, 4.00950034, -1.81814867, 7.29718677], # random 1115 [ 0.39924804, 4.68456316, 4.99394529, 4.84057254]]) # random 1116 1117 """ 1118 return cont(&random_normal, &self._bitgen, size, self.lock, 2, 1119 loc, '', CONS_NONE, 1120 scale, 'scale', CONS_NON_NEGATIVE, 1121 0.0, '', CONS_NONE, 1122 None) 1123 1124 def standard_gamma(self, shape, size=None, dtype=np.float64, out=None): 1125 """ 1126 standard_gamma(shape, size=None, dtype=np.float64, out=None) 1127 1128 Draw samples from a standard Gamma distribution. 1129 1130 Samples are drawn from a Gamma distribution with specified parameters, 1131 shape (sometimes designated "k") and scale=1. 1132 1133 Parameters 1134 ---------- 1135 shape : float or array_like of floats 1136 Parameter, must be non-negative. 1137 size : int or tuple of ints, optional 1138 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 1139 ``m * n * k`` samples are drawn. If size is ``None`` (default), 1140 a single value is returned if ``shape`` is a scalar. Otherwise, 1141 ``np.array(shape).size`` samples are drawn. 1142 dtype : dtype, optional 1143 Desired dtype of the result, only `float64` and `float32` are supported. 1144 Byteorder must be native. The default value is np.float64. 1145 out : ndarray, optional 1146 Alternative output array in which to place the result. If size is 1147 not None, it must have the same shape as the provided size and 1148 must match the type of the output values. 1149 1150 Returns 1151 ------- 1152 out : ndarray or scalar 1153 Drawn samples from the parameterized standard gamma distribution. 1154 1155 See Also 1156 -------- 1157 scipy.stats.gamma : probability density function, distribution or 1158 cumulative density function, etc. 1159 1160 Notes 1161 ----- 1162 The probability density for the Gamma distribution is 1163 1164 .. math:: p(x) = x^{k-1}\\frac{e^{-x/\\theta}}{\\theta^k\\Gamma(k)}, 1165 1166 where :math:`k` is the shape and :math:`\\theta` the scale, 1167 and :math:`\\Gamma` is the Gamma function. 1168 1169 The Gamma distribution is often used to model the times to failure of 1170 electronic components, and arises naturally in processes for which the 1171 waiting times between Poisson distributed events are relevant. 1172 1173 References 1174 ---------- 1175 .. [1] Weisstein, Eric W. "Gamma Distribution." From MathWorld--A 1176 Wolfram Web Resource. 1177 http://mathworld.wolfram.com/GammaDistribution.html 1178 .. [2] Wikipedia, "Gamma distribution", 1179 https://en.wikipedia.org/wiki/Gamma_distribution 1180 1181 Examples 1182 -------- 1183 Draw samples from the distribution: 1184 1185 >>> shape, scale = 2., 1. # mean and width 1186 >>> s = np.random.default_rng().standard_gamma(shape, 1000000) 1187 1188 Display the histogram of the samples, along with 1189 the probability density function: 1190 1191 >>> import matplotlib.pyplot as plt 1192 >>> import scipy.special as sps # doctest: +SKIP 1193 >>> count, bins, ignored = plt.hist(s, 50, density=True) 1194 >>> y = bins**(shape-1) * ((np.exp(-bins/scale))/ # doctest: +SKIP 1195 ... (sps.gamma(shape) * scale**shape)) 1196 >>> plt.plot(bins, y, linewidth=2, color='r') # doctest: +SKIP 1197 >>> plt.show() 1198 1199 """ 1200 cdef void *func 1201 _dtype = np.dtype(dtype) 1202 if _dtype == np.float64: 1203 return cont(&random_standard_gamma, &self._bitgen, size, self.lock, 1, 1204 shape, 'shape', CONS_NON_NEGATIVE, 1205 0.0, '', CONS_NONE, 1206 0.0, '', CONS_NONE, 1207 out) 1208 if _dtype == np.float32: 1209 return cont_f(&random_standard_gamma_f, &self._bitgen, size, self.lock, 1210 shape, 'shape', CONS_NON_NEGATIVE, 1211 out) 1212 else: 1213 raise TypeError('Unsupported dtype %r for standard_gamma' % _dtype) 1214 1215 def gamma(self, shape, scale=1.0, size=None): 1216 """ 1217 gamma(shape, scale=1.0, size=None) 1218 1219 Draw samples from a Gamma distribution. 1220 1221 Samples are drawn from a Gamma distribution with specified parameters, 1222 `shape` (sometimes designated "k") and `scale` (sometimes designated 1223 "theta"), where both parameters are > 0. 1224 1225 Parameters 1226 ---------- 1227 shape : float or array_like of floats 1228 The shape of the gamma distribution. Must be non-negative. 1229 scale : float or array_like of floats, optional 1230 The scale of the gamma distribution. Must be non-negative. 1231 Default is equal to 1. 1232 size : int or tuple of ints, optional 1233 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 1234 ``m * n * k`` samples are drawn. If size is ``None`` (default), 1235 a single value is returned if ``shape`` and ``scale`` are both scalars. 1236 Otherwise, ``np.broadcast(shape, scale).size`` samples are drawn. 1237 1238 Returns 1239 ------- 1240 out : ndarray or scalar 1241 Drawn samples from the parameterized gamma distribution. 1242 1243 See Also 1244 -------- 1245 scipy.stats.gamma : probability density function, distribution or 1246 cumulative density function, etc. 1247 1248 Notes 1249 ----- 1250 The probability density for the Gamma distribution is 1251 1252 .. math:: p(x) = x^{k-1}\\frac{e^{-x/\\theta}}{\\theta^k\\Gamma(k)}, 1253 1254 where :math:`k` is the shape and :math:`\\theta` the scale, 1255 and :math:`\\Gamma` is the Gamma function. 1256 1257 The Gamma distribution is often used to model the times to failure of 1258 electronic components, and arises naturally in processes for which the 1259 waiting times between Poisson distributed events are relevant. 1260 1261 References 1262 ---------- 1263 .. [1] Weisstein, Eric W. "Gamma Distribution." From MathWorld--A 1264 Wolfram Web Resource. 1265 http://mathworld.wolfram.com/GammaDistribution.html 1266 .. [2] Wikipedia, "Gamma distribution", 1267 https://en.wikipedia.org/wiki/Gamma_distribution 1268 1269 Examples 1270 -------- 1271 Draw samples from the distribution: 1272 1273 >>> shape, scale = 2., 2. # mean=4, std=2*sqrt(2) 1274 >>> s = np.random.default_rng().gamma(shape, scale, 1000) 1275 1276 Display the histogram of the samples, along with 1277 the probability density function: 1278 1279 >>> import matplotlib.pyplot as plt 1280 >>> import scipy.special as sps # doctest: +SKIP 1281 >>> count, bins, ignored = plt.hist(s, 50, density=True) 1282 >>> y = bins**(shape-1)*(np.exp(-bins/scale) / # doctest: +SKIP 1283 ... (sps.gamma(shape)*scale**shape)) 1284 >>> plt.plot(bins, y, linewidth=2, color='r') # doctest: +SKIP 1285 >>> plt.show() 1286 1287 """ 1288 return cont(&random_gamma, &self._bitgen, size, self.lock, 2, 1289 shape, 'shape', CONS_NON_NEGATIVE, 1290 scale, 'scale', CONS_NON_NEGATIVE, 1291 0.0, '', CONS_NONE, None) 1292 1293 def f(self, dfnum, dfden, size=None): 1294 """ 1295 f(dfnum, dfden, size=None) 1296 1297 Draw samples from an F distribution. 1298 1299 Samples are drawn from an F distribution with specified parameters, 1300 `dfnum` (degrees of freedom in numerator) and `dfden` (degrees of 1301 freedom in denominator), where both parameters must be greater than 1302 zero. 1303 1304 The random variate of the F distribution (also known as the 1305 Fisher distribution) is a continuous probability distribution 1306 that arises in ANOVA tests, and is the ratio of two chi-square 1307 variates. 1308 1309 Parameters 1310 ---------- 1311 dfnum : float or array_like of floats 1312 Degrees of freedom in numerator, must be > 0. 1313 dfden : float or array_like of float 1314 Degrees of freedom in denominator, must be > 0. 1315 size : int or tuple of ints, optional 1316 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 1317 ``m * n * k`` samples are drawn. If size is ``None`` (default), 1318 a single value is returned if ``dfnum`` and ``dfden`` are both scalars. 1319 Otherwise, ``np.broadcast(dfnum, dfden).size`` samples are drawn. 1320 1321 Returns 1322 ------- 1323 out : ndarray or scalar 1324 Drawn samples from the parameterized Fisher distribution. 1325 1326 See Also 1327 -------- 1328 scipy.stats.f : probability density function, distribution or 1329 cumulative density function, etc. 1330 1331 Notes 1332 ----- 1333 The F statistic is used to compare in-group variances to between-group 1334 variances. Calculating the distribution depends on the sampling, and 1335 so it is a function of the respective degrees of freedom in the 1336 problem. The variable `dfnum` is the number of samples minus one, the 1337 between-groups degrees of freedom, while `dfden` is the within-groups 1338 degrees of freedom, the sum of the number of samples in each group 1339 minus the number of groups. 1340 1341 References 1342 ---------- 1343 .. [1] Glantz, Stanton A. "Primer of Biostatistics.", McGraw-Hill, 1344 Fifth Edition, 2002. 1345 .. [2] Wikipedia, "F-distribution", 1346 https://en.wikipedia.org/wiki/F-distribution 1347 1348 Examples 1349 -------- 1350 An example from Glantz[1], pp 47-40: 1351 1352 Two groups, children of diabetics (25 people) and children from people 1353 without diabetes (25 controls). Fasting blood glucose was measured, 1354 case group had a mean value of 86.1, controls had a mean value of 1355 82.2. Standard deviations were 2.09 and 2.49 respectively. Are these 1356 data consistent with the null hypothesis that the parents diabetic 1357 status does not affect their children's blood glucose levels? 1358 Calculating the F statistic from the data gives a value of 36.01. 1359 1360 Draw samples from the distribution: 1361 1362 >>> dfnum = 1. # between group degrees of freedom 1363 >>> dfden = 48. # within groups degrees of freedom 1364 >>> s = np.random.default_rng().f(dfnum, dfden, 1000) 1365 1366 The lower bound for the top 1% of the samples is : 1367 1368 >>> np.sort(s)[-10] 1369 7.61988120985 # random 1370 1371 So there is about a 1% chance that the F statistic will exceed 7.62, 1372 the measured value is 36, so the null hypothesis is rejected at the 1% 1373 level. 1374 1375 """ 1376 return cont(&random_f, &self._bitgen, size, self.lock, 2, 1377 dfnum, 'dfnum', CONS_POSITIVE, 1378 dfden, 'dfden', CONS_POSITIVE, 1379 0.0, '', CONS_NONE, None) 1380 1381 def noncentral_f(self, dfnum, dfden, nonc, size=None): 1382 """ 1383 noncentral_f(dfnum, dfden, nonc, size=None) 1384 1385 Draw samples from the noncentral F distribution. 1386 1387 Samples are drawn from an F distribution with specified parameters, 1388 `dfnum` (degrees of freedom in numerator) and `dfden` (degrees of 1389 freedom in denominator), where both parameters > 1. 1390 `nonc` is the non-centrality parameter. 1391 1392 Parameters 1393 ---------- 1394 dfnum : float or array_like of floats 1395 Numerator degrees of freedom, must be > 0. 1396 1397 .. versionchanged:: 1.14.0 1398 Earlier NumPy versions required dfnum > 1. 1399 dfden : float or array_like of floats 1400 Denominator degrees of freedom, must be > 0. 1401 nonc : float or array_like of floats 1402 Non-centrality parameter, the sum of the squares of the numerator 1403 means, must be >= 0. 1404 size : int or tuple of ints, optional 1405 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 1406 ``m * n * k`` samples are drawn. If size is ``None`` (default), 1407 a single value is returned if ``dfnum``, ``dfden``, and ``nonc`` 1408 are all scalars. Otherwise, ``np.broadcast(dfnum, dfden, nonc).size`` 1409 samples are drawn. 1410 1411 Returns 1412 ------- 1413 out : ndarray or scalar 1414 Drawn samples from the parameterized noncentral Fisher distribution. 1415 1416 Notes 1417 ----- 1418 When calculating the power of an experiment (power = probability of 1419 rejecting the null hypothesis when a specific alternative is true) the 1420 non-central F statistic becomes important. When the null hypothesis is 1421 true, the F statistic follows a central F distribution. When the null 1422 hypothesis is not true, then it follows a non-central F statistic. 1423 1424 References 1425 ---------- 1426 .. [1] Weisstein, Eric W. "Noncentral F-Distribution." 1427 From MathWorld--A Wolfram Web Resource. 1428 http://mathworld.wolfram.com/NoncentralF-Distribution.html 1429 .. [2] Wikipedia, "Noncentral F-distribution", 1430 https://en.wikipedia.org/wiki/Noncentral_F-distribution 1431 1432 Examples 1433 -------- 1434 In a study, testing for a specific alternative to the null hypothesis 1435 requires use of the Noncentral F distribution. We need to calculate the 1436 area in the tail of the distribution that exceeds the value of the F 1437 distribution for the null hypothesis. We'll plot the two probability 1438 distributions for comparison. 1439 1440 >>> rng = np.random.default_rng() 1441 >>> dfnum = 3 # between group deg of freedom 1442 >>> dfden = 20 # within groups degrees of freedom 1443 >>> nonc = 3.0 1444 >>> nc_vals = rng.noncentral_f(dfnum, dfden, nonc, 1000000) 1445 >>> NF = np.histogram(nc_vals, bins=50, density=True) 1446 >>> c_vals = rng.f(dfnum, dfden, 1000000) 1447 >>> F = np.histogram(c_vals, bins=50, density=True) 1448 >>> import matplotlib.pyplot as plt 1449 >>> plt.plot(F[1][1:], F[0]) 1450 >>> plt.plot(NF[1][1:], NF[0]) 1451 >>> plt.show() 1452 1453 """ 1454 return cont(&random_noncentral_f, &self._bitgen, size, self.lock, 3, 1455 dfnum, 'dfnum', CONS_POSITIVE, 1456 dfden, 'dfden', CONS_POSITIVE, 1457 nonc, 'nonc', CONS_NON_NEGATIVE, None) 1458 1459 def chisquare(self, df, size=None): 1460 """ 1461 chisquare(df, size=None) 1462 1463 Draw samples from a chi-square distribution. 1464 1465 When `df` independent random variables, each with standard normal 1466 distributions (mean 0, variance 1), are squared and summed, the 1467 resulting distribution is chi-square (see Notes). This distribution 1468 is often used in hypothesis testing. 1469 1470 Parameters 1471 ---------- 1472 df : float or array_like of floats 1473 Number of degrees of freedom, must be > 0. 1474 size : int or tuple of ints, optional 1475 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 1476 ``m * n * k`` samples are drawn. If size is ``None`` (default), 1477 a single value is returned if ``df`` is a scalar. Otherwise, 1478 ``np.array(df).size`` samples are drawn. 1479 1480 Returns 1481 ------- 1482 out : ndarray or scalar 1483 Drawn samples from the parameterized chi-square distribution. 1484 1485 Raises 1486 ------ 1487 ValueError 1488 When `df` <= 0 or when an inappropriate `size` (e.g. ``size=-1``) 1489 is given. 1490 1491 Notes 1492 ----- 1493 The variable obtained by summing the squares of `df` independent, 1494 standard normally distributed random variables: 1495 1496 .. math:: Q = \\sum_{i=0}^{\\mathtt{df}} X^2_i 1497 1498 is chi-square distributed, denoted 1499 1500 .. math:: Q \\sim \\chi^2_k. 1501 1502 The probability density function of the chi-squared distribution is 1503 1504 .. math:: p(x) = \\frac{(1/2)^{k/2}}{\\Gamma(k/2)} 1505 x^{k/2 - 1} e^{-x/2}, 1506 1507 where :math:`\\Gamma` is the gamma function, 1508 1509 .. math:: \\Gamma(x) = \\int_0^{-\\infty} t^{x - 1} e^{-t} dt. 1510 1511 References 1512 ---------- 1513 .. [1] NIST "Engineering Statistics Handbook" 1514 https://www.itl.nist.gov/div898/handbook/eda/section3/eda3666.htm 1515 1516 Examples 1517 -------- 1518 >>> np.random.default_rng().chisquare(2,4) 1519 array([ 1.89920014, 9.00867716, 3.13710533, 5.62318272]) # random 1520 1521 """ 1522 return cont(&random_chisquare, &self._bitgen, size, self.lock, 1, 1523 df, 'df', CONS_POSITIVE, 1524 0.0, '', CONS_NONE, 1525 0.0, '', CONS_NONE, None) 1526 1527 def noncentral_chisquare(self, df, nonc, size=None): 1528 """ 1529 noncentral_chisquare(df, nonc, size=None) 1530 1531 Draw samples from a noncentral chi-square distribution. 1532 1533 The noncentral :math:`\\chi^2` distribution is a generalization of 1534 the :math:`\\chi^2` distribution. 1535 1536 Parameters 1537 ---------- 1538 df : float or array_like of floats 1539 Degrees of freedom, must be > 0. 1540 1541 .. versionchanged:: 1.10.0 1542 Earlier NumPy versions required dfnum > 1. 1543 nonc : float or array_like of floats 1544 Non-centrality, must be non-negative. 1545 size : int or tuple of ints, optional 1546 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 1547 ``m * n * k`` samples are drawn. If size is ``None`` (default), 1548 a single value is returned if ``df`` and ``nonc`` are both scalars. 1549 Otherwise, ``np.broadcast(df, nonc).size`` samples are drawn. 1550 1551 Returns 1552 ------- 1553 out : ndarray or scalar 1554 Drawn samples from the parameterized noncentral chi-square distribution. 1555 1556 Notes 1557 ----- 1558 The probability density function for the noncentral Chi-square 1559 distribution is 1560 1561 .. math:: P(x;df,nonc) = \\sum^{\\infty}_{i=0} 1562 \\frac{e^{-nonc/2}(nonc/2)^{i}}{i!} 1563 P_{Y_{df+2i}}(x), 1564 1565 where :math:`Y_{q}` is the Chi-square with q degrees of freedom. 1566 1567 References 1568 ---------- 1569 .. [1] Wikipedia, "Noncentral chi-squared distribution" 1570 https://en.wikipedia.org/wiki/Noncentral_chi-squared_distribution 1571 1572 Examples 1573 -------- 1574 Draw values from the distribution and plot the histogram 1575 1576 >>> rng = np.random.default_rng() 1577 >>> import matplotlib.pyplot as plt 1578 >>> values = plt.hist(rng.noncentral_chisquare(3, 20, 100000), 1579 ... bins=200, density=True) 1580 >>> plt.show() 1581 1582 Draw values from a noncentral chisquare with very small noncentrality, 1583 and compare to a chisquare. 1584 1585 >>> plt.figure() 1586 >>> values = plt.hist(rng.noncentral_chisquare(3, .0000001, 100000), 1587 ... bins=np.arange(0., 25, .1), density=True) 1588 >>> values2 = plt.hist(rng.chisquare(3, 100000), 1589 ... bins=np.arange(0., 25, .1), density=True) 1590 >>> plt.plot(values[1][0:-1], values[0]-values2[0], 'ob') 1591 >>> plt.show() 1592 1593 Demonstrate how large values of non-centrality lead to a more symmetric 1594 distribution. 1595 1596 >>> plt.figure() 1597 >>> values = plt.hist(rng.noncentral_chisquare(3, 20, 100000), 1598 ... bins=200, density=True) 1599 >>> plt.show() 1600 1601 """ 1602 return cont(&random_noncentral_chisquare, &self._bitgen, size, self.lock, 2, 1603 df, 'df', CONS_POSITIVE, 1604 nonc, 'nonc', CONS_NON_NEGATIVE, 1605 0.0, '', CONS_NONE, None) 1606 1607 def standard_cauchy(self, size=None): 1608 """ 1609 standard_cauchy(size=None) 1610 1611 Draw samples from a standard Cauchy distribution with mode = 0. 1612 1613 Also known as the Lorentz distribution. 1614 1615 Parameters 1616 ---------- 1617 size : int or tuple of ints, optional 1618 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 1619 ``m * n * k`` samples are drawn. Default is None, in which case a 1620 single value is returned. 1621 1622 Returns 1623 ------- 1624 samples : ndarray or scalar 1625 The drawn samples. 1626 1627 Notes 1628 ----- 1629 The probability density function for the full Cauchy distribution is 1630 1631 .. math:: P(x; x_0, \\gamma) = \\frac{1}{\\pi \\gamma \\bigl[ 1+ 1632 (\\frac{x-x_0}{\\gamma})^2 \\bigr] } 1633 1634 and the Standard Cauchy distribution just sets :math:`x_0=0` and 1635 :math:`\\gamma=1` 1636 1637 The Cauchy distribution arises in the solution to the driven harmonic 1638 oscillator problem, and also describes spectral line broadening. It 1639 also describes the distribution of values at which a line tilted at 1640 a random angle will cut the x axis. 1641 1642 When studying hypothesis tests that assume normality, seeing how the 1643 tests perform on data from a Cauchy distribution is a good indicator of 1644 their sensitivity to a heavy-tailed distribution, since the Cauchy looks 1645 very much like a Gaussian distribution, but with heavier tails. 1646 1647 References 1648 ---------- 1649 .. [1] NIST/SEMATECH e-Handbook of Statistical Methods, "Cauchy 1650 Distribution", 1651 https://www.itl.nist.gov/div898/handbook/eda/section3/eda3663.htm 1652 .. [2] Weisstein, Eric W. "Cauchy Distribution." From MathWorld--A 1653 Wolfram Web Resource. 1654 http://mathworld.wolfram.com/CauchyDistribution.html 1655 .. [3] Wikipedia, "Cauchy distribution" 1656 https://en.wikipedia.org/wiki/Cauchy_distribution 1657 1658 Examples 1659 -------- 1660 Draw samples and plot the distribution: 1661 1662 >>> import matplotlib.pyplot as plt 1663 >>> s = np.random.default_rng().standard_cauchy(1000000) 1664 >>> s = s[(s>-25) & (s<25)] # truncate distribution so it plots well 1665 >>> plt.hist(s, bins=100) 1666 >>> plt.show() 1667 1668 """ 1669 return cont(&random_standard_cauchy, &self._bitgen, size, self.lock, 0, 1670 0.0, '', CONS_NONE, 0.0, '', CONS_NONE, 0.0, '', CONS_NONE, None) 1671 1672 def standard_t(self, df, size=None): 1673 """ 1674 standard_t(df, size=None) 1675 1676 Draw samples from a standard Student's t distribution with `df` degrees 1677 of freedom. 1678 1679 A special case of the hyperbolic distribution. As `df` gets 1680 large, the result resembles that of the standard normal 1681 distribution (`standard_normal`). 1682 1683 Parameters 1684 ---------- 1685 df : float or array_like of floats 1686 Degrees of freedom, must be > 0. 1687 size : int or tuple of ints, optional 1688 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 1689 ``m * n * k`` samples are drawn. If size is ``None`` (default), 1690 a single value is returned if ``df`` is a scalar. Otherwise, 1691 ``np.array(df).size`` samples are drawn. 1692 1693 Returns 1694 ------- 1695 out : ndarray or scalar 1696 Drawn samples from the parameterized standard Student's t distribution. 1697 1698 Notes 1699 ----- 1700 The probability density function for the t distribution is 1701 1702 .. math:: P(x, df) = \\frac{\\Gamma(\\frac{df+1}{2})}{\\sqrt{\\pi df} 1703 \\Gamma(\\frac{df}{2})}\\Bigl( 1+\\frac{x^2}{df} \\Bigr)^{-(df+1)/2} 1704 1705 The t test is based on an assumption that the data come from a 1706 Normal distribution. The t test provides a way to test whether 1707 the sample mean (that is the mean calculated from the data) is 1708 a good estimate of the true mean. 1709 1710 The derivation of the t-distribution was first published in 1711 1908 by William Gosset while working for the Guinness Brewery 1712 in Dublin. Due to proprietary issues, he had to publish under 1713 a pseudonym, and so he used the name Student. 1714 1715 References 1716 ---------- 1717 .. [1] Dalgaard, Peter, "Introductory Statistics With R", 1718 Springer, 2002. 1719 .. [2] Wikipedia, "Student's t-distribution" 1720 https://en.wikipedia.org/wiki/Student's_t-distribution 1721 1722 Examples 1723 -------- 1724 From Dalgaard page 83 [1]_, suppose the daily energy intake for 11 1725 women in kilojoules (kJ) is: 1726 1727 >>> intake = np.array([5260., 5470, 5640, 6180, 6390, 6515, 6805, 7515, \\ 1728 ... 7515, 8230, 8770]) 1729 1730 Does their energy intake deviate systematically from the recommended 1731 value of 7725 kJ? 1732 1733 We have 10 degrees of freedom, so is the sample mean within 95% of the 1734 recommended value? 1735 1736 >>> s = np.random.default_rng().standard_t(10, size=100000) 1737 >>> np.mean(intake) 1738 6753.636363636364 1739 >>> intake.std(ddof=1) 1740 1142.1232221373727 1741 1742 Calculate the t statistic, setting the ddof parameter to the unbiased 1743 value so the divisor in the standard deviation will be degrees of 1744 freedom, N-1. 1745 1746 >>> t = (np.mean(intake)-7725)/(intake.std(ddof=1)/np.sqrt(len(intake))) 1747 >>> import matplotlib.pyplot as plt 1748 >>> h = plt.hist(s, bins=100, density=True) 1749 1750 For a one-sided t-test, how far out in the distribution does the t 1751 statistic appear? 1752 1753 >>> np.sum(s<t) / float(len(s)) 1754 0.0090699999999999999 #random 1755 1756 So the p-value is about 0.009, which says the null hypothesis has a 1757 probability of about 99% of being true. 1758 1759 """ 1760 return cont(&random_standard_t, &self._bitgen, size, self.lock, 1, 1761 df, 'df', CONS_POSITIVE, 1762 0, '', CONS_NONE, 1763 0, '', CONS_NONE, 1764 None) 1765 1766 def vonmises(self, mu, kappa, size=None): 1767 """ 1768 vonmises(mu, kappa, size=None) 1769 1770 Draw samples from a von Mises distribution. 1771 1772 Samples are drawn from a von Mises distribution with specified mode 1773 (mu) and dispersion (kappa), on the interval [-pi, pi]. 1774 1775 The von Mises distribution (also known as the circular normal 1776 distribution) is a continuous probability distribution on the unit 1777 circle. It may be thought of as the circular analogue of the normal 1778 distribution. 1779 1780 Parameters 1781 ---------- 1782 mu : float or array_like of floats 1783 Mode ("center") of the distribution. 1784 kappa : float or array_like of floats 1785 Dispersion of the distribution, has to be >=0. 1786 size : int or tuple of ints, optional 1787 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 1788 ``m * n * k`` samples are drawn. If size is ``None`` (default), 1789 a single value is returned if ``mu`` and ``kappa`` are both scalars. 1790 Otherwise, ``np.broadcast(mu, kappa).size`` samples are drawn. 1791 1792 Returns 1793 ------- 1794 out : ndarray or scalar 1795 Drawn samples from the parameterized von Mises distribution. 1796 1797 See Also 1798 -------- 1799 scipy.stats.vonmises : probability density function, distribution, or 1800 cumulative density function, etc. 1801 1802 Notes 1803 ----- 1804 The probability density for the von Mises distribution is 1805 1806 .. math:: p(x) = \\frac{e^{\\kappa cos(x-\\mu)}}{2\\pi I_0(\\kappa)}, 1807 1808 where :math:`\\mu` is the mode and :math:`\\kappa` the dispersion, 1809 and :math:`I_0(\\kappa)` is the modified Bessel function of order 0. 1810 1811 The von Mises is named for Richard Edler von Mises, who was born in 1812 Austria-Hungary, in what is now the Ukraine. He fled to the United 1813 States in 1939 and became a professor at Harvard. He worked in 1814 probability theory, aerodynamics, fluid mechanics, and philosophy of 1815 science. 1816 1817 References 1818 ---------- 1819 .. [1] Abramowitz, M. and Stegun, I. A. (Eds.). "Handbook of 1820 Mathematical Functions with Formulas, Graphs, and Mathematical 1821 Tables, 9th printing," New York: Dover, 1972. 1822 .. [2] von Mises, R., "Mathematical Theory of Probability 1823 and Statistics", New York: Academic Press, 1964. 1824 1825 Examples 1826 -------- 1827 Draw samples from the distribution: 1828 1829 >>> mu, kappa = 0.0, 4.0 # mean and dispersion 1830 >>> s = np.random.default_rng().vonmises(mu, kappa, 1000) 1831 1832 Display the histogram of the samples, along with 1833 the probability density function: 1834 1835 >>> import matplotlib.pyplot as plt 1836 >>> from scipy.special import i0 # doctest: +SKIP 1837 >>> plt.hist(s, 50, density=True) 1838 >>> x = np.linspace(-np.pi, np.pi, num=51) 1839 >>> y = np.exp(kappa*np.cos(x-mu))/(2*np.pi*i0(kappa)) # doctest: +SKIP 1840 >>> plt.plot(x, y, linewidth=2, color='r') # doctest: +SKIP 1841 >>> plt.show() 1842 1843 """ 1844 return cont(&random_vonmises, &self._bitgen, size, self.lock, 2, 1845 mu, 'mu', CONS_NONE, 1846 kappa, 'kappa', CONS_NON_NEGATIVE, 1847 0.0, '', CONS_NONE, None) 1848 1849 def pareto(self, a, size=None): 1850 """ 1851 pareto(a, size=None) 1852 1853 Draw samples from a Pareto II or Lomax distribution with 1854 specified shape. 1855 1856 The Lomax or Pareto II distribution is a shifted Pareto 1857 distribution. The classical Pareto distribution can be 1858 obtained from the Lomax distribution by adding 1 and 1859 multiplying by the scale parameter ``m`` (see Notes). The 1860 smallest value of the Lomax distribution is zero while for the 1861 classical Pareto distribution it is ``mu``, where the standard 1862 Pareto distribution has location ``mu = 1``. Lomax can also 1863 be considered as a simplified version of the Generalized 1864 Pareto distribution (available in SciPy), with the scale set 1865 to one and the location set to zero. 1866 1867 The Pareto distribution must be greater than zero, and is 1868 unbounded above. It is also known as the "80-20 rule". In 1869 this distribution, 80 percent of the weights are in the lowest 1870 20 percent of the range, while the other 20 percent fill the 1871 remaining 80 percent of the range. 1872 1873 Parameters 1874 ---------- 1875 a : float or array_like of floats 1876 Shape of the distribution. Must be positive. 1877 size : int or tuple of ints, optional 1878 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 1879 ``m * n * k`` samples are drawn. If size is ``None`` (default), 1880 a single value is returned if ``a`` is a scalar. Otherwise, 1881 ``np.array(a).size`` samples are drawn. 1882 1883 Returns 1884 ------- 1885 out : ndarray or scalar 1886 Drawn samples from the parameterized Pareto distribution. 1887 1888 See Also 1889 -------- 1890 scipy.stats.lomax : probability density function, distribution or 1891 cumulative density function, etc. 1892 scipy.stats.genpareto : probability density function, distribution or 1893 cumulative density function, etc. 1894 1895 Notes 1896 ----- 1897 The probability density for the Pareto distribution is 1898 1899 .. math:: p(x) = \\frac{am^a}{x^{a+1}} 1900 1901 where :math:`a` is the shape and :math:`m` the scale. 1902 1903 The Pareto distribution, named after the Italian economist 1904 Vilfredo Pareto, is a power law probability distribution 1905 useful in many real world problems. Outside the field of 1906 economics it is generally referred to as the Bradford 1907 distribution. Pareto developed the distribution to describe 1908 the distribution of wealth in an economy. It has also found 1909 use in insurance, web page access statistics, oil field sizes, 1910 and many other problems, including the download frequency for 1911 projects in Sourceforge [1]_. It is one of the so-called 1912 "fat-tailed" distributions. 1913 1914 1915 References 1916 ---------- 1917 .. [1] Francis Hunt and Paul Johnson, On the Pareto Distribution of 1918 Sourceforge projects. 1919 .. [2] Pareto, V. (1896). Course of Political Economy. Lausanne. 1920 .. [3] Reiss, R.D., Thomas, M.(2001), Statistical Analysis of Extreme 1921 Values, Birkhauser Verlag, Basel, pp 23-30. 1922 .. [4] Wikipedia, "Pareto distribution", 1923 https://en.wikipedia.org/wiki/Pareto_distribution 1924 1925 Examples 1926 -------- 1927 Draw samples from the distribution: 1928 1929 >>> a, m = 3., 2. # shape and mode 1930 >>> s = (np.random.default_rng().pareto(a, 1000) + 1) * m 1931 1932 Display the histogram of the samples, along with the probability 1933 density function: 1934 1935 >>> import matplotlib.pyplot as plt 1936 >>> count, bins, _ = plt.hist(s, 100, density=True) 1937 >>> fit = a*m**a / bins**(a+1) 1938 >>> plt.plot(bins, max(count)*fit/max(fit), linewidth=2, color='r') 1939 >>> plt.show() 1940 1941 """ 1942 return cont(&random_pareto, &self._bitgen, size, self.lock, 1, 1943 a, 'a', CONS_POSITIVE, 1944 0.0, '', CONS_NONE, 1945 0.0, '', CONS_NONE, None) 1946 1947 def weibull(self, a, size=None): 1948 """ 1949 weibull(a, size=None) 1950 1951 Draw samples from a Weibull distribution. 1952 1953 Draw samples from a 1-parameter Weibull distribution with the given 1954 shape parameter `a`. 1955 1956 .. math:: X = (-ln(U))^{1/a} 1957 1958 Here, U is drawn from the uniform distribution over (0,1]. 1959 1960 The more common 2-parameter Weibull, including a scale parameter 1961 :math:`\\lambda` is just :math:`X = \\lambda(-ln(U))^{1/a}`. 1962 1963 Parameters 1964 ---------- 1965 a : float or array_like of floats 1966 Shape parameter of the distribution. Must be nonnegative. 1967 size : int or tuple of ints, optional 1968 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 1969 ``m * n * k`` samples are drawn. If size is ``None`` (default), 1970 a single value is returned if ``a`` is a scalar. Otherwise, 1971 ``np.array(a).size`` samples are drawn. 1972 1973 Returns 1974 ------- 1975 out : ndarray or scalar 1976 Drawn samples from the parameterized Weibull distribution. 1977 1978 See Also 1979 -------- 1980 scipy.stats.weibull_max 1981 scipy.stats.weibull_min 1982 scipy.stats.genextreme 1983 gumbel 1984 1985 Notes 1986 ----- 1987 The Weibull (or Type III asymptotic extreme value distribution 1988 for smallest values, SEV Type III, or Rosin-Rammler 1989 distribution) is one of a class of Generalized Extreme Value 1990 (GEV) distributions used in modeling extreme value problems. 1991 This class includes the Gumbel and Frechet distributions. 1992 1993 The probability density for the Weibull distribution is 1994 1995 .. math:: p(x) = \\frac{a} 1996 {\\lambda}(\\frac{x}{\\lambda})^{a-1}e^{-(x/\\lambda)^a}, 1997 1998 where :math:`a` is the shape and :math:`\\lambda` the scale. 1999 2000 The function has its peak (the mode) at 2001 :math:`\\lambda(\\frac{a-1}{a})^{1/a}`. 2002 2003 When ``a = 1``, the Weibull distribution reduces to the exponential 2004 distribution. 2005 2006 References 2007 ---------- 2008 .. [1] Waloddi Weibull, Royal Technical University, Stockholm, 2009 1939 "A Statistical Theory Of The Strength Of Materials", 2010 Ingeniorsvetenskapsakademiens Handlingar Nr 151, 1939, 2011 Generalstabens Litografiska Anstalts Forlag, Stockholm. 2012 .. [2] Waloddi Weibull, "A Statistical Distribution Function of 2013 Wide Applicability", Journal Of Applied Mechanics ASME Paper 2014 1951. 2015 .. [3] Wikipedia, "Weibull distribution", 2016 https://en.wikipedia.org/wiki/Weibull_distribution 2017 2018 Examples 2019 -------- 2020 Draw samples from the distribution: 2021 2022 >>> rng = np.random.default_rng() 2023 >>> a = 5. # shape 2024 >>> s = rng.weibull(a, 1000) 2025 2026 Display the histogram of the samples, along with 2027 the probability density function: 2028 2029 >>> import matplotlib.pyplot as plt 2030 >>> x = np.arange(1,100.)/50. 2031 >>> def weib(x,n,a): 2032 ... return (a / n) * (x / n)**(a - 1) * np.exp(-(x / n)**a) 2033 2034 >>> count, bins, ignored = plt.hist(rng.weibull(5.,1000)) 2035 >>> x = np.arange(1,100.)/50. 2036 >>> scale = count.max()/weib(x, 1., 5.).max() 2037 >>> plt.plot(x, weib(x, 1., 5.)*scale) 2038 >>> plt.show() 2039 2040 """ 2041 return cont(&random_weibull, &self._bitgen, size, self.lock, 1, 2042 a, 'a', CONS_NON_NEGATIVE, 2043 0.0, '', CONS_NONE, 2044 0.0, '', CONS_NONE, None) 2045 2046 def power(self, a, size=None): 2047 """ 2048 power(a, size=None) 2049 2050 Draws samples in [0, 1] from a power distribution with positive 2051 exponent a - 1. 2052 2053 Also known as the power function distribution. 2054 2055 Parameters 2056 ---------- 2057 a : float or array_like of floats 2058 Parameter of the distribution. Must be non-negative. 2059 size : int or tuple of ints, optional 2060 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 2061 ``m * n * k`` samples are drawn. If size is ``None`` (default), 2062 a single value is returned if ``a`` is a scalar. Otherwise, 2063 ``np.array(a).size`` samples are drawn. 2064 2065 Returns 2066 ------- 2067 out : ndarray or scalar 2068 Drawn samples from the parameterized power distribution. 2069 2070 Raises 2071 ------ 2072 ValueError 2073 If a < 1. 2074 2075 Notes 2076 ----- 2077 The probability density function is 2078 2079 .. math:: P(x; a) = ax^{a-1}, 0 \\le x \\le 1, a>0. 2080 2081 The power function distribution is just the inverse of the Pareto 2082 distribution. It may also be seen as a special case of the Beta 2083 distribution. 2084 2085 It is used, for example, in modeling the over-reporting of insurance 2086 claims. 2087 2088 References 2089 ---------- 2090 .. [1] Christian Kleiber, Samuel Kotz, "Statistical size distributions 2091 in economics and actuarial sciences", Wiley, 2003. 2092 .. [2] Heckert, N. A. and Filliben, James J. "NIST Handbook 148: 2093 Dataplot Reference Manual, Volume 2: Let Subcommands and Library 2094 Functions", National Institute of Standards and Technology 2095 Handbook Series, June 2003. 2096 https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/powpdf.pdf 2097 2098 Examples 2099 -------- 2100 Draw samples from the distribution: 2101 2102 >>> rng = np.random.default_rng() 2103 >>> a = 5. # shape 2104 >>> samples = 1000 2105 >>> s = rng.power(a, samples) 2106 2107 Display the histogram of the samples, along with 2108 the probability density function: 2109 2110 >>> import matplotlib.pyplot as plt 2111 >>> count, bins, ignored = plt.hist(s, bins=30) 2112 >>> x = np.linspace(0, 1, 100) 2113 >>> y = a*x**(a-1.) 2114 >>> normed_y = samples*np.diff(bins)[0]*y 2115 >>> plt.plot(x, normed_y) 2116 >>> plt.show() 2117 2118 Compare the power function distribution to the inverse of the Pareto. 2119 2120 >>> from scipy import stats # doctest: +SKIP 2121 >>> rvs = rng.power(5, 1000000) 2122 >>> rvsp = rng.pareto(5, 1000000) 2123 >>> xx = np.linspace(0,1,100) 2124 >>> powpdf = stats.powerlaw.pdf(xx,5) # doctest: +SKIP 2125 2126 >>> plt.figure() 2127 >>> plt.hist(rvs, bins=50, density=True) 2128 >>> plt.plot(xx,powpdf,'r-') # doctest: +SKIP 2129 >>> plt.title('power(5)') 2130 2131 >>> plt.figure() 2132 >>> plt.hist(1./(1.+rvsp), bins=50, density=True) 2133 >>> plt.plot(xx,powpdf,'r-') # doctest: +SKIP 2134 >>> plt.title('inverse of 1 + Generator.pareto(5)') 2135 2136 >>> plt.figure() 2137 >>> plt.hist(1./(1.+rvsp), bins=50, density=True) 2138 >>> plt.plot(xx,powpdf,'r-') # doctest: +SKIP 2139 >>> plt.title('inverse of stats.pareto(5)') 2140 2141 """ 2142 return cont(&random_power, &self._bitgen, size, self.lock, 1, 2143 a, 'a', CONS_POSITIVE, 2144 0.0, '', CONS_NONE, 2145 0.0, '', CONS_NONE, None) 2146 2147 def laplace(self, loc=0.0, scale=1.0, size=None): 2148 """ 2149 laplace(loc=0.0, scale=1.0, size=None) 2150 2151 Draw samples from the Laplace or double exponential distribution with 2152 specified location (or mean) and scale (decay). 2153 2154 The Laplace distribution is similar to the Gaussian/normal distribution, 2155 but is sharper at the peak and has fatter tails. It represents the 2156 difference between two independent, identically distributed exponential 2157 random variables. 2158 2159 Parameters 2160 ---------- 2161 loc : float or array_like of floats, optional 2162 The position, :math:`\\mu`, of the distribution peak. Default is 0. 2163 scale : float or array_like of floats, optional 2164 :math:`\\lambda`, the exponential decay. Default is 1. Must be non- 2165 negative. 2166 size : int or tuple of ints, optional 2167 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 2168 ``m * n * k`` samples are drawn. If size is ``None`` (default), 2169 a single value is returned if ``loc`` and ``scale`` are both scalars. 2170 Otherwise, ``np.broadcast(loc, scale).size`` samples are drawn. 2171 2172 Returns 2173 ------- 2174 out : ndarray or scalar 2175 Drawn samples from the parameterized Laplace distribution. 2176 2177 Notes 2178 ----- 2179 It has the probability density function 2180 2181 .. math:: f(x; \\mu, \\lambda) = \\frac{1}{2\\lambda} 2182 \\exp\\left(-\\frac{|x - \\mu|}{\\lambda}\\right). 2183 2184 The first law of Laplace, from 1774, states that the frequency 2185 of an error can be expressed as an exponential function of the 2186 absolute magnitude of the error, which leads to the Laplace 2187 distribution. For many problems in economics and health 2188 sciences, this distribution seems to model the data better 2189 than the standard Gaussian distribution. 2190 2191 References 2192 ---------- 2193 .. [1] Abramowitz, M. and Stegun, I. A. (Eds.). "Handbook of 2194 Mathematical Functions with Formulas, Graphs, and Mathematical 2195 Tables, 9th printing," New York: Dover, 1972. 2196 .. [2] Kotz, Samuel, et. al. "The Laplace Distribution and 2197 Generalizations, " Birkhauser, 2001. 2198 .. [3] Weisstein, Eric W. "Laplace Distribution." 2199 From MathWorld--A Wolfram Web Resource. 2200 http://mathworld.wolfram.com/LaplaceDistribution.html 2201 .. [4] Wikipedia, "Laplace distribution", 2202 https://en.wikipedia.org/wiki/Laplace_distribution 2203 2204 Examples 2205 -------- 2206 Draw samples from the distribution 2207 2208 >>> loc, scale = 0., 1. 2209 >>> s = np.random.default_rng().laplace(loc, scale, 1000) 2210 2211 Display the histogram of the samples, along with 2212 the probability density function: 2213 2214 >>> import matplotlib.pyplot as plt 2215 >>> count, bins, ignored = plt.hist(s, 30, density=True) 2216 >>> x = np.arange(-8., 8., .01) 2217 >>> pdf = np.exp(-abs(x-loc)/scale)/(2.*scale) 2218 >>> plt.plot(x, pdf) 2219 2220 Plot Gaussian for comparison: 2221 2222 >>> g = (1/(scale * np.sqrt(2 * np.pi)) * 2223 ... np.exp(-(x - loc)**2 / (2 * scale**2))) 2224 >>> plt.plot(x,g) 2225 2226 """ 2227 return cont(&random_laplace, &self._bitgen, size, self.lock, 2, 2228 loc, 'loc', CONS_NONE, 2229 scale, 'scale', CONS_NON_NEGATIVE, 2230 0.0, '', CONS_NONE, None) 2231 2232 def gumbel(self, loc=0.0, scale=1.0, size=None): 2233 """ 2234 gumbel(loc=0.0, scale=1.0, size=None) 2235 2236 Draw samples from a Gumbel distribution. 2237 2238 Draw samples from a Gumbel distribution with specified location and 2239 scale. For more information on the Gumbel distribution, see 2240 Notes and References below. 2241 2242 Parameters 2243 ---------- 2244 loc : float or array_like of floats, optional 2245 The location of the mode of the distribution. Default is 0. 2246 scale : float or array_like of floats, optional 2247 The scale parameter of the distribution. Default is 1. Must be non- 2248 negative. 2249 size : int or tuple of ints, optional 2250 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 2251 ``m * n * k`` samples are drawn. If size is ``None`` (default), 2252 a single value is returned if ``loc`` and ``scale`` are both scalars. 2253 Otherwise, ``np.broadcast(loc, scale).size`` samples are drawn. 2254 2255 Returns 2256 ------- 2257 out : ndarray or scalar 2258 Drawn samples from the parameterized Gumbel distribution. 2259 2260 See Also 2261 -------- 2262 scipy.stats.gumbel_l 2263 scipy.stats.gumbel_r 2264 scipy.stats.genextreme 2265 weibull 2266 2267 Notes 2268 ----- 2269 The Gumbel (or Smallest Extreme Value (SEV) or the Smallest Extreme 2270 Value Type I) distribution is one of a class of Generalized Extreme 2271 Value (GEV) distributions used in modeling extreme value problems. 2272 The Gumbel is a special case of the Extreme Value Type I distribution 2273 for maximums from distributions with "exponential-like" tails. 2274 2275 The probability density for the Gumbel distribution is 2276 2277 .. math:: p(x) = \\frac{e^{-(x - \\mu)/ \\beta}}{\\beta} e^{ -e^{-(x - \\mu)/ 2278 \\beta}}, 2279 2280 where :math:`\\mu` is the mode, a location parameter, and 2281 :math:`\\beta` is the scale parameter. 2282 2283 The Gumbel (named for German mathematician Emil Julius Gumbel) was used 2284 very early in the hydrology literature, for modeling the occurrence of 2285 flood events. It is also used for modeling maximum wind speed and 2286 rainfall rates. It is a "fat-tailed" distribution - the probability of 2287 an event in the tail of the distribution is larger than if one used a 2288 Gaussian, hence the surprisingly frequent occurrence of 100-year 2289 floods. Floods were initially modeled as a Gaussian process, which 2290 underestimated the frequency of extreme events. 2291 2292 It is one of a class of extreme value distributions, the Generalized 2293 Extreme Value (GEV) distributions, which also includes the Weibull and 2294 Frechet. 2295 2296 The function has a mean of :math:`\\mu + 0.57721\\beta` and a variance 2297 of :math:`\\frac{\\pi^2}{6}\\beta^2`. 2298 2299 References 2300 ---------- 2301 .. [1] Gumbel, E. J., "Statistics of Extremes," 2302 New York: Columbia University Press, 1958. 2303 .. [2] Reiss, R.-D. and Thomas, M., "Statistical Analysis of Extreme 2304 Values from Insurance, Finance, Hydrology and Other Fields," 2305 Basel: Birkhauser Verlag, 2001. 2306 2307 Examples 2308 -------- 2309 Draw samples from the distribution: 2310 2311 >>> rng = np.random.default_rng() 2312 >>> mu, beta = 0, 0.1 # location and scale 2313 >>> s = rng.gumbel(mu, beta, 1000) 2314 2315 Display the histogram of the samples, along with 2316 the probability density function: 2317 2318 >>> import matplotlib.pyplot as plt 2319 >>> count, bins, ignored = plt.hist(s, 30, density=True) 2320 >>> plt.plot(bins, (1/beta)*np.exp(-(bins - mu)/beta) 2321 ... * np.exp( -np.exp( -(bins - mu) /beta) ), 2322 ... linewidth=2, color='r') 2323 >>> plt.show() 2324 2325 Show how an extreme value distribution can arise from a Gaussian process 2326 and compare to a Gaussian: 2327 2328 >>> means = [] 2329 >>> maxima = [] 2330 >>> for i in range(0,1000) : 2331 ... a = rng.normal(mu, beta, 1000) 2332 ... means.append(a.mean()) 2333 ... maxima.append(a.max()) 2334 >>> count, bins, ignored = plt.hist(maxima, 30, density=True) 2335 >>> beta = np.std(maxima) * np.sqrt(6) / np.pi 2336 >>> mu = np.mean(maxima) - 0.57721*beta 2337 >>> plt.plot(bins, (1/beta)*np.exp(-(bins - mu)/beta) 2338 ... * np.exp(-np.exp(-(bins - mu)/beta)), 2339 ... linewidth=2, color='r') 2340 >>> plt.plot(bins, 1/(beta * np.sqrt(2 * np.pi)) 2341 ... * np.exp(-(bins - mu)**2 / (2 * beta**2)), 2342 ... linewidth=2, color='g') 2343 >>> plt.show() 2344 2345 """ 2346 return cont(&random_gumbel, &self._bitgen, size, self.lock, 2, 2347 loc, 'loc', CONS_NONE, 2348 scale, 'scale', CONS_NON_NEGATIVE, 2349 0.0, '', CONS_NONE, None) 2350 2351 def logistic(self, loc=0.0, scale=1.0, size=None): 2352 """ 2353 logistic(loc=0.0, scale=1.0, size=None) 2354 2355 Draw samples from a logistic distribution. 2356 2357 Samples are drawn from a logistic distribution with specified 2358 parameters, loc (location or mean, also median), and scale (>0). 2359 2360 Parameters 2361 ---------- 2362 loc : float or array_like of floats, optional 2363 Parameter of the distribution. Default is 0. 2364 scale : float or array_like of floats, optional 2365 Parameter of the distribution. Must be non-negative. 2366 Default is 1. 2367 size : int or tuple of ints, optional 2368 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 2369 ``m * n * k`` samples are drawn. If size is ``None`` (default), 2370 a single value is returned if ``loc`` and ``scale`` are both scalars. 2371 Otherwise, ``np.broadcast(loc, scale).size`` samples are drawn. 2372 2373 Returns 2374 ------- 2375 out : ndarray or scalar 2376 Drawn samples from the parameterized logistic distribution. 2377 2378 See Also 2379 -------- 2380 scipy.stats.logistic : probability density function, distribution or 2381 cumulative density function, etc. 2382 2383 Notes 2384 ----- 2385 The probability density for the Logistic distribution is 2386 2387 .. math:: P(x) = P(x) = \\frac{e^{-(x-\\mu)/s}}{s(1+e^{-(x-\\mu)/s})^2}, 2388 2389 where :math:`\\mu` = location and :math:`s` = scale. 2390 2391 The Logistic distribution is used in Extreme Value problems where it 2392 can act as a mixture of Gumbel distributions, in Epidemiology, and by 2393 the World Chess Federation (FIDE) where it is used in the Elo ranking 2394 system, assuming the performance of each player is a logistically 2395 distributed random variable. 2396 2397 References 2398 ---------- 2399 .. [1] Reiss, R.-D. and Thomas M. (2001), "Statistical Analysis of 2400 Extreme Values, from Insurance, Finance, Hydrology and Other 2401 Fields," Birkhauser Verlag, Basel, pp 132-133. 2402 .. [2] Weisstein, Eric W. "Logistic Distribution." From 2403 MathWorld--A Wolfram Web Resource. 2404 http://mathworld.wolfram.com/LogisticDistribution.html 2405 .. [3] Wikipedia, "Logistic-distribution", 2406 https://en.wikipedia.org/wiki/Logistic_distribution 2407 2408 Examples 2409 -------- 2410 Draw samples from the distribution: 2411 2412 >>> loc, scale = 10, 1 2413 >>> s = np.random.default_rng().logistic(loc, scale, 10000) 2414 >>> import matplotlib.pyplot as plt 2415 >>> count, bins, ignored = plt.hist(s, bins=50) 2416 2417 # plot against distribution 2418 2419 >>> def logist(x, loc, scale): 2420 ... return np.exp((loc-x)/scale)/(scale*(1+np.exp((loc-x)/scale))**2) 2421 >>> lgst_val = logist(bins, loc, scale) 2422 >>> plt.plot(bins, lgst_val * count.max() / lgst_val.max()) 2423 >>> plt.show() 2424 2425 """ 2426 return cont(&random_logistic, &self._bitgen, size, self.lock, 2, 2427 loc, 'loc', CONS_NONE, 2428 scale, 'scale', CONS_NON_NEGATIVE, 2429 0.0, '', CONS_NONE, None) 2430 2431 def lognormal(self, mean=0.0, sigma=1.0, size=None): 2432 """ 2433 lognormal(mean=0.0, sigma=1.0, size=None) 2434 2435 Draw samples from a log-normal distribution. 2436 2437 Draw samples from a log-normal distribution with specified mean, 2438 standard deviation, and array shape. Note that the mean and standard 2439 deviation are not the values for the distribution itself, but of the 2440 underlying normal distribution it is derived from. 2441 2442 Parameters 2443 ---------- 2444 mean : float or array_like of floats, optional 2445 Mean value of the underlying normal distribution. Default is 0. 2446 sigma : float or array_like of floats, optional 2447 Standard deviation of the underlying normal distribution. Must be 2448 non-negative. Default is 1. 2449 size : int or tuple of ints, optional 2450 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 2451 ``m * n * k`` samples are drawn. If size is ``None`` (default), 2452 a single value is returned if ``mean`` and ``sigma`` are both scalars. 2453 Otherwise, ``np.broadcast(mean, sigma).size`` samples are drawn. 2454 2455 Returns 2456 ------- 2457 out : ndarray or scalar 2458 Drawn samples from the parameterized log-normal distribution. 2459 2460 See Also 2461 -------- 2462 scipy.stats.lognorm : probability density function, distribution, 2463 cumulative density function, etc. 2464 2465 Notes 2466 ----- 2467 A variable `x` has a log-normal distribution if `log(x)` is normally 2468 distributed. The probability density function for the log-normal 2469 distribution is: 2470 2471 .. math:: p(x) = \\frac{1}{\\sigma x \\sqrt{2\\pi}} 2472 e^{(-\\frac{(ln(x)-\\mu)^2}{2\\sigma^2})} 2473 2474 where :math:`\\mu` is the mean and :math:`\\sigma` is the standard 2475 deviation of the normally distributed logarithm of the variable. 2476 A log-normal distribution results if a random variable is the *product* 2477 of a large number of independent, identically-distributed variables in 2478 the same way that a normal distribution results if the variable is the 2479 *sum* of a large number of independent, identically-distributed 2480 variables. 2481 2482 References 2483 ---------- 2484 .. [1] Limpert, E., Stahel, W. A., and Abbt, M., "Log-normal 2485 Distributions across the Sciences: Keys and Clues," 2486 BioScience, Vol. 51, No. 5, May, 2001. 2487 https://stat.ethz.ch/~stahel/lognormal/bioscience.pdf 2488 .. [2] Reiss, R.D. and Thomas, M., "Statistical Analysis of Extreme 2489 Values," Basel: Birkhauser Verlag, 2001, pp. 31-32. 2490 2491 Examples 2492 -------- 2493 Draw samples from the distribution: 2494 2495 >>> rng = np.random.default_rng() 2496 >>> mu, sigma = 3., 1. # mean and standard deviation 2497 >>> s = rng.lognormal(mu, sigma, 1000) 2498 2499 Display the histogram of the samples, along with 2500 the probability density function: 2501 2502 >>> import matplotlib.pyplot as plt 2503 >>> count, bins, ignored = plt.hist(s, 100, density=True, align='mid') 2504 2505 >>> x = np.linspace(min(bins), max(bins), 10000) 2506 >>> pdf = (np.exp(-(np.log(x) - mu)**2 / (2 * sigma**2)) 2507 ... / (x * sigma * np.sqrt(2 * np.pi))) 2508 2509 >>> plt.plot(x, pdf, linewidth=2, color='r') 2510 >>> plt.axis('tight') 2511 >>> plt.show() 2512 2513 Demonstrate that taking the products of random samples from a uniform 2514 distribution can be fit well by a log-normal probability density 2515 function. 2516 2517 >>> # Generate a thousand samples: each is the product of 100 random 2518 >>> # values, drawn from a normal distribution. 2519 >>> rng = rng 2520 >>> b = [] 2521 >>> for i in range(1000): 2522 ... a = 10. + rng.standard_normal(100) 2523 ... b.append(np.product(a)) 2524 2525 >>> b = np.array(b) / np.min(b) # scale values to be positive 2526 >>> count, bins, ignored = plt.hist(b, 100, density=True, align='mid') 2527 >>> sigma = np.std(np.log(b)) 2528 >>> mu = np.mean(np.log(b)) 2529 2530 >>> x = np.linspace(min(bins), max(bins), 10000) 2531 >>> pdf = (np.exp(-(np.log(x) - mu)**2 / (2 * sigma**2)) 2532 ... / (x * sigma * np.sqrt(2 * np.pi))) 2533 2534 >>> plt.plot(x, pdf, color='r', linewidth=2) 2535 >>> plt.show() 2536 2537 """ 2538 return cont(&random_lognormal, &self._bitgen, size, self.lock, 2, 2539 mean, 'mean', CONS_NONE, 2540 sigma, 'sigma', CONS_NON_NEGATIVE, 2541 0.0, '', CONS_NONE, None) 2542 2543 def rayleigh(self, scale=1.0, size=None): 2544 """ 2545 rayleigh(scale=1.0, size=None) 2546 2547 Draw samples from a Rayleigh distribution. 2548 2549 The :math:`\\chi` and Weibull distributions are generalizations of the 2550 Rayleigh. 2551 2552 Parameters 2553 ---------- 2554 scale : float or array_like of floats, optional 2555 Scale, also equals the mode. Must be non-negative. Default is 1. 2556 size : int or tuple of ints, optional 2557 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 2558 ``m * n * k`` samples are drawn. If size is ``None`` (default), 2559 a single value is returned if ``scale`` is a scalar. Otherwise, 2560 ``np.array(scale).size`` samples are drawn. 2561 2562 Returns 2563 ------- 2564 out : ndarray or scalar 2565 Drawn samples from the parameterized Rayleigh distribution. 2566 2567 Notes 2568 ----- 2569 The probability density function for the Rayleigh distribution is 2570 2571 .. math:: P(x;scale) = \\frac{x}{scale^2}e^{\\frac{-x^2}{2 \\cdotp scale^2}} 2572 2573 The Rayleigh distribution would arise, for example, if the East 2574 and North components of the wind velocity had identical zero-mean 2575 Gaussian distributions. Then the wind speed would have a Rayleigh 2576 distribution. 2577 2578 References 2579 ---------- 2580 .. [1] Brighton Webs Ltd., "Rayleigh Distribution," 2581 https://web.archive.org/web/20090514091424/http://brighton-webs.co.uk:80/distributions/rayleigh.asp 2582 .. [2] Wikipedia, "Rayleigh distribution" 2583 https://en.wikipedia.org/wiki/Rayleigh_distribution 2584 2585 Examples 2586 -------- 2587 Draw values from the distribution and plot the histogram 2588 2589 >>> from matplotlib.pyplot import hist 2590 >>> rng = np.random.default_rng() 2591 >>> values = hist(rng.rayleigh(3, 100000), bins=200, density=True) 2592 2593 Wave heights tend to follow a Rayleigh distribution. If the mean wave 2594 height is 1 meter, what fraction of waves are likely to be larger than 3 2595 meters? 2596 2597 >>> meanvalue = 1 2598 >>> modevalue = np.sqrt(2 / np.pi) * meanvalue 2599 >>> s = rng.rayleigh(modevalue, 1000000) 2600 2601 The percentage of waves larger than 3 meters is: 2602 2603 >>> 100.*sum(s>3)/1000000. 2604 0.087300000000000003 # random 2605 2606 """ 2607 return cont(&random_rayleigh, &self._bitgen, size, self.lock, 1, 2608 scale, 'scale', CONS_NON_NEGATIVE, 2609 0.0, '', CONS_NONE, 2610 0.0, '', CONS_NONE, None) 2611 2612 def wald(self, mean, scale, size=None): 2613 """ 2614 wald(mean, scale, size=None) 2615 2616 Draw samples from a Wald, or inverse Gaussian, distribution. 2617 2618 As the scale approaches infinity, the distribution becomes more like a 2619 Gaussian. Some references claim that the Wald is an inverse Gaussian 2620 with mean equal to 1, but this is by no means universal. 2621 2622 The inverse Gaussian distribution was first studied in relationship to 2623 Brownian motion. In 1956 M.C.K. Tweedie used the name inverse Gaussian 2624 because there is an inverse relationship between the time to cover a 2625 unit distance and distance covered in unit time. 2626 2627 Parameters 2628 ---------- 2629 mean : float or array_like of floats 2630 Distribution mean, must be > 0. 2631 scale : float or array_like of floats 2632 Scale parameter, must be > 0. 2633 size : int or tuple of ints, optional 2634 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 2635 ``m * n * k`` samples are drawn. If size is ``None`` (default), 2636 a single value is returned if ``mean`` and ``scale`` are both scalars. 2637 Otherwise, ``np.broadcast(mean, scale).size`` samples are drawn. 2638 2639 Returns 2640 ------- 2641 out : ndarray or scalar 2642 Drawn samples from the parameterized Wald distribution. 2643 2644 Notes 2645 ----- 2646 The probability density function for the Wald distribution is 2647 2648 .. math:: P(x;mean,scale) = \\sqrt{\\frac{scale}{2\\pi x^3}}e^ 2649 \\frac{-scale(x-mean)^2}{2\\cdotp mean^2x} 2650 2651 As noted above the inverse Gaussian distribution first arise 2652 from attempts to model Brownian motion. It is also a 2653 competitor to the Weibull for use in reliability modeling and 2654 modeling stock returns and interest rate processes. 2655 2656 References 2657 ---------- 2658 .. [1] Brighton Webs Ltd., Wald Distribution, 2659 https://web.archive.org/web/20090423014010/http://www.brighton-webs.co.uk:80/distributions/wald.asp 2660 .. [2] Chhikara, Raj S., and Folks, J. Leroy, "The Inverse Gaussian 2661 Distribution: Theory : Methodology, and Applications", CRC Press, 2662 1988. 2663 .. [3] Wikipedia, "Inverse Gaussian distribution" 2664 https://en.wikipedia.org/wiki/Inverse_Gaussian_distribution 2665 2666 Examples 2667 -------- 2668 Draw values from the distribution and plot the histogram: 2669 2670 >>> import matplotlib.pyplot as plt 2671 >>> h = plt.hist(np.random.default_rng().wald(3, 2, 100000), bins=200, density=True) 2672 >>> plt.show() 2673 2674 """ 2675 return cont(&random_wald, &self._bitgen, size, self.lock, 2, 2676 mean, 'mean', CONS_POSITIVE, 2677 scale, 'scale', CONS_POSITIVE, 2678 0.0, '', CONS_NONE, None) 2679 2680 def triangular(self, left, mode, right, size=None): 2681 """ 2682 triangular(left, mode, right, size=None) 2683 2684 Draw samples from the triangular distribution over the 2685 interval ``[left, right]``. 2686 2687 The triangular distribution is a continuous probability 2688 distribution with lower limit left, peak at mode, and upper 2689 limit right. Unlike the other distributions, these parameters 2690 directly define the shape of the pdf. 2691 2692 Parameters 2693 ---------- 2694 left : float or array_like of floats 2695 Lower limit. 2696 mode : float or array_like of floats 2697 The value where the peak of the distribution occurs. 2698 The value must fulfill the condition ``left <= mode <= right``. 2699 right : float or array_like of floats 2700 Upper limit, must be larger than `left`. 2701 size : int or tuple of ints, optional 2702 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 2703 ``m * n * k`` samples are drawn. If size is ``None`` (default), 2704 a single value is returned if ``left``, ``mode``, and ``right`` 2705 are all scalars. Otherwise, ``np.broadcast(left, mode, right).size`` 2706 samples are drawn. 2707 2708 Returns 2709 ------- 2710 out : ndarray or scalar 2711 Drawn samples from the parameterized triangular distribution. 2712 2713 Notes 2714 ----- 2715 The probability density function for the triangular distribution is 2716 2717 .. math:: P(x;l, m, r) = \\begin{cases} 2718 \\frac{2(x-l)}{(r-l)(m-l)}& \\text{for $l \\leq x \\leq m$},\\\\ 2719 \\frac{2(r-x)}{(r-l)(r-m)}& \\text{for $m \\leq x \\leq r$},\\\\ 2720 0& \\text{otherwise}. 2721 \\end{cases} 2722 2723 The triangular distribution is often used in ill-defined 2724 problems where the underlying distribution is not known, but 2725 some knowledge of the limits and mode exists. Often it is used 2726 in simulations. 2727 2728 References 2729 ---------- 2730 .. [1] Wikipedia, "Triangular distribution" 2731 https://en.wikipedia.org/wiki/Triangular_distribution 2732 2733 Examples 2734 -------- 2735 Draw values from the distribution and plot the histogram: 2736 2737 >>> import matplotlib.pyplot as plt 2738 >>> h = plt.hist(np.random.default_rng().triangular(-3, 0, 8, 100000), bins=200, 2739 ... density=True) 2740 >>> plt.show() 2741 2742 """ 2743 cdef bint is_scalar = True 2744 cdef double fleft, fmode, fright 2745 cdef np.ndarray oleft, omode, oright 2746 2747 oleft = <np.ndarray>np.PyArray_FROM_OTF(left, np.NPY_DOUBLE, np.NPY_ALIGNED) 2748 omode = <np.ndarray>np.PyArray_FROM_OTF(mode, np.NPY_DOUBLE, np.NPY_ALIGNED) 2749 oright = <np.ndarray>np.PyArray_FROM_OTF(right, np.NPY_DOUBLE, np.NPY_ALIGNED) 2750 2751 if np.PyArray_NDIM(oleft) == np.PyArray_NDIM(omode) == np.PyArray_NDIM(oright) == 0: 2752 fleft = PyFloat_AsDouble(left) 2753 fright = PyFloat_AsDouble(right) 2754 fmode = PyFloat_AsDouble(mode) 2755 2756 if fleft > fmode: 2757 raise ValueError("left > mode") 2758 if fmode > fright: 2759 raise ValueError("mode > right") 2760 if fleft == fright: 2761 raise ValueError("left == right") 2762 return cont(&random_triangular, &self._bitgen, size, self.lock, 3, 2763 fleft, '', CONS_NONE, 2764 fmode, '', CONS_NONE, 2765 fright, '', CONS_NONE, None) 2766 2767 if np.any(np.greater(oleft, omode)): 2768 raise ValueError("left > mode") 2769 if np.any(np.greater(omode, oright)): 2770 raise ValueError("mode > right") 2771 if np.any(np.equal(oleft, oright)): 2772 raise ValueError("left == right") 2773 2774 return cont_broadcast_3(&random_triangular, &self._bitgen, size, self.lock, 2775 oleft, '', CONS_NONE, 2776 omode, '', CONS_NONE, 2777 oright, '', CONS_NONE) 2778 2779 # Complicated, discrete distributions: 2780 def binomial(self, n, p, size=None): 2781 """ 2782 binomial(n, p, size=None) 2783 2784 Draw samples from a binomial distribution. 2785 2786 Samples are drawn from a binomial distribution with specified 2787 parameters, n trials and p probability of success where 2788 n an integer >= 0 and p is in the interval [0,1]. (n may be 2789 input as a float, but it is truncated to an integer in use) 2790 2791 Parameters 2792 ---------- 2793 n : int or array_like of ints 2794 Parameter of the distribution, >= 0. Floats are also accepted, 2795 but they will be truncated to integers. 2796 p : float or array_like of floats 2797 Parameter of the distribution, >= 0 and <=1. 2798 size : int or tuple of ints, optional 2799 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 2800 ``m * n * k`` samples are drawn. If size is ``None`` (default), 2801 a single value is returned if ``n`` and ``p`` are both scalars. 2802 Otherwise, ``np.broadcast(n, p).size`` samples are drawn. 2803 2804 Returns 2805 ------- 2806 out : ndarray or scalar 2807 Drawn samples from the parameterized binomial distribution, where 2808 each sample is equal to the number of successes over the n trials. 2809 2810 See Also 2811 -------- 2812 scipy.stats.binom : probability density function, distribution or 2813 cumulative density function, etc. 2814 2815 Notes 2816 ----- 2817 The probability density for the binomial distribution is 2818 2819 .. math:: P(N) = \\binom{n}{N}p^N(1-p)^{n-N}, 2820 2821 where :math:`n` is the number of trials, :math:`p` is the probability 2822 of success, and :math:`N` is the number of successes. 2823 2824 When estimating the standard error of a proportion in a population by 2825 using a random sample, the normal distribution works well unless the 2826 product p*n <=5, where p = population proportion estimate, and n = 2827 number of samples, in which case the binomial distribution is used 2828 instead. For example, a sample of 15 people shows 4 who are left 2829 handed, and 11 who are right handed. Then p = 4/15 = 27%. 0.27*15 = 4, 2830 so the binomial distribution should be used in this case. 2831 2832 References 2833 ---------- 2834 .. [1] Dalgaard, Peter, "Introductory Statistics with R", 2835 Springer-Verlag, 2002. 2836 .. [2] Glantz, Stanton A. "Primer of Biostatistics.", McGraw-Hill, 2837 Fifth Edition, 2002. 2838 .. [3] Lentner, Marvin, "Elementary Applied Statistics", Bogden 2839 and Quigley, 1972. 2840 .. [4] Weisstein, Eric W. "Binomial Distribution." From MathWorld--A 2841 Wolfram Web Resource. 2842 http://mathworld.wolfram.com/BinomialDistribution.html 2843 .. [5] Wikipedia, "Binomial distribution", 2844 https://en.wikipedia.org/wiki/Binomial_distribution 2845 2846 Examples 2847 -------- 2848 Draw samples from the distribution: 2849 2850 >>> rng = np.random.default_rng() 2851 >>> n, p = 10, .5 # number of trials, probability of each trial 2852 >>> s = rng.binomial(n, p, 1000) 2853 # result of flipping a coin 10 times, tested 1000 times. 2854 2855 A real world example. A company drills 9 wild-cat oil exploration 2856 wells, each with an estimated probability of success of 0.1. All nine 2857 wells fail. What is the probability of that happening? 2858 2859 Let's do 20,000 trials of the model, and count the number that 2860 generate zero positive results. 2861 2862 >>> sum(rng.binomial(9, 0.1, 20000) == 0)/20000. 2863 # answer = 0.38885, or 39%. 2864 2865 """ 2866 2867 # Uses a custom implementation since self._binomial is required 2868 cdef double _dp = 0 2869 cdef int64_t _in = 0 2870 cdef bint is_scalar = True 2871 cdef np.npy_intp i, cnt 2872 cdef np.ndarray randoms 2873 cdef np.int64_t *randoms_data 2874 cdef np.broadcast it 2875 2876 p_arr = <np.ndarray>np.PyArray_FROM_OTF(p, np.NPY_DOUBLE, np.NPY_ALIGNED) 2877 is_scalar = is_scalar and np.PyArray_NDIM(p_arr) == 0 2878 n_arr = <np.ndarray>np.PyArray_FROM_OTF(n, np.NPY_INT64, np.NPY_ALIGNED) 2879 is_scalar = is_scalar and np.PyArray_NDIM(n_arr) == 0 2880 2881 if not is_scalar: 2882 check_array_constraint(p_arr, 'p', CONS_BOUNDED_0_1) 2883 check_array_constraint(n_arr, 'n', CONS_NON_NEGATIVE) 2884 if size is not None: 2885 randoms = <np.ndarray>np.empty(size, np.int64) 2886 else: 2887 it = np.PyArray_MultiIterNew2(p_arr, n_arr) 2888 randoms = <np.ndarray>np.empty(it.shape, np.int64) 2889 2890 cnt = np.PyArray_SIZE(randoms) 2891 2892 it = np.PyArray_MultiIterNew3(randoms, p_arr, n_arr) 2893 validate_output_shape(it.shape, randoms) 2894 with self.lock, nogil: 2895 for i in range(cnt): 2896 _dp = (<double*>np.PyArray_MultiIter_DATA(it, 1))[0] 2897 _in = (<int64_t*>np.PyArray_MultiIter_DATA(it, 2))[0] 2898 (<int64_t*>np.PyArray_MultiIter_DATA(it, 0))[0] = random_binomial(&self._bitgen, _dp, _in, &self._binomial) 2899 2900 np.PyArray_MultiIter_NEXT(it) 2901 2902 return randoms 2903 2904 _dp = PyFloat_AsDouble(p) 2905 _in = <int64_t>n 2906 check_constraint(_dp, 'p', CONS_BOUNDED_0_1) 2907 check_constraint(<double>_in, 'n', CONS_NON_NEGATIVE) 2908 2909 if size is None: 2910 with self.lock: 2911 return random_binomial(&self._bitgen, _dp, _in, &self._binomial) 2912 2913 randoms = <np.ndarray>np.empty(size, np.int64) 2914 cnt = np.PyArray_SIZE(randoms) 2915 randoms_data = <np.int64_t *>np.PyArray_DATA(randoms) 2916 2917 with self.lock, nogil: 2918 for i in range(cnt): 2919 randoms_data[i] = random_binomial(&self._bitgen, _dp, _in, 2920 &self._binomial) 2921 2922 return randoms 2923 2924 def negative_binomial(self, n, p, size=None): 2925 """ 2926 negative_binomial(n, p, size=None) 2927 2928 Draw samples from a negative binomial distribution. 2929 2930 Samples are drawn from a negative binomial distribution with specified 2931 parameters, `n` successes and `p` probability of success where `n` 2932 is > 0 and `p` is in the interval (0, 1]. 2933 2934 Parameters 2935 ---------- 2936 n : float or array_like of floats 2937 Parameter of the distribution, > 0. 2938 p : float or array_like of floats 2939 Parameter of the distribution. Must satisfy 0 < p <= 1. 2940 size : int or tuple of ints, optional 2941 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 2942 ``m * n * k`` samples are drawn. If size is ``None`` (default), 2943 a single value is returned if ``n`` and ``p`` are both scalars. 2944 Otherwise, ``np.broadcast(n, p).size`` samples are drawn. 2945 2946 Returns 2947 ------- 2948 out : ndarray or scalar 2949 Drawn samples from the parameterized negative binomial distribution, 2950 where each sample is equal to N, the number of failures that 2951 occurred before a total of n successes was reached. 2952 2953 Notes 2954 ----- 2955 The probability mass function of the negative binomial distribution is 2956 2957 .. math:: P(N;n,p) = \\frac{\\Gamma(N+n)}{N!\\Gamma(n)}p^{n}(1-p)^{N}, 2958 2959 where :math:`n` is the number of successes, :math:`p` is the 2960 probability of success, :math:`N+n` is the number of trials, and 2961 :math:`\\Gamma` is the gamma function. When :math:`n` is an integer, 2962 :math:`\\frac{\\Gamma(N+n)}{N!\\Gamma(n)} = \\binom{N+n-1}{N}`, which is 2963 the more common form of this term in the the pmf. The negative 2964 binomial distribution gives the probability of N failures given n 2965 successes, with a success on the last trial. 2966 2967 If one throws a die repeatedly until the third time a "1" appears, 2968 then the probability distribution of the number of non-"1"s that 2969 appear before the third "1" is a negative binomial distribution. 2970 2971 References 2972 ---------- 2973 .. [1] Weisstein, Eric W. "Negative Binomial Distribution." From 2974 MathWorld--A Wolfram Web Resource. 2975 http://mathworld.wolfram.com/NegativeBinomialDistribution.html 2976 .. [2] Wikipedia, "Negative binomial distribution", 2977 https://en.wikipedia.org/wiki/Negative_binomial_distribution 2978 2979 Examples 2980 -------- 2981 Draw samples from the distribution: 2982 2983 A real world example. A company drills wild-cat oil 2984 exploration wells, each with an estimated probability of 2985 success of 0.1. What is the probability of having one success 2986 for each successive well, that is what is the probability of a 2987 single success after drilling 5 wells, after 6 wells, etc.? 2988 2989 >>> s = np.random.default_rng().negative_binomial(1, 0.1, 100000) 2990 >>> for i in range(1, 11): # doctest: +SKIP 2991 ... probability = sum(s<i) / 100000. 2992 ... print(i, "wells drilled, probability of one success =", probability) 2993 2994 """ 2995 return disc(&random_negative_binomial, &self._bitgen, size, self.lock, 2, 0, 2996 n, 'n', CONS_POSITIVE_NOT_NAN, 2997 p, 'p', CONS_BOUNDED_GT_0_1, 2998 0.0, '', CONS_NONE) 2999 3000 def poisson(self, lam=1.0, size=None): 3001 """ 3002 poisson(lam=1.0, size=None) 3003 3004 Draw samples from a Poisson distribution. 3005 3006 The Poisson distribution is the limit of the binomial distribution 3007 for large N. 3008 3009 Parameters 3010 ---------- 3011 lam : float or array_like of floats 3012 Expectation of interval, must be >= 0. A sequence of expectation 3013 intervals must be broadcastable over the requested size. 3014 size : int or tuple of ints, optional 3015 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 3016 ``m * n * k`` samples are drawn. If size is ``None`` (default), 3017 a single value is returned if ``lam`` is a scalar. Otherwise, 3018 ``np.array(lam).size`` samples are drawn. 3019 3020 Returns 3021 ------- 3022 out : ndarray or scalar 3023 Drawn samples from the parameterized Poisson distribution. 3024 3025 Notes 3026 ----- 3027 The Poisson distribution 3028 3029 .. math:: f(k; \\lambda)=\\frac{\\lambda^k e^{-\\lambda}}{k!} 3030 3031 For events with an expected separation :math:`\\lambda` the Poisson 3032 distribution :math:`f(k; \\lambda)` describes the probability of 3033 :math:`k` events occurring within the observed 3034 interval :math:`\\lambda`. 3035 3036 Because the output is limited to the range of the C int64 type, a 3037 ValueError is raised when `lam` is within 10 sigma of the maximum 3038 representable value. 3039 3040 References 3041 ---------- 3042 .. [1] Weisstein, Eric W. "Poisson Distribution." 3043 From MathWorld--A Wolfram Web Resource. 3044 http://mathworld.wolfram.com/PoissonDistribution.html 3045 .. [2] Wikipedia, "Poisson distribution", 3046 https://en.wikipedia.org/wiki/Poisson_distribution 3047 3048 Examples 3049 -------- 3050 Draw samples from the distribution: 3051 3052 >>> import numpy as np 3053 >>> rng = np.random.default_rng() 3054 >>> s = rng.poisson(5, 10000) 3055 3056 Display histogram of the sample: 3057 3058 >>> import matplotlib.pyplot as plt 3059 >>> count, bins, ignored = plt.hist(s, 14, density=True) 3060 >>> plt.show() 3061 3062 Draw each 100 values for lambda 100 and 500: 3063 3064 >>> s = rng.poisson(lam=(100., 500.), size=(100, 2)) 3065 3066 """ 3067 return disc(&random_poisson, &self._bitgen, size, self.lock, 1, 0, 3068 lam, 'lam', CONS_POISSON, 3069 0.0, '', CONS_NONE, 3070 0.0, '', CONS_NONE) 3071 3072 def zipf(self, a, size=None): 3073 """ 3074 zipf(a, size=None) 3075 3076 Draw samples from a Zipf distribution. 3077 3078 Samples are drawn from a Zipf distribution with specified parameter 3079 `a` > 1. 3080 3081 The Zipf distribution (also known as the zeta distribution) is a 3082 continuous probability distribution that satisfies Zipf's law: the 3083 frequency of an item is inversely proportional to its rank in a 3084 frequency table. 3085 3086 Parameters 3087 ---------- 3088 a : float or array_like of floats 3089 Distribution parameter. Must be greater than 1. 3090 size : int or tuple of ints, optional 3091 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 3092 ``m * n * k`` samples are drawn. If size is ``None`` (default), 3093 a single value is returned if ``a`` is a scalar. Otherwise, 3094 ``np.array(a).size`` samples are drawn. 3095 3096 Returns 3097 ------- 3098 out : ndarray or scalar 3099 Drawn samples from the parameterized Zipf distribution. 3100 3101 See Also 3102 -------- 3103 scipy.stats.zipf : probability density function, distribution, or 3104 cumulative density function, etc. 3105 3106 Notes 3107 ----- 3108 The probability density for the Zipf distribution is 3109 3110 .. math:: p(x) = \\frac{x^{-a}}{\\zeta(a)}, 3111 3112 where :math:`\\zeta` is the Riemann Zeta function. 3113 3114 It is named for the American linguist George Kingsley Zipf, who noted 3115 that the frequency of any word in a sample of a language is inversely 3116 proportional to its rank in the frequency table. 3117 3118 References 3119 ---------- 3120 .. [1] Zipf, G. K., "Selected Studies of the Principle of Relative 3121 Frequency in Language," Cambridge, MA: Harvard Univ. Press, 3122 1932. 3123 3124 Examples 3125 -------- 3126 Draw samples from the distribution: 3127 3128 >>> a = 2. # parameter 3129 >>> s = np.random.default_rng().zipf(a, 1000) 3130 3131 Display the histogram of the samples, along with 3132 the probability density function: 3133 3134 >>> import matplotlib.pyplot as plt 3135 >>> from scipy import special # doctest: +SKIP 3136 3137 Truncate s values at 50 so plot is interesting: 3138 3139 >>> count, bins, ignored = plt.hist(s[s<50], 3140 ... 50, density=True) 3141 >>> x = np.arange(1., 50.) 3142 >>> y = x**(-a) / special.zetac(a) # doctest: +SKIP 3143 >>> plt.plot(x, y/max(y), linewidth=2, color='r') # doctest: +SKIP 3144 >>> plt.show() 3145 3146 """ 3147 return disc(&random_zipf, &self._bitgen, size, self.lock, 1, 0, 3148 a, 'a', CONS_GT_1, 3149 0.0, '', CONS_NONE, 3150 0.0, '', CONS_NONE) 3151 3152 def geometric(self, p, size=None): 3153 """ 3154 geometric(p, size=None) 3155 3156 Draw samples from the geometric distribution. 3157 3158 Bernoulli trials are experiments with one of two outcomes: 3159 success or failure (an example of such an experiment is flipping 3160 a coin). The geometric distribution models the number of trials 3161 that must be run in order to achieve success. It is therefore 3162 supported on the positive integers, ``k = 1, 2, ...``. 3163 3164 The probability mass function of the geometric distribution is 3165 3166 .. math:: f(k) = (1 - p)^{k - 1} p 3167 3168 where `p` is the probability of success of an individual trial. 3169 3170 Parameters 3171 ---------- 3172 p : float or array_like of floats 3173 The probability of success of an individual trial. 3174 size : int or tuple of ints, optional 3175 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 3176 ``m * n * k`` samples are drawn. If size is ``None`` (default), 3177 a single value is returned if ``p`` is a scalar. Otherwise, 3178 ``np.array(p).size`` samples are drawn. 3179 3180 Returns 3181 ------- 3182 out : ndarray or scalar 3183 Drawn samples from the parameterized geometric distribution. 3184 3185 Examples 3186 -------- 3187 Draw ten thousand values from the geometric distribution, 3188 with the probability of an individual success equal to 0.35: 3189 3190 >>> z = np.random.default_rng().geometric(p=0.35, size=10000) 3191 3192 How many trials succeeded after a single run? 3193 3194 >>> (z == 1).sum() / 10000. 3195 0.34889999999999999 #random 3196 3197 """ 3198 return disc(&random_geometric, &self._bitgen, size, self.lock, 1, 0, 3199 p, 'p', CONS_BOUNDED_GT_0_1, 3200 0.0, '', CONS_NONE, 3201 0.0, '', CONS_NONE) 3202 3203 def hypergeometric(self, ngood, nbad, nsample, size=None): 3204 """ 3205 hypergeometric(ngood, nbad, nsample, size=None) 3206 3207 Draw samples from a Hypergeometric distribution. 3208 3209 Samples are drawn from a hypergeometric distribution with specified 3210 parameters, `ngood` (ways to make a good selection), `nbad` (ways to make 3211 a bad selection), and `nsample` (number of items sampled, which is less 3212 than or equal to the sum ``ngood + nbad``). 3213 3214 Parameters 3215 ---------- 3216 ngood : int or array_like of ints 3217 Number of ways to make a good selection. Must be nonnegative and 3218 less than 10**9. 3219 nbad : int or array_like of ints 3220 Number of ways to make a bad selection. Must be nonnegative and 3221 less than 10**9. 3222 nsample : int or array_like of ints 3223 Number of items sampled. Must be nonnegative and less than 3224 ``ngood + nbad``. 3225 size : int or tuple of ints, optional 3226 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 3227 ``m * n * k`` samples are drawn. If size is ``None`` (default), 3228 a single value is returned if `ngood`, `nbad`, and `nsample` 3229 are all scalars. Otherwise, ``np.broadcast(ngood, nbad, nsample).size`` 3230 samples are drawn. 3231 3232 Returns 3233 ------- 3234 out : ndarray or scalar 3235 Drawn samples from the parameterized hypergeometric distribution. Each 3236 sample is the number of good items within a randomly selected subset of 3237 size `nsample` taken from a set of `ngood` good items and `nbad` bad items. 3238 3239 See Also 3240 -------- 3241 multivariate_hypergeometric : Draw samples from the multivariate 3242 hypergeometric distribution. 3243 scipy.stats.hypergeom : probability density function, distribution or 3244 cumulative density function, etc. 3245 3246 Notes 3247 ----- 3248 The probability density for the Hypergeometric distribution is 3249 3250 .. math:: P(x) = \\frac{\\binom{g}{x}\\binom{b}{n-x}}{\\binom{g+b}{n}}, 3251 3252 where :math:`0 \\le x \\le n` and :math:`n-b \\le x \\le g` 3253 3254 for P(x) the probability of ``x`` good results in the drawn sample, 3255 g = `ngood`, b = `nbad`, and n = `nsample`. 3256 3257 Consider an urn with black and white marbles in it, `ngood` of them 3258 are black and `nbad` are white. If you draw `nsample` balls without 3259 replacement, then the hypergeometric distribution describes the 3260 distribution of black balls in the drawn sample. 3261 3262 Note that this distribution is very similar to the binomial 3263 distribution, except that in this case, samples are drawn without 3264 replacement, whereas in the Binomial case samples are drawn with 3265 replacement (or the sample space is infinite). As the sample space 3266 becomes large, this distribution approaches the binomial. 3267 3268 The arguments `ngood` and `nbad` each must be less than `10**9`. For 3269 extremely large arguments, the algorithm that is used to compute the 3270 samples [4]_ breaks down because of loss of precision in floating point 3271 calculations. For such large values, if `nsample` is not also large, 3272 the distribution can be approximated with the binomial distribution, 3273 `binomial(n=nsample, p=ngood/(ngood + nbad))`. 3274 3275 References 3276 ---------- 3277 .. [1] Lentner, Marvin, "Elementary Applied Statistics", Bogden 3278 and Quigley, 1972. 3279 .. [2] Weisstein, Eric W. "Hypergeometric Distribution." From 3280 MathWorld--A Wolfram Web Resource. 3281 http://mathworld.wolfram.com/HypergeometricDistribution.html 3282 .. [3] Wikipedia, "Hypergeometric distribution", 3283 https://en.wikipedia.org/wiki/Hypergeometric_distribution 3284 .. [4] Stadlober, Ernst, "The ratio of uniforms approach for generating 3285 discrete random variates", Journal of Computational and Applied 3286 Mathematics, 31, pp. 181-189 (1990). 3287 3288 Examples 3289 -------- 3290 Draw samples from the distribution: 3291 3292 >>> rng = np.random.default_rng() 3293 >>> ngood, nbad, nsamp = 100, 2, 10 3294 # number of good, number of bad, and number of samples 3295 >>> s = rng.hypergeometric(ngood, nbad, nsamp, 1000) 3296 >>> from matplotlib.pyplot import hist 3297 >>> hist(s) 3298 # note that it is very unlikely to grab both bad items 3299 3300 Suppose you have an urn with 15 white and 15 black marbles. 3301 If you pull 15 marbles at random, how likely is it that 3302 12 or more of them are one color? 3303 3304 >>> s = rng.hypergeometric(15, 15, 15, 100000) 3305 >>> sum(s>=12)/100000. + sum(s<=3)/100000. 3306 # answer = 0.003 ... pretty unlikely! 3307 3308 """ 3309 DEF HYPERGEOM_MAX = 10**9 3310 cdef bint is_scalar = True 3311 cdef np.ndarray ongood, onbad, onsample 3312 cdef int64_t lngood, lnbad, lnsample 3313 3314 ongood = <np.ndarray>np.PyArray_FROM_OTF(ngood, np.NPY_INT64, np.NPY_ALIGNED) 3315 onbad = <np.ndarray>np.PyArray_FROM_OTF(nbad, np.NPY_INT64, np.NPY_ALIGNED) 3316 onsample = <np.ndarray>np.PyArray_FROM_OTF(nsample, np.NPY_INT64, np.NPY_ALIGNED) 3317 3318 if np.PyArray_NDIM(ongood) == np.PyArray_NDIM(onbad) == np.PyArray_NDIM(onsample) == 0: 3319 3320 lngood = <int64_t>ngood 3321 lnbad = <int64_t>nbad 3322 lnsample = <int64_t>nsample 3323 3324 if lngood >= HYPERGEOM_MAX or lnbad >= HYPERGEOM_MAX: 3325 raise ValueError("both ngood and nbad must be less than %d" % 3326 HYPERGEOM_MAX) 3327 if lngood + lnbad < lnsample: 3328 raise ValueError("ngood + nbad < nsample") 3329 return disc(&random_hypergeometric, &self._bitgen, size, self.lock, 0, 3, 3330 lngood, 'ngood', CONS_NON_NEGATIVE, 3331 lnbad, 'nbad', CONS_NON_NEGATIVE, 3332 lnsample, 'nsample', CONS_NON_NEGATIVE) 3333 3334 if np.any(ongood >= HYPERGEOM_MAX) or np.any(onbad >= HYPERGEOM_MAX): 3335 raise ValueError("both ngood and nbad must be less than %d" % 3336 HYPERGEOM_MAX) 3337 3338 if np.any(np.less(np.add(ongood, onbad), onsample)): 3339 raise ValueError("ngood + nbad < nsample") 3340 3341 return discrete_broadcast_iii(&random_hypergeometric, &self._bitgen, size, self.lock, 3342 ongood, 'ngood', CONS_NON_NEGATIVE, 3343 onbad, 'nbad', CONS_NON_NEGATIVE, 3344 onsample, 'nsample', CONS_NON_NEGATIVE) 3345 3346 def logseries(self, p, size=None): 3347 """ 3348 logseries(p, size=None) 3349 3350 Draw samples from a logarithmic series distribution. 3351 3352 Samples are drawn from a log series distribution with specified 3353 shape parameter, 0 < ``p`` < 1. 3354 3355 Parameters 3356 ---------- 3357 p : float or array_like of floats 3358 Shape parameter for the distribution. Must be in the range (0, 1). 3359 size : int or tuple of ints, optional 3360 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 3361 ``m * n * k`` samples are drawn. If size is ``None`` (default), 3362 a single value is returned if ``p`` is a scalar. Otherwise, 3363 ``np.array(p).size`` samples are drawn. 3364 3365 Returns 3366 ------- 3367 out : ndarray or scalar 3368 Drawn samples from the parameterized logarithmic series distribution. 3369 3370 See Also 3371 -------- 3372 scipy.stats.logser : probability density function, distribution or 3373 cumulative density function, etc. 3374 3375 Notes 3376 ----- 3377 The probability mass function for the Log Series distribution is 3378 3379 .. math:: P(k) = \\frac{-p^k}{k \\ln(1-p)}, 3380 3381 where p = probability. 3382 3383 The log series distribution is frequently used to represent species 3384 richness and occurrence, first proposed by Fisher, Corbet, and 3385 Williams in 1943 [2]. It may also be used to model the numbers of 3386 occupants seen in cars [3]. 3387 3388 References 3389 ---------- 3390 .. [1] Buzas, Martin A.; Culver, Stephen J., Understanding regional 3391 species diversity through the log series distribution of 3392 occurrences: BIODIVERSITY RESEARCH Diversity & Distributions, 3393 Volume 5, Number 5, September 1999 , pp. 187-195(9). 3394 .. [2] Fisher, R.A,, A.S. Corbet, and C.B. Williams. 1943. The 3395 relation between the number of species and the number of 3396 individuals in a random sample of an animal population. 3397 Journal of Animal Ecology, 12:42-58. 3398 .. [3] D. J. Hand, F. Daly, D. Lunn, E. Ostrowski, A Handbook of Small 3399 Data Sets, CRC Press, 1994. 3400 .. [4] Wikipedia, "Logarithmic distribution", 3401 https://en.wikipedia.org/wiki/Logarithmic_distribution 3402 3403 Examples 3404 -------- 3405 Draw samples from the distribution: 3406 3407 >>> a = .6 3408 >>> s = np.random.default_rng().logseries(a, 10000) 3409 >>> import matplotlib.pyplot as plt 3410 >>> count, bins, ignored = plt.hist(s) 3411 3412 # plot against distribution 3413 3414 >>> def logseries(k, p): 3415 ... return -p**k/(k*np.log(1-p)) 3416 >>> plt.plot(bins, logseries(bins, a) * count.max()/ 3417 ... logseries(bins, a).max(), 'r') 3418 >>> plt.show() 3419 3420 """ 3421 return disc(&random_logseries, &self._bitgen, size, self.lock, 1, 0, 3422 p, 'p', CONS_BOUNDED_0_1, 3423 0.0, '', CONS_NONE, 3424 0.0, '', CONS_NONE) 3425 3426 # Multivariate distributions: 3427 def multivariate_normal(self, mean, cov, size=None, check_valid='warn', 3428 tol=1e-8, *, method='svd'): 3429 """ 3430 multivariate_normal(mean, cov, size=None, check_valid='warn', 3431 tol=1e-8, *, method='svd') 3432 3433 Draw random samples from a multivariate normal distribution. 3434 3435 The multivariate normal, multinormal or Gaussian distribution is a 3436 generalization of the one-dimensional normal distribution to higher 3437 dimensions. Such a distribution is specified by its mean and 3438 covariance matrix. These parameters are analogous to the mean 3439 (average or "center") and variance (standard deviation, or "width," 3440 squared) of the one-dimensional normal distribution. 3441 3442 Parameters 3443 ---------- 3444 mean : 1-D array_like, of length N 3445 Mean of the N-dimensional distribution. 3446 cov : 2-D array_like, of shape (N, N) 3447 Covariance matrix of the distribution. It must be symmetric and 3448 positive-semidefinite for proper sampling. 3449 size : int or tuple of ints, optional 3450 Given a shape of, for example, ``(m,n,k)``, ``m*n*k`` samples are 3451 generated, and packed in an `m`-by-`n`-by-`k` arrangement. Because 3452 each sample is `N`-dimensional, the output shape is ``(m,n,k,N)``. 3453 If no shape is specified, a single (`N`-D) sample is returned. 3454 check_valid : { 'warn', 'raise', 'ignore' }, optional 3455 Behavior when the covariance matrix is not positive semidefinite. 3456 tol : float, optional 3457 Tolerance when checking the singular values in covariance matrix. 3458 cov is cast to double before the check. 3459 method : { 'svd', 'eigh', 'cholesky'}, optional 3460 The cov input is used to compute a factor matrix A such that 3461 ``A @ A.T = cov``. This argument is used to select the method 3462 used to compute the factor matrix A. The default method 'svd' is 3463 the slowest, while 'cholesky' is the fastest but less robust than 3464 the slowest method. The method `eigh` uses eigen decomposition to 3465 compute A and is faster than svd but slower than cholesky. 3466 3467 .. versionadded:: 1.18.0 3468 3469 Returns 3470 ------- 3471 out : ndarray 3472 The drawn samples, of shape *size*, if that was provided. If not, 3473 the shape is ``(N,)``. 3474 3475 In other words, each entry ``out[i,j,...,:]`` is an N-dimensional 3476 value drawn from the distribution. 3477 3478 Notes 3479 ----- 3480 The mean is a coordinate in N-dimensional space, which represents the 3481 location where samples are most likely to be generated. This is 3482 analogous to the peak of the bell curve for the one-dimensional or 3483 univariate normal distribution. 3484 3485 Covariance indicates the level to which two variables vary together. 3486 From the multivariate normal distribution, we draw N-dimensional 3487 samples, :math:`X = [x_1, x_2, ... x_N]`. The covariance matrix 3488 element :math:`C_{ij}` is the covariance of :math:`x_i` and :math:`x_j`. 3489 The element :math:`C_{ii}` is the variance of :math:`x_i` (i.e. its 3490 "spread"). 3491 3492 Instead of specifying the full covariance matrix, popular 3493 approximations include: 3494 3495 - Spherical covariance (`cov` is a multiple of the identity matrix) 3496 - Diagonal covariance (`cov` has non-negative elements, and only on 3497 the diagonal) 3498 3499 This geometrical property can be seen in two dimensions by plotting 3500 generated data-points: 3501 3502 >>> mean = [0, 0] 3503 >>> cov = [[1, 0], [0, 100]] # diagonal covariance 3504 3505 Diagonal covariance means that points are oriented along x or y-axis: 3506 3507 >>> import matplotlib.pyplot as plt 3508 >>> x, y = np.random.default_rng().multivariate_normal(mean, cov, 5000).T 3509 >>> plt.plot(x, y, 'x') 3510 >>> plt.axis('equal') 3511 >>> plt.show() 3512 3513 Note that the covariance matrix must be positive semidefinite (a.k.a. 3514 nonnegative-definite). Otherwise, the behavior of this method is 3515 undefined and backwards compatibility is not guaranteed. 3516 3517 References 3518 ---------- 3519 .. [1] Papoulis, A., "Probability, Random Variables, and Stochastic 3520 Processes," 3rd ed., New York: McGraw-Hill, 1991. 3521 .. [2] Duda, R. O., Hart, P. E., and Stork, D. G., "Pattern 3522 Classification," 2nd ed., New York: Wiley, 2001. 3523 3524 Examples 3525 -------- 3526 >>> mean = (1, 2) 3527 >>> cov = [[1, 0], [0, 1]] 3528 >>> rng = np.random.default_rng() 3529 >>> x = rng.multivariate_normal(mean, cov, (3, 3)) 3530 >>> x.shape 3531 (3, 3, 2) 3532 3533 We can use a different method other than the default to factorize cov: 3534 >>> y = rng.multivariate_normal(mean, cov, (3, 3), method='cholesky') 3535 >>> y.shape 3536 (3, 3, 2) 3537 3538 The following is probably true, given that 0.6 is roughly twice the 3539 standard deviation: 3540 3541 >>> list((x[0,0,:] - mean) < 0.6) 3542 [True, True] # random 3543 3544 """ 3545 if method not in {'eigh', 'svd', 'cholesky'}: 3546 raise ValueError( 3547 "method must be one of {'eigh', 'svd', 'cholesky'}") 3548 3549 # Check preconditions on arguments 3550 mean = np.array(mean) 3551 cov = np.array(cov) 3552 if size is None: 3553 shape = [] 3554 elif isinstance(size, (int, long, np.integer)): 3555 shape = [size] 3556 else: 3557 shape = size 3558 3559 if len(mean.shape) != 1: 3560 raise ValueError("mean must be 1 dimensional") 3561 if (len(cov.shape) != 2) or (cov.shape[0] != cov.shape[1]): 3562 raise ValueError("cov must be 2 dimensional and square") 3563 if mean.shape[0] != cov.shape[0]: 3564 raise ValueError("mean and cov must have same length") 3565 3566 # Compute shape of output and create a matrix of independent 3567 # standard normally distributed random numbers. The matrix has rows 3568 # with the same length as mean and as many rows are necessary to 3569 # form a matrix of shape final_shape. 3570 final_shape = list(shape[:]) 3571 final_shape.append(mean.shape[0]) 3572 x = self.standard_normal(final_shape).reshape(-1, mean.shape[0]) 3573 3574 # Transform matrix of standard normals into matrix where each row 3575 # contains multivariate normals with the desired covariance. 3576 # Compute A such that dot(transpose(A),A) == cov. 3577 # Then the matrix products of the rows of x and A has the desired 3578 # covariance. Note that sqrt(s)*v where (u,s,v) is the singular value 3579 # decomposition of cov is such an A. 3580 # 3581 # Also check that cov is positive-semidefinite. If so, the u.T and v 3582 # matrices should be equal up to roundoff error if cov is 3583 # symmetric and the singular value of the corresponding row is 3584 # not zero. We continue to use the SVD rather than Cholesky in 3585 # order to preserve current outputs. Note that symmetry has not 3586 # been checked. 3587 3588 # GH10839, ensure double to make tol meaningful 3589 cov = cov.astype(np.double) 3590 if method == 'svd': 3591 from numpy.linalg import svd 3592 (u, s, vh) = svd(cov) 3593 elif method == 'eigh': 3594 from numpy.linalg import eigh 3595 # could call linalg.svd(hermitian=True), but that calculates a vh we don't need 3596 (s, u) = eigh(cov) 3597 else: 3598 from numpy.linalg import cholesky 3599 l = cholesky(cov) 3600 3601 # make sure check_valid is ignored whe method == 'cholesky' 3602 # since the decomposition will have failed if cov is not valid. 3603 if check_valid != 'ignore' and method != 'cholesky': 3604 if check_valid != 'warn' and check_valid != 'raise': 3605 raise ValueError( 3606 "check_valid must equal 'warn', 'raise', or 'ignore'") 3607 if method == 'svd': 3608 psd = np.allclose(np.dot(vh.T * s, vh), cov, rtol=tol, atol=tol) 3609 else: 3610 psd = not np.any(s < -tol) 3611 if not psd: 3612 if check_valid == 'warn': 3613 warnings.warn("covariance is not positive-semidefinite.", 3614 RuntimeWarning) 3615 else: 3616 raise ValueError("covariance is not positive-semidefinite.") 3617 3618 if method == 'cholesky': 3619 _factor = l 3620 elif method == 'eigh': 3621 # if check_valid == 'ignore' we need to ensure that np.sqrt does not 3622 # return a NaN if s is a very small negative number that is 3623 # approximately zero or when the covariance is not positive-semidefinite 3624 _factor = u * np.sqrt(abs(s)) 3625 else: 3626 _factor = u * np.sqrt(s) 3627 3628 x = mean + x @ _factor.T 3629 x.shape = tuple(final_shape) 3630 return x 3631 3632 def multinomial(self, object n, object pvals, size=None): 3633 """ 3634 multinomial(n, pvals, size=None) 3635 3636 Draw samples from a multinomial distribution. 3637 3638 The multinomial distribution is a multivariate generalization of the 3639 binomial distribution. Take an experiment with one of ``p`` 3640 possible outcomes. An example of such an experiment is throwing a dice, 3641 where the outcome can be 1 through 6. Each sample drawn from the 3642 distribution represents `n` such experiments. Its values, 3643 ``X_i = [X_0, X_1, ..., X_p]``, represent the number of times the 3644 outcome was ``i``. 3645 3646 Parameters 3647 ---------- 3648 n : int or array-like of ints 3649 Number of experiments. 3650 pvals : sequence of floats, length p 3651 Probabilities of each of the ``p`` different outcomes. These 3652 must sum to 1 (however, the last element is always assumed to 3653 account for the remaining probability, as long as 3654 ``sum(pvals[:-1]) <= 1)``. 3655 size : int or tuple of ints, optional 3656 Output shape. If the given shape is, e.g., ``(m, n, k)``, then 3657 ``m * n * k`` samples are drawn. Default is None, in which case a 3658 single value is returned. 3659 3660 Returns 3661 ------- 3662 out : ndarray 3663 The drawn samples, of shape *size*, if that was provided. If not, 3664 the shape is ``(N,)``. 3665 3666 In other words, each entry ``out[i,j,...,:]`` is an N-dimensional 3667 value drawn from the distribution. 3668 3669 Examples 3670 -------- 3671 Throw a dice 20 times: 3672 3673 >>> rng = np.random.default_rng() 3674 >>> rng.multinomial(20, [1/6.]*6, size=1) 3675 array([[4, 1, 7, 5, 2, 1]]) # random 3676 3677 It landed 4 times on 1, once on 2, etc. 3678 3679 Now, throw the dice 20 times, and 20 times again: 3680 3681 >>> rng.multinomial(20, [1/6.]*6, size=2) 3682 array([[3, 4, 3, 3, 4, 3], 3683 [2, 4, 3, 4, 0, 7]]) # random 3684 3685 For the first run, we threw 3 times 1, 4 times 2, etc. For the second, 3686 we threw 2 times 1, 4 times 2, etc. 3687 3688 Now, do one experiment throwing the dice 10 time, and 10 times again, 3689 and another throwing the dice 20 times, and 20 times again: 3690 3691 >>> rng.multinomial([[10], [20]], [1/6.]*6, size=(2, 2)) 3692 array([[[2, 4, 0, 1, 2, 1], 3693 [1, 3, 0, 3, 1, 2]], 3694 [[1, 4, 4, 4, 4, 3], 3695 [3, 3, 2, 5, 5, 2]]]) # random 3696 3697 The first array shows the outcomes of throwing the dice 10 times, and 3698 the second shows the outcomes from throwing the dice 20 times. 3699 3700 A loaded die is more likely to land on number 6: 3701 3702 >>> rng.multinomial(100, [1/7.]*5 + [2/7.]) 3703 array([11, 16, 14, 17, 16, 26]) # random 3704 3705 The probability inputs should be normalized. As an implementation 3706 detail, the value of the last entry is ignored and assumed to take 3707 up any leftover probability mass, but this should not be relied on. 3708 A biased coin which has twice as much weight on one side as on the 3709 other should be sampled like so: 3710 3711 >>> rng.multinomial(100, [1.0 / 3, 2.0 / 3]) # RIGHT 3712 array([38, 62]) # random 3713 3714 not like: 3715 3716 >>> rng.multinomial(100, [1.0, 2.0]) # WRONG 3717 Traceback (most recent call last): 3718 ValueError: pvals < 0, pvals > 1 or pvals contains NaNs 3719 3720 """ 3721 3722 cdef np.npy_intp d, i, sz, offset 3723 cdef np.ndarray parr, mnarr, on, temp_arr 3724 cdef double *pix 3725 cdef int64_t *mnix 3726 cdef int64_t ni 3727 cdef np.broadcast it 3728 3729 d = len(pvals) 3730 on = <np.ndarray>np.PyArray_FROM_OTF(n, np.NPY_INT64, np.NPY_ALIGNED) 3731 parr = <np.ndarray>np.PyArray_FROMANY( 3732 pvals, np.NPY_DOUBLE, 1, 1, np.NPY_ARRAY_ALIGNED | np.NPY_ARRAY_C_CONTIGUOUS) 3733 pix = <double*>np.PyArray_DATA(parr) 3734 check_array_constraint(parr, 'pvals', CONS_BOUNDED_0_1) 3735 if kahan_sum(pix, d-1) > (1.0 + 1e-12): 3736 raise ValueError("sum(pvals[:-1]) > 1.0") 3737 3738 if np.PyArray_NDIM(on) != 0: # vector 3739 check_array_constraint(on, 'n', CONS_NON_NEGATIVE) 3740 if size is None: 3741 it = np.PyArray_MultiIterNew1(on) 3742 else: 3743 temp = np.empty(size, dtype=np.int8) 3744 temp_arr = <np.ndarray>temp 3745 it = np.PyArray_MultiIterNew2(on, temp_arr) 3746 validate_output_shape(it.shape, temp_arr) 3747 shape = it.shape + (d,) 3748 multin = np.zeros(shape, dtype=np.int64) 3749 mnarr = <np.ndarray>multin 3750 mnix = <int64_t*>np.PyArray_DATA(mnarr) 3751 offset = 0 3752 sz = it.size 3753 with self.lock, nogil: 3754 for i in range(sz): 3755 ni = (<int64_t*>np.PyArray_MultiIter_DATA(it, 0))[0] 3756 random_multinomial(&self._bitgen, ni, &mnix[offset], pix, d, &self._binomial) 3757 offset += d 3758 np.PyArray_MultiIter_NEXT(it) 3759 return multin 3760 3761 if size is None: 3762 shape = (d,) 3763 else: 3764 try: 3765 shape = (operator.index(size), d) 3766 except: 3767 shape = tuple(size) + (d,) 3768 3769 multin = np.zeros(shape, dtype=np.int64) 3770 mnarr = <np.ndarray>multin 3771 mnix = <int64_t*>np.PyArray_DATA(mnarr) 3772 sz = np.PyArray_SIZE(mnarr) 3773 ni = n 3774 check_constraint(ni, 'n', CONS_NON_NEGATIVE) 3775 offset = 0 3776 with self.lock, nogil: 3777 for i in range(sz // d): 3778 random_multinomial(&self._bitgen, ni, &mnix[offset], pix, d, &self._binomial) 3779 offset += d 3780 3781 return multin 3782 3783 def multivariate_hypergeometric(self, object colors, object nsample, 3784 size=None, method='marginals'): 3785 """ 3786 multivariate_hypergeometric(colors, nsample, size=None, 3787 method='marginals') 3788 3789 Generate variates from a multivariate hypergeometric distribution. 3790 3791 The multivariate hypergeometric distribution is a generalization 3792 of the hypergeometric distribution. 3793 3794 Choose ``nsample`` items at random without replacement from a 3795 collection with ``N`` distinct types. ``N`` is the length of 3796 ``colors``, and the values in ``colors`` are the number of occurrences 3797 of that type in the collection. The total number of items in the 3798 collection is ``sum(colors)``. Each random variate generated by this 3799 function is a vector of length ``N`` holding the counts of the 3800 different types that occurred in the ``nsample`` items. 3801 3802 The name ``colors`` comes from a common description of the 3803 distribution: it is the probability distribution of the number of 3804 marbles of each color selected without replacement from an urn 3805 containing marbles of different colors; ``colors[i]`` is the number 3806 of marbles in the urn with color ``i``. 3807 3808 Parameters 3809 ---------- 3810 colors : sequence of integers 3811 The number of each type of item in the collection from which 3812 a sample is drawn. The values in ``colors`` must be nonnegative. 3813 To avoid loss of precision in the algorithm, ``sum(colors)`` 3814 must be less than ``10**9`` when `method` is "marginals". 3815 nsample : int 3816 The number of items selected. ``nsample`` must not be greater 3817 than ``sum(colors)``. 3818 size : int or tuple of ints, optional 3819 The number of variates to generate, either an integer or a tuple 3820 holding the shape of the array of variates. If the given size is, 3821 e.g., ``(k, m)``, then ``k * m`` variates are drawn, where one 3822 variate is a vector of length ``len(colors)``, and the return value 3823 has shape ``(k, m, len(colors))``. If `size` is an integer, the 3824 output has shape ``(size, len(colors))``. Default is None, in 3825 which case a single variate is returned as an array with shape 3826 ``(len(colors),)``. 3827 method : string, optional 3828 Specify the algorithm that is used to generate the variates. 3829 Must be 'count' or 'marginals' (the default). See the Notes 3830 for a description of the methods. 3831 3832 Returns 3833 ------- 3834 variates : ndarray 3835 Array of variates drawn from the multivariate hypergeometric 3836 distribution. 3837 3838 See Also 3839 -------- 3840 hypergeometric : Draw samples from the (univariate) hypergeometric 3841 distribution. 3842 3843 Notes 3844 ----- 3845 The two methods do not return the same sequence of variates. 3846 3847 The "count" algorithm is roughly equivalent to the following numpy 3848 code:: 3849 3850 choices = np.repeat(np.arange(len(colors)), colors) 3851 selection = np.random.choice(choices, nsample, replace=False) 3852 variate = np.bincount(selection, minlength=len(colors)) 3853 3854 The "count" algorithm uses a temporary array of integers with length 3855 ``sum(colors)``. 3856 3857 The "marginals" algorithm generates a variate by using repeated 3858 calls to the univariate hypergeometric sampler. It is roughly 3859 equivalent to:: 3860 3861 variate = np.zeros(len(colors), dtype=np.int64) 3862 # `remaining` is the cumulative sum of `colors` from the last 3863 # element to the first; e.g. if `colors` is [3, 1, 5], then 3864 # `remaining` is [9, 6, 5]. 3865 remaining = np.cumsum(colors[::-1])[::-1] 3866 for i in range(len(colors)-1): 3867 if nsample < 1: 3868 break 3869 variate[i] = hypergeometric(colors[i], remaining[i+1], 3870 nsample) 3871 nsample -= variate[i] 3872 variate[-1] = nsample 3873 3874 The default method is "marginals". For some cases (e.g. when 3875 `colors` contains relatively small integers), the "count" method 3876 can be significantly faster than the "marginals" method. If 3877 performance of the algorithm is important, test the two methods 3878 with typical inputs to decide which works best. 3879 3880 .. versionadded:: 1.18.0 3881 3882 Examples 3883 -------- 3884 >>> colors = [16, 8, 4] 3885 >>> seed = 4861946401452 3886 >>> gen = np.random.Generator(np.random.PCG64(seed)) 3887 >>> gen.multivariate_hypergeometric(colors, 6) 3888 array([5, 0, 1]) 3889 >>> gen.multivariate_hypergeometric(colors, 6, size=3) 3890 array([[5, 0, 1], 3891 [2, 2, 2], 3892 [3, 3, 0]]) 3893 >>> gen.multivariate_hypergeometric(colors, 6, size=(2, 2)) 3894 array([[[3, 2, 1], 3895 [3, 2, 1]], 3896 [[4, 1, 1], 3897 [3, 2, 1]]]) 3898 """ 3899 cdef int64_t nsamp 3900 cdef size_t num_colors 3901 cdef int64_t total 3902 cdef int64_t *colors_ptr 3903 cdef int64_t max_index 3904 cdef size_t num_variates 3905 cdef int64_t *variates_ptr 3906 cdef int result 3907 3908 if method not in ['count', 'marginals']: 3909 raise ValueError('method must be "count" or "marginals".') 3910 3911 try: 3912 operator.index(nsample) 3913 except TypeError: 3914 raise ValueError('nsample must be an integer') 3915 3916 if nsample < 0: 3917 raise ValueError("nsample must be nonnegative.") 3918 if nsample > INT64_MAX: 3919 raise ValueError("nsample must not exceed %d" % INT64_MAX) 3920 nsamp = nsample 3921 3922 # Validation of colors, a 1-d sequence of nonnegative integers. 3923 invalid_colors = False 3924 try: 3925 colors = np.asarray(colors) 3926 if colors.ndim != 1: 3927 invalid_colors = True 3928 elif colors.size > 0 and not np.issubdtype(colors.dtype, 3929 np.integer): 3930 invalid_colors = True 3931 elif np.any((colors < 0) | (colors > INT64_MAX)): 3932 invalid_colors = True 3933 except ValueError: 3934 invalid_colors = True 3935 if invalid_colors: 3936 raise ValueError('colors must be a one-dimensional sequence ' 3937 'of nonnegative integers not exceeding %d.' % 3938 INT64_MAX) 3939 3940 colors = np.ascontiguousarray(colors, dtype=np.int64) 3941 num_colors = colors.size 3942 3943 colors_ptr = <int64_t *> np.PyArray_DATA(colors) 3944 3945 total = _safe_sum_nonneg_int64(num_colors, colors_ptr) 3946 if total == -1: 3947 raise ValueError("sum(colors) must not exceed the maximum value " 3948 "of a 64 bit signed integer (%d)" % INT64_MAX) 3949 3950 if method == 'marginals' and total >= 1000000000: 3951 raise ValueError('When method is "marginals", sum(colors) must ' 3952 'be less than 1000000000.') 3953 3954 # The C code that implements the 'count' method will malloc an 3955 # array of size total*sizeof(size_t). Here we ensure that that 3956 # product does not overflow. 3957 if SIZE_MAX > <uint64_t>INT64_MAX: 3958 max_index = INT64_MAX // sizeof(size_t) 3959 else: 3960 max_index = SIZE_MAX // sizeof(size_t) 3961 if method == 'count' and total > max_index: 3962 raise ValueError("When method is 'count', sum(colors) must not " 3963 "exceed %d" % max_index) 3964 if nsamp > total: 3965 raise ValueError("nsample > sum(colors)") 3966 3967 # Figure out the shape of the return array. 3968 if size is None: 3969 shape = (num_colors,) 3970 elif np.isscalar(size): 3971 shape = (size, num_colors) 3972 else: 3973 shape = tuple(size) + (num_colors,) 3974 variates = np.zeros(shape, dtype=np.int64) 3975 3976 if num_colors == 0: 3977 return variates 3978 3979 # One variate is a vector of length num_colors. 3980 num_variates = variates.size // num_colors 3981 variates_ptr = <int64_t *> np.PyArray_DATA(variates) 3982 3983 if method == 'count': 3984 with self.lock, nogil: 3985 result = random_multivariate_hypergeometric_count(&self._bitgen, 3986 total, num_colors, colors_ptr, nsamp, 3987 num_variates, variates_ptr) 3988 if result == -1: 3989 raise MemoryError("Insufficient memory for multivariate_" 3990 "hypergeometric with method='count' and " 3991 "sum(colors)=%d" % total) 3992 else: 3993 with self.lock, nogil: 3994 random_multivariate_hypergeometric_marginals(&self._bitgen, 3995 total, num_colors, colors_ptr, nsamp, 3996 num_variates, variates_ptr) 3997 return variates 3998 3999 def dirichlet(self, object alpha, size=None): 4000 """ 4001 dirichlet(alpha, size=None) 4002 4003 Draw samples from the Dirichlet distribution. 4004 4005 Draw `size` samples of dimension k from a Dirichlet distribution. A 4006 Dirichlet-distributed random variable can be seen as a multivariate 4007 generalization of a Beta distribution. The Dirichlet distribution 4008 is a conjugate prior of a multinomial distribution in Bayesian 4009 inference. 4010 4011 Parameters 4012 ---------- 4013 alpha : sequence of floats, length k 4014 Parameter of the distribution (length ``k`` for sample of 4015 length ``k``). 4016 size : int or tuple of ints, optional 4017 Output shape. If the given shape is, e.g., ``(m, n)``, then 4018 ``m * n * k`` samples are drawn. Default is None, in which case a 4019 vector of length ``k`` is returned. 4020 4021 Returns 4022 ------- 4023 samples : ndarray, 4024 The drawn samples, of shape ``(size, k)``. 4025 4026 Raises 4027 ------- 4028 ValueError 4029 If any value in ``alpha`` is less than or equal to zero 4030 4031 Notes 4032 ----- 4033 The Dirichlet distribution is a distribution over vectors 4034 :math:`x` that fulfil the conditions :math:`x_i>0` and 4035 :math:`\\sum_{i=1}^k x_i = 1`. 4036 4037 The probability density function :math:`p` of a 4038 Dirichlet-distributed random vector :math:`X` is 4039 proportional to 4040 4041 .. math:: p(x) \\propto \\prod_{i=1}^{k}{x^{\\alpha_i-1}_i}, 4042 4043 where :math:`\\alpha` is a vector containing the positive 4044 concentration parameters. 4045 4046 The method uses the following property for computation: let :math:`Y` 4047 be a random vector which has components that follow a standard gamma 4048 distribution, then :math:`X = \\frac{1}{\\sum_{i=1}^k{Y_i}} Y` 4049 is Dirichlet-distributed 4050 4051 References 4052 ---------- 4053 .. [1] David McKay, "Information Theory, Inference and Learning 4054 Algorithms," chapter 23, 4055 http://www.inference.org.uk/mackay/itila/ 4056 .. [2] Wikipedia, "Dirichlet distribution", 4057 https://en.wikipedia.org/wiki/Dirichlet_distribution 4058 4059 Examples 4060 -------- 4061 Taking an example cited in Wikipedia, this distribution can be used if 4062 one wanted to cut strings (each of initial length 1.0) into K pieces 4063 with different lengths, where each piece had, on average, a designated 4064 average length, but allowing some variation in the relative sizes of 4065 the pieces. 4066 4067 >>> s = np.random.default_rng().dirichlet((10, 5, 3), 20).transpose() 4068 4069 >>> import matplotlib.pyplot as plt 4070 >>> plt.barh(range(20), s[0]) 4071 >>> plt.barh(range(20), s[1], left=s[0], color='g') 4072 >>> plt.barh(range(20), s[2], left=s[0]+s[1], color='r') 4073 >>> plt.title("Lengths of Strings") 4074 4075 """ 4076 4077 # ================= 4078 # Pure python algo 4079 # ================= 4080 # alpha = N.atleast_1d(alpha) 4081 # k = alpha.size 4082 4083 # if n == 1: 4084 # val = N.zeros(k) 4085 # for i in range(k): 4086 # val[i] = sgamma(alpha[i], n) 4087 # val /= N.sum(val) 4088 # else: 4089 # val = N.zeros((k, n)) 4090 # for i in range(k): 4091 # val[i] = sgamma(alpha[i], n) 4092 # val /= N.sum(val, axis = 0) 4093 # val = val.T 4094 # return val 4095 4096 cdef np.npy_intp k, totsize, i, j 4097 cdef np.ndarray alpha_arr, val_arr, alpha_csum_arr 4098 cdef double csum 4099 cdef double *alpha_data 4100 cdef double *alpha_csum_data 4101 cdef double *val_data 4102 cdef double acc, invacc, v 4103 4104 k = len(alpha) 4105 alpha_arr = <np.ndarray>np.PyArray_FROMANY( 4106 alpha, np.NPY_DOUBLE, 1, 1, 4107 np.NPY_ARRAY_ALIGNED | np.NPY_ARRAY_C_CONTIGUOUS) 4108 if np.any(np.less_equal(alpha_arr, 0)): 4109 raise ValueError('alpha <= 0') 4110 alpha_data = <double*>np.PyArray_DATA(alpha_arr) 4111 4112 if size is None: 4113 shape = (k,) 4114 else: 4115 try: 4116 shape = (operator.index(size), k) 4117 except: 4118 shape = tuple(size) + (k,) 4119 4120 diric = np.zeros(shape, np.float64) 4121 val_arr = <np.ndarray>diric 4122 val_data= <double*>np.PyArray_DATA(val_arr) 4123 4124 i = 0 4125 totsize = np.PyArray_SIZE(val_arr) 4126 4127 # Select one of the following two algorithms for the generation 4128 # of Dirichlet random variates (RVs) 4129 # 4130 # A) Small alpha case: Use the stick-breaking approach with beta 4131 # random variates (RVs). 4132 # B) Standard case: Perform unit normalisation of a vector 4133 # of gamma random variates 4134 # 4135 # A) prevents NaNs resulting from 0/0 that may occur in B) 4136 # when all values in the vector ':math:\\alpha' are smaller 4137 # than 1, then there is a nonzero probability that all 4138 # generated gamma RVs will be 0. When that happens, the 4139 # normalization process ends up computing 0/0, giving nan. A) 4140 # does not use divisions, so that a situation in which 0/0 has 4141 # to be computed cannot occur. A) is slower than B) as 4142 # generation of beta RVs is slower than generation of gamma 4143 # RVs. A) is selected whenever `alpha.max() < t`, where `t < 4144 # 1` is a threshold that controls the probability of 4145 # generating a NaN value when B) is used. For a given 4146 # threshold `t` this probability can be bounded by 4147 # `gammainc(t, d)` where `gammainc` is the regularized 4148 # incomplete gamma function and `d` is the smallest positive 4149 # floating point number that can be represented with a given 4150 # precision. For the chosen threshold `t=0.1` this probability 4151 # is smaller than `1.8e-31` for double precision floating 4152 # point numbers. 4153 4154 if (k > 0) and (alpha_arr.max() < 0.1): 4155 # Small alpha case: Use stick-breaking approach with beta 4156 # random variates (RVs). 4157 # alpha_csum_data will hold the cumulative sum, right to 4158 # left, of alpha_arr. 4159 # Use a numpy array for memory management only. We could just as 4160 # well have malloc'd alpha_csum_data. alpha_arr is a C-contiguous 4161 # double array, therefore so is alpha_csum_arr. 4162 alpha_csum_arr = np.empty_like(alpha_arr) 4163 alpha_csum_data = <double*>np.PyArray_DATA(alpha_csum_arr) 4164 csum = 0.0 4165 for j in range(k - 1, -1, -1): 4166 csum += alpha_data[j] 4167 alpha_csum_data[j] = csum 4168 4169 with self.lock, nogil: 4170 while i < totsize: 4171 acc = 1. 4172 for j in range(k - 1): 4173 v = random_beta(&self._bitgen, alpha_data[j], 4174 alpha_csum_data[j + 1]) 4175 val_data[i + j] = acc * v 4176 acc *= (1. - v) 4177 val_data[i + k - 1] = acc 4178 i = i + k 4179 4180 else: 4181 # Standard case: Unit normalisation of a vector of gamma random 4182 # variates 4183 with self.lock, nogil: 4184 while i < totsize: 4185 acc = 0. 4186 for j in range(k): 4187 val_data[i + j] = random_standard_gamma(&self._bitgen, 4188 alpha_data[j]) 4189 acc = acc + val_data[i + j] 4190 invacc = 1. / acc 4191 for j in range(k): 4192 val_data[i + j] = val_data[i + j] * invacc 4193 i = i + k 4194 4195 return diric 4196 4197 def permuted(self, object x, *, axis=None, out=None): 4198 """ 4199 permuted(x, axis=None, out=None) 4200 4201 Randomly permute `x` along axis `axis`. 4202 4203 Unlike `shuffle`, each slice along the given axis is shuffled 4204 independently of the others. 4205 4206 Parameters 4207 ---------- 4208 x : array_like, at least one-dimensional 4209 Array to be shuffled. 4210 axis : int, optional 4211 Slices of `x` in this axis are shuffled. Each slice 4212 is shuffled independently of the others. If `axis` is 4213 None, the flattened array is shuffled. 4214 out : ndarray, optional 4215 If given, this is the destinaton of the shuffled array. 4216 If `out` is None, a shuffled copy of the array is returned. 4217 4218 Returns 4219 ------- 4220 ndarray 4221 If `out` is None, a shuffled copy of `x` is returned. 4222 Otherwise, the shuffled array is stored in `out`, 4223 and `out` is returned 4224 4225 See Also 4226 -------- 4227 shuffle 4228 permutation 4229 4230 Examples 4231 -------- 4232 Create a `numpy.random.Generator` instance: 4233 4234 >>> rng = np.random.default_rng() 4235 4236 Create a test array: 4237 4238 >>> x = np.arange(24).reshape(3, 8) 4239 >>> x 4240 array([[ 0, 1, 2, 3, 4, 5, 6, 7], 4241 [ 8, 9, 10, 11, 12, 13, 14, 15], 4242 [16, 17, 18, 19, 20, 21, 22, 23]]) 4243 4244 Shuffle the rows of `x`: 4245 4246 >>> y = rng.permuted(x, axis=1) 4247 >>> y 4248 array([[ 4, 3, 6, 7, 1, 2, 5, 0], # random 4249 [15, 10, 14, 9, 12, 11, 8, 13], 4250 [17, 16, 20, 21, 18, 22, 23, 19]]) 4251 4252 `x` has not been modified: 4253 4254 >>> x 4255 array([[ 0, 1, 2, 3, 4, 5, 6, 7], 4256 [ 8, 9, 10, 11, 12, 13, 14, 15], 4257 [16, 17, 18, 19, 20, 21, 22, 23]]) 4258 4259 To shuffle the rows of `x` in-place, pass `x` as the `out` 4260 parameter: 4261 4262 >>> y = rng.permuted(x, axis=1, out=x) 4263 >>> x 4264 array([[ 3, 0, 4, 7, 1, 6, 2, 5], # random 4265 [ 8, 14, 13, 9, 12, 11, 15, 10], 4266 [17, 18, 16, 22, 19, 23, 20, 21]]) 4267 4268 Note that when the ``out`` parameter is given, the return 4269 value is ``out``: 4270 4271 >>> y is x 4272 True 4273 """ 4274 4275 cdef int ax 4276 cdef np.npy_intp axlen, axstride, itemsize 4277 cdef void *buf 4278 cdef np.flatiter it 4279 cdef np.ndarray to_shuffle 4280 cdef int status 4281 cdef int flags 4282 4283 x = np.asarray(x) 4284 4285 if out is None: 4286 out = x.copy(order='K') 4287 else: 4288 if type(out) is not np.ndarray: 4289 raise TypeError('out must be a numpy array') 4290 if out.shape != x.shape: 4291 raise ValueError('out must have the same shape as x') 4292 np.copyto(out, x, casting='safe') 4293 4294 if axis is None: 4295 if x.ndim > 1: 4296 if not (np.PyArray_FLAGS(out) & (np.NPY_ARRAY_C_CONTIGUOUS | 4297 np.NPY_ARRAY_F_CONTIGUOUS)): 4298 flags = (np.NPY_ARRAY_C_CONTIGUOUS | 4299 NPY_ARRAY_WRITEBACKIFCOPY) 4300 to_shuffle = PyArray_FromArray(<np.PyArrayObject *>out, 4301 <np.PyArray_Descr *>NULL, flags) 4302 self.shuffle(to_shuffle.ravel(order='K')) 4303 # Because we only execute this block if out is not 4304 # contiguous, we know this call will always result in a 4305 # copy of to_shuffle back to out. I.e. status will be 1. 4306 status = PyArray_ResolveWritebackIfCopy(to_shuffle) 4307 assert status == 1 4308 else: 4309 # out is n-d with n > 1, but is either C- or F-contiguous, 4310 # so we know out.ravel(order='A') is a view. 4311 self.shuffle(out.ravel(order='A')) 4312 else: 4313 # out is 1-d 4314 self.shuffle(out) 4315 return out 4316 4317 ax = normalize_axis_index(axis, np.ndim(out)) 4318 itemsize = out.itemsize 4319 axlen = out.shape[ax] 4320 axstride = out.strides[ax] 4321 4322 it = np.PyArray_IterAllButAxis(out, &ax) 4323 4324 buf = PyMem_Malloc(itemsize) 4325 if buf == NULL: 4326 raise MemoryError('memory allocation failed in permuted') 4327 4328 if out.dtype.hasobject: 4329 # Keep the GIL when shuffling an object array. 4330 with self.lock: 4331 while np.PyArray_ITER_NOTDONE(it): 4332 _shuffle_raw_wrap(&self._bitgen, axlen, 0, itemsize, 4333 axstride, 4334 <char *>np.PyArray_ITER_DATA(it), 4335 <char *>buf) 4336 np.PyArray_ITER_NEXT(it) 4337 else: 4338 # out is not an object array, so we can release the GIL. 4339 with self.lock, nogil: 4340 while np.PyArray_ITER_NOTDONE(it): 4341 _shuffle_raw_wrap(&self._bitgen, axlen, 0, itemsize, 4342 axstride, 4343 <char *>np.PyArray_ITER_DATA(it), 4344 <char *>buf) 4345 np.PyArray_ITER_NEXT(it) 4346 4347 PyMem_Free(buf) 4348 return out 4349 4350 def shuffle(self, object x, axis=0): 4351 """ 4352 shuffle(x, axis=0) 4353 4354 Modify an array or sequence in-place by shuffling its contents. 4355 4356 The order of sub-arrays is changed but their contents remains the same. 4357 4358 Parameters 4359 ---------- 4360 x : ndarray or MutableSequence 4361 The array, list or mutable sequence to be shuffled. 4362 axis : int, optional 4363 The axis which `x` is shuffled along. Default is 0. 4364 It is only supported on `ndarray` objects. 4365 4366 Returns 4367 ------- 4368 None 4369 4370 Examples 4371 -------- 4372 >>> rng = np.random.default_rng() 4373 >>> arr = np.arange(10) 4374 >>> rng.shuffle(arr) 4375 >>> arr 4376 [1 7 5 2 9 4 3 6 0 8] # random 4377 4378 >>> arr = np.arange(9).reshape((3, 3)) 4379 >>> rng.shuffle(arr) 4380 >>> arr 4381 array([[3, 4, 5], # random 4382 [6, 7, 8], 4383 [0, 1, 2]]) 4384 4385 >>> arr = np.arange(9).reshape((3, 3)) 4386 >>> rng.shuffle(arr, axis=1) 4387 >>> arr 4388 array([[2, 0, 1], # random 4389 [5, 3, 4], 4390 [8, 6, 7]]) 4391 """ 4392 cdef: 4393 np.npy_intp i, j, n = len(x), stride, itemsize 4394 char* x_ptr 4395 char* buf_ptr 4396 4397 axis = normalize_axis_index(axis, np.ndim(x)) 4398 4399 if type(x) is np.ndarray and x.ndim == 1 and x.size: 4400 # Fast, statically typed path: shuffle the underlying buffer. 4401 # Only for non-empty, 1d objects of class ndarray (subclasses such 4402 # as MaskedArrays may not support this approach). 4403 x_ptr = <char*><size_t>np.PyArray_DATA(x) 4404 stride = x.strides[0] 4405 itemsize = x.dtype.itemsize 4406 # As the array x could contain python objects we use a buffer 4407 # of bytes for the swaps to avoid leaving one of the objects 4408 # within the buffer and erroneously decrementing it's refcount 4409 # when the function exits. 4410 buf = np.empty(itemsize, dtype=np.int8) # GC'd at function exit 4411 buf_ptr = <char*><size_t>np.PyArray_DATA(buf) 4412 if x.dtype.hasobject: 4413 with self.lock: 4414 _shuffle_raw_wrap(&self._bitgen, n, 1, itemsize, stride, 4415 x_ptr, buf_ptr) 4416 else: 4417 # Same as above, but the GIL is released. 4418 with self.lock, nogil: 4419 _shuffle_raw_wrap(&self._bitgen, n, 1, itemsize, stride, 4420 x_ptr, buf_ptr) 4421 elif isinstance(x, np.ndarray): 4422 if x.size == 0: 4423 # shuffling is a no-op 4424 return 4425 4426 x = np.swapaxes(x, 0, axis) 4427 buf = np.empty_like(x[0, ...]) 4428 with self.lock: 4429 for i in reversed(range(1, len(x))): 4430 j = random_interval(&self._bitgen, i) 4431 if i == j: 4432 # i == j is not needed and memcpy is undefined. 4433 continue 4434 buf[...] = x[j] 4435 x[j] = x[i] 4436 x[i] = buf 4437 else: 4438 # Untyped path. 4439 if not isinstance(x, Sequence): 4440 # See gh-18206. We may decide to deprecate here in the future. 4441 warnings.warn( 4442 "`x` isn't a recognized object; `shuffle` is not guaranteed " 4443 "to behave correctly. E.g., non-numpy array/tensor objects " 4444 "with view semantics may contain duplicates after shuffling.", 4445 UserWarning, stacklevel=2 4446 ) 4447 4448 if axis != 0: 4449 raise NotImplementedError("Axis argument is only supported " 4450 "on ndarray objects") 4451 with self.lock: 4452 for i in reversed(range(1, n)): 4453 j = random_interval(&self._bitgen, i) 4454 x[i], x[j] = x[j], x[i] 4455 4456 def permutation(self, object x, axis=0): 4457 """ 4458 permutation(x, axis=0) 4459 4460 Randomly permute a sequence, or return a permuted range. 4461 4462 Parameters 4463 ---------- 4464 x : int or array_like 4465 If `x` is an integer, randomly permute ``np.arange(x)``. 4466 If `x` is an array, make a copy and shuffle the elements 4467 randomly. 4468 axis : int, optional 4469 The axis which `x` is shuffled along. Default is 0. 4470 4471 Returns 4472 ------- 4473 out : ndarray 4474 Permuted sequence or array range. 4475 4476 Examples 4477 -------- 4478 >>> rng = np.random.default_rng() 4479 >>> rng.permutation(10) 4480 array([1, 7, 4, 3, 0, 9, 2, 5, 8, 6]) # random 4481 4482 >>> rng.permutation([1, 4, 9, 12, 15]) 4483 array([15, 1, 9, 4, 12]) # random 4484 4485 >>> arr = np.arange(9).reshape((3, 3)) 4486 >>> rng.permutation(arr) 4487 array([[6, 7, 8], # random 4488 [0, 1, 2], 4489 [3, 4, 5]]) 4490 4491 >>> rng.permutation("abc") 4492 Traceback (most recent call last): 4493 ... 4494 numpy.AxisError: axis 0 is out of bounds for array of dimension 0 4495 4496 >>> arr = np.arange(9).reshape((3, 3)) 4497 >>> rng.permutation(arr, axis=1) 4498 array([[0, 2, 1], # random 4499 [3, 5, 4], 4500 [6, 8, 7]]) 4501 4502 """ 4503 if isinstance(x, (int, np.integer)): 4504 arr = np.arange(x) 4505 self.shuffle(arr) 4506 return arr 4507 4508 arr = np.asarray(x) 4509 4510 axis = normalize_axis_index(axis, arr.ndim) 4511 4512 # shuffle has fast-path for 1-d 4513 if arr.ndim == 1: 4514 # Return a copy if same memory 4515 if np.may_share_memory(arr, x): 4516 arr = np.array(arr) 4517 self.shuffle(arr) 4518 return arr 4519 4520 # Shuffle index array, dtype to ensure fast path 4521 idx = np.arange(arr.shape[axis], dtype=np.intp) 4522 self.shuffle(idx) 4523 slices = [slice(None)]*arr.ndim 4524 slices[axis] = idx 4525 return arr[tuple(slices)] 4526 4527 4528def default_rng(seed=None): 4529 """Construct a new Generator with the default BitGenerator (PCG64). 4530 4531 Parameters 4532 ---------- 4533 seed : {None, int, array_like[ints], SeedSequence, BitGenerator, Generator}, optional 4534 A seed to initialize the `BitGenerator`. If None, then fresh, 4535 unpredictable entropy will be pulled from the OS. If an ``int`` or 4536 ``array_like[ints]`` is passed, then it will be passed to 4537 `SeedSequence` to derive the initial `BitGenerator` state. One may also 4538 pass in a `SeedSequence` instance. 4539 Additionally, when passed a `BitGenerator`, it will be wrapped by 4540 `Generator`. If passed a `Generator`, it will be returned unaltered. 4541 4542 Returns 4543 ------- 4544 Generator 4545 The initialized generator object. 4546 4547 Notes 4548 ----- 4549 If ``seed`` is not a `BitGenerator` or a `Generator`, a new `BitGenerator` 4550 is instantiated. This function does not manage a default global instance. 4551 4552 Examples 4553 -------- 4554 ``default_rng`` is the reccomended constructor for the random number class 4555 ``Generator``. Here are several ways we can construct a random 4556 number generator using ``default_rng`` and the ``Generator`` class. 4557 4558 Here we use ``default_rng`` to generate a random float: 4559 4560 >>> import numpy as np 4561 >>> rng = np.random.default_rng(12345) 4562 >>> print(rng) 4563 Generator(PCG64) 4564 >>> rfloat = rng.random() 4565 >>> rfloat 4566 0.22733602246716966 4567 >>> type(rfloat) 4568 <class 'float'> 4569 4570 Here we use ``default_rng`` to generate 3 random integers between 0 4571 (inclusive) and 10 (exclusive): 4572 4573 >>> import numpy as np 4574 >>> rng = np.random.default_rng(12345) 4575 >>> rints = rng.integers(low=0, high=10, size=3) 4576 >>> rints 4577 array([6, 2, 7]) 4578 >>> type(rints[0]) 4579 <class 'numpy.int64'> 4580 4581 Here we specify a seed so that we have reproducible results: 4582 4583 >>> import numpy as np 4584 >>> rng = np.random.default_rng(seed=42) 4585 >>> print(rng) 4586 Generator(PCG64) 4587 >>> arr1 = rng.random((3, 3)) 4588 >>> arr1 4589 array([[0.77395605, 0.43887844, 0.85859792], 4590 [0.69736803, 0.09417735, 0.97562235], 4591 [0.7611397 , 0.78606431, 0.12811363]]) 4592 4593 If we exit and restart our Python interpreter, we'll see that we 4594 generate the same random numbers again: 4595 4596 >>> import numpy as np 4597 >>> rng = np.random.default_rng(seed=42) 4598 >>> arr2 = rng.random((3, 3)) 4599 >>> arr2 4600 array([[0.77395605, 0.43887844, 0.85859792], 4601 [0.69736803, 0.09417735, 0.97562235], 4602 [0.7611397 , 0.78606431, 0.12811363]]) 4603 4604 """ 4605 if _check_bit_generator(seed): 4606 # We were passed a BitGenerator, so just wrap it up. 4607 return Generator(seed) 4608 elif isinstance(seed, Generator): 4609 # Pass through a Generator. 4610 return seed 4611 # Otherwise we need to instantiate a new BitGenerator and Generator as 4612 # normal. 4613 return Generator(PCG64(seed)) 4614