1# 2# Author: Joris Vankerschaver 2013 3# 4import math 5import numpy as np 6from numpy import asarray_chkfinite, asarray 7import scipy.linalg 8from scipy._lib import doccer 9from scipy.special import gammaln, psi, multigammaln, xlogy, entr, betaln 10from scipy._lib._util import check_random_state 11from scipy.linalg.blas import drot 12from scipy.linalg.misc import LinAlgError 13from scipy.linalg.lapack import get_lapack_funcs 14 15from ._discrete_distns import binom 16from . import mvn 17 18__all__ = ['multivariate_normal', 19 'matrix_normal', 20 'dirichlet', 21 'wishart', 22 'invwishart', 23 'multinomial', 24 'special_ortho_group', 25 'ortho_group', 26 'random_correlation', 27 'unitary_group', 28 'multivariate_t', 29 'multivariate_hypergeom'] 30 31_LOG_2PI = np.log(2 * np.pi) 32_LOG_2 = np.log(2) 33_LOG_PI = np.log(np.pi) 34 35 36_doc_random_state = """\ 37random_state : {None, int, `numpy.random.Generator`, 38 `numpy.random.RandomState`}, optional 39 40 If `seed` is None (or `np.random`), the `numpy.random.RandomState` 41 singleton is used. 42 If `seed` is an int, a new ``RandomState`` instance is used, 43 seeded with `seed`. 44 If `seed` is already a ``Generator`` or ``RandomState`` instance then 45 that instance is used. 46""" 47 48 49def _squeeze_output(out): 50 """ 51 Remove single-dimensional entries from array and convert to scalar, 52 if necessary. 53 """ 54 out = out.squeeze() 55 if out.ndim == 0: 56 out = out[()] 57 return out 58 59 60def _eigvalsh_to_eps(spectrum, cond=None, rcond=None): 61 """Determine which eigenvalues are "small" given the spectrum. 62 63 This is for compatibility across various linear algebra functions 64 that should agree about whether or not a Hermitian matrix is numerically 65 singular and what is its numerical matrix rank. 66 This is designed to be compatible with scipy.linalg.pinvh. 67 68 Parameters 69 ---------- 70 spectrum : 1d ndarray 71 Array of eigenvalues of a Hermitian matrix. 72 cond, rcond : float, optional 73 Cutoff for small eigenvalues. 74 Singular values smaller than rcond * largest_eigenvalue are 75 considered zero. 76 If None or -1, suitable machine precision is used. 77 78 Returns 79 ------- 80 eps : float 81 Magnitude cutoff for numerical negligibility. 82 83 """ 84 if rcond is not None: 85 cond = rcond 86 if cond in [None, -1]: 87 t = spectrum.dtype.char.lower() 88 factor = {'f': 1E3, 'd': 1E6} 89 cond = factor[t] * np.finfo(t).eps 90 eps = cond * np.max(abs(spectrum)) 91 return eps 92 93 94def _pinv_1d(v, eps=1e-5): 95 """A helper function for computing the pseudoinverse. 96 97 Parameters 98 ---------- 99 v : iterable of numbers 100 This may be thought of as a vector of eigenvalues or singular values. 101 eps : float 102 Values with magnitude no greater than eps are considered negligible. 103 104 Returns 105 ------- 106 v_pinv : 1d float ndarray 107 A vector of pseudo-inverted numbers. 108 109 """ 110 return np.array([0 if abs(x) <= eps else 1/x for x in v], dtype=float) 111 112 113class _PSD: 114 """ 115 Compute coordinated functions of a symmetric positive semidefinite matrix. 116 117 This class addresses two issues. Firstly it allows the pseudoinverse, 118 the logarithm of the pseudo-determinant, and the rank of the matrix 119 to be computed using one call to eigh instead of three. 120 Secondly it allows these functions to be computed in a way 121 that gives mutually compatible results. 122 All of the functions are computed with a common understanding as to 123 which of the eigenvalues are to be considered negligibly small. 124 The functions are designed to coordinate with scipy.linalg.pinvh() 125 but not necessarily with np.linalg.det() or with np.linalg.matrix_rank(). 126 127 Parameters 128 ---------- 129 M : array_like 130 Symmetric positive semidefinite matrix (2-D). 131 cond, rcond : float, optional 132 Cutoff for small eigenvalues. 133 Singular values smaller than rcond * largest_eigenvalue are 134 considered zero. 135 If None or -1, suitable machine precision is used. 136 lower : bool, optional 137 Whether the pertinent array data is taken from the lower 138 or upper triangle of M. (Default: lower) 139 check_finite : bool, optional 140 Whether to check that the input matrices contain only finite 141 numbers. Disabling may give a performance gain, but may result 142 in problems (crashes, non-termination) if the inputs do contain 143 infinities or NaNs. 144 allow_singular : bool, optional 145 Whether to allow a singular matrix. (Default: True) 146 147 Notes 148 ----- 149 The arguments are similar to those of scipy.linalg.pinvh(). 150 151 """ 152 153 def __init__(self, M, cond=None, rcond=None, lower=True, 154 check_finite=True, allow_singular=True): 155 # Compute the symmetric eigendecomposition. 156 # Note that eigh takes care of array conversion, chkfinite, 157 # and assertion that the matrix is square. 158 s, u = scipy.linalg.eigh(M, lower=lower, check_finite=check_finite) 159 160 eps = _eigvalsh_to_eps(s, cond, rcond) 161 if np.min(s) < -eps: 162 raise ValueError('the input matrix must be positive semidefinite') 163 d = s[s > eps] 164 if len(d) < len(s) and not allow_singular: 165 raise np.linalg.LinAlgError('singular matrix') 166 s_pinv = _pinv_1d(s, eps) 167 U = np.multiply(u, np.sqrt(s_pinv)) 168 169 # Initialize the eagerly precomputed attributes. 170 self.rank = len(d) 171 self.U = U 172 self.log_pdet = np.sum(np.log(d)) 173 174 # Initialize an attribute to be lazily computed. 175 self._pinv = None 176 177 @property 178 def pinv(self): 179 if self._pinv is None: 180 self._pinv = np.dot(self.U, self.U.T) 181 return self._pinv 182 183 184class multi_rv_generic: 185 """ 186 Class which encapsulates common functionality between all multivariate 187 distributions. 188 """ 189 def __init__(self, seed=None): 190 super().__init__() 191 self._random_state = check_random_state(seed) 192 193 @property 194 def random_state(self): 195 """ Get or set the Generator object for generating random variates. 196 197 If `seed` is None (or `np.random`), the `numpy.random.RandomState` 198 singleton is used. 199 If `seed` is an int, a new ``RandomState`` instance is used, 200 seeded with `seed`. 201 If `seed` is already a ``Generator`` or ``RandomState`` instance then 202 that instance is used. 203 204 """ 205 return self._random_state 206 207 @random_state.setter 208 def random_state(self, seed): 209 self._random_state = check_random_state(seed) 210 211 def _get_random_state(self, random_state): 212 if random_state is not None: 213 return check_random_state(random_state) 214 else: 215 return self._random_state 216 217 218class multi_rv_frozen: 219 """ 220 Class which encapsulates common functionality between all frozen 221 multivariate distributions. 222 """ 223 @property 224 def random_state(self): 225 return self._dist._random_state 226 227 @random_state.setter 228 def random_state(self, seed): 229 self._dist._random_state = check_random_state(seed) 230 231 232_mvn_doc_default_callparams = """\ 233mean : array_like, optional 234 Mean of the distribution (default zero) 235cov : array_like, optional 236 Covariance matrix of the distribution (default one) 237allow_singular : bool, optional 238 Whether to allow a singular covariance matrix. (Default: False) 239""" 240 241_mvn_doc_callparams_note = \ 242 """Setting the parameter `mean` to `None` is equivalent to having `mean` 243 be the zero-vector. The parameter `cov` can be a scalar, in which case 244 the covariance matrix is the identity times that value, a vector of 245 diagonal entries for the covariance matrix, or a two-dimensional 246 array_like. 247 """ 248 249_mvn_doc_frozen_callparams = "" 250 251_mvn_doc_frozen_callparams_note = \ 252 """See class definition for a detailed description of parameters.""" 253 254mvn_docdict_params = { 255 '_mvn_doc_default_callparams': _mvn_doc_default_callparams, 256 '_mvn_doc_callparams_note': _mvn_doc_callparams_note, 257 '_doc_random_state': _doc_random_state 258} 259 260mvn_docdict_noparams = { 261 '_mvn_doc_default_callparams': _mvn_doc_frozen_callparams, 262 '_mvn_doc_callparams_note': _mvn_doc_frozen_callparams_note, 263 '_doc_random_state': _doc_random_state 264} 265 266 267class multivariate_normal_gen(multi_rv_generic): 268 r"""A multivariate normal random variable. 269 270 The `mean` keyword specifies the mean. The `cov` keyword specifies the 271 covariance matrix. 272 273 Methods 274 ------- 275 ``pdf(x, mean=None, cov=1, allow_singular=False)`` 276 Probability density function. 277 ``logpdf(x, mean=None, cov=1, allow_singular=False)`` 278 Log of the probability density function. 279 ``cdf(x, mean=None, cov=1, allow_singular=False, maxpts=1000000*dim, abseps=1e-5, releps=1e-5)`` 280 Cumulative distribution function. 281 ``logcdf(x, mean=None, cov=1, allow_singular=False, maxpts=1000000*dim, abseps=1e-5, releps=1e-5)`` 282 Log of the cumulative distribution function. 283 ``rvs(mean=None, cov=1, size=1, random_state=None)`` 284 Draw random samples from a multivariate normal distribution. 285 ``entropy()`` 286 Compute the differential entropy of the multivariate normal. 287 288 Parameters 289 ---------- 290 x : array_like 291 Quantiles, with the last axis of `x` denoting the components. 292 %(_mvn_doc_default_callparams)s 293 %(_doc_random_state)s 294 295 Alternatively, the object may be called (as a function) to fix the mean 296 and covariance parameters, returning a "frozen" multivariate normal 297 random variable: 298 299 rv = multivariate_normal(mean=None, cov=1, allow_singular=False) 300 - Frozen object with the same methods but holding the given 301 mean and covariance fixed. 302 303 Notes 304 ----- 305 %(_mvn_doc_callparams_note)s 306 307 The covariance matrix `cov` must be a (symmetric) positive 308 semi-definite matrix. The determinant and inverse of `cov` are computed 309 as the pseudo-determinant and pseudo-inverse, respectively, so 310 that `cov` does not need to have full rank. 311 312 The probability density function for `multivariate_normal` is 313 314 .. math:: 315 316 f(x) = \frac{1}{\sqrt{(2 \pi)^k \det \Sigma}} 317 \exp\left( -\frac{1}{2} (x - \mu)^T \Sigma^{-1} (x - \mu) \right), 318 319 where :math:`\mu` is the mean, :math:`\Sigma` the covariance matrix, 320 and :math:`k` is the dimension of the space where :math:`x` takes values. 321 322 .. versionadded:: 0.14.0 323 324 Examples 325 -------- 326 >>> import matplotlib.pyplot as plt 327 >>> from scipy.stats import multivariate_normal 328 329 >>> x = np.linspace(0, 5, 10, endpoint=False) 330 >>> y = multivariate_normal.pdf(x, mean=2.5, cov=0.5); y 331 array([ 0.00108914, 0.01033349, 0.05946514, 0.20755375, 0.43939129, 332 0.56418958, 0.43939129, 0.20755375, 0.05946514, 0.01033349]) 333 >>> fig1 = plt.figure() 334 >>> ax = fig1.add_subplot(111) 335 >>> ax.plot(x, y) 336 337 The input quantiles can be any shape of array, as long as the last 338 axis labels the components. This allows us for instance to 339 display the frozen pdf for a non-isotropic random variable in 2D as 340 follows: 341 342 >>> x, y = np.mgrid[-1:1:.01, -1:1:.01] 343 >>> pos = np.dstack((x, y)) 344 >>> rv = multivariate_normal([0.5, -0.2], [[2.0, 0.3], [0.3, 0.5]]) 345 >>> fig2 = plt.figure() 346 >>> ax2 = fig2.add_subplot(111) 347 >>> ax2.contourf(x, y, rv.pdf(pos)) 348 349 """ 350 351 def __init__(self, seed=None): 352 super().__init__(seed) 353 self.__doc__ = doccer.docformat(self.__doc__, mvn_docdict_params) 354 355 def __call__(self, mean=None, cov=1, allow_singular=False, seed=None): 356 """Create a frozen multivariate normal distribution. 357 358 See `multivariate_normal_frozen` for more information. 359 """ 360 return multivariate_normal_frozen(mean, cov, 361 allow_singular=allow_singular, 362 seed=seed) 363 364 def _process_parameters(self, dim, mean, cov): 365 """ 366 Infer dimensionality from mean or covariance matrix, ensure that 367 mean and covariance are full vector resp. matrix. 368 """ 369 # Try to infer dimensionality 370 if dim is None: 371 if mean is None: 372 if cov is None: 373 dim = 1 374 else: 375 cov = np.asarray(cov, dtype=float) 376 if cov.ndim < 2: 377 dim = 1 378 else: 379 dim = cov.shape[0] 380 else: 381 mean = np.asarray(mean, dtype=float) 382 dim = mean.size 383 else: 384 if not np.isscalar(dim): 385 raise ValueError("Dimension of random variable must be " 386 "a scalar.") 387 388 # Check input sizes and return full arrays for mean and cov if 389 # necessary 390 if mean is None: 391 mean = np.zeros(dim) 392 mean = np.asarray(mean, dtype=float) 393 394 if cov is None: 395 cov = 1.0 396 cov = np.asarray(cov, dtype=float) 397 398 if dim == 1: 399 mean.shape = (1,) 400 cov.shape = (1, 1) 401 402 if mean.ndim != 1 or mean.shape[0] != dim: 403 raise ValueError("Array 'mean' must be a vector of length %d." % 404 dim) 405 if cov.ndim == 0: 406 cov = cov * np.eye(dim) 407 elif cov.ndim == 1: 408 cov = np.diag(cov) 409 elif cov.ndim == 2 and cov.shape != (dim, dim): 410 rows, cols = cov.shape 411 if rows != cols: 412 msg = ("Array 'cov' must be square if it is two dimensional," 413 " but cov.shape = %s." % str(cov.shape)) 414 else: 415 msg = ("Dimension mismatch: array 'cov' is of shape %s," 416 " but 'mean' is a vector of length %d.") 417 msg = msg % (str(cov.shape), len(mean)) 418 raise ValueError(msg) 419 elif cov.ndim > 2: 420 raise ValueError("Array 'cov' must be at most two-dimensional," 421 " but cov.ndim = %d" % cov.ndim) 422 423 return dim, mean, cov 424 425 def _process_quantiles(self, x, dim): 426 """ 427 Adjust quantiles array so that last axis labels the components of 428 each data point. 429 """ 430 x = np.asarray(x, dtype=float) 431 432 if x.ndim == 0: 433 x = x[np.newaxis] 434 elif x.ndim == 1: 435 if dim == 1: 436 x = x[:, np.newaxis] 437 else: 438 x = x[np.newaxis, :] 439 440 return x 441 442 def _logpdf(self, x, mean, prec_U, log_det_cov, rank): 443 """Log of the multivariate normal probability density function. 444 445 Parameters 446 ---------- 447 x : ndarray 448 Points at which to evaluate the log of the probability 449 density function 450 mean : ndarray 451 Mean of the distribution 452 prec_U : ndarray 453 A decomposition such that np.dot(prec_U, prec_U.T) 454 is the precision matrix, i.e. inverse of the covariance matrix. 455 log_det_cov : float 456 Logarithm of the determinant of the covariance matrix 457 rank : int 458 Rank of the covariance matrix. 459 460 Notes 461 ----- 462 As this function does no argument checking, it should not be 463 called directly; use 'logpdf' instead. 464 465 """ 466 dev = x - mean 467 maha = np.sum(np.square(np.dot(dev, prec_U)), axis=-1) 468 return -0.5 * (rank * _LOG_2PI + log_det_cov + maha) 469 470 def logpdf(self, x, mean=None, cov=1, allow_singular=False): 471 """Log of the multivariate normal probability density function. 472 473 Parameters 474 ---------- 475 x : array_like 476 Quantiles, with the last axis of `x` denoting the components. 477 %(_mvn_doc_default_callparams)s 478 479 Returns 480 ------- 481 pdf : ndarray or scalar 482 Log of the probability density function evaluated at `x` 483 484 Notes 485 ----- 486 %(_mvn_doc_callparams_note)s 487 488 """ 489 dim, mean, cov = self._process_parameters(None, mean, cov) 490 x = self._process_quantiles(x, dim) 491 psd = _PSD(cov, allow_singular=allow_singular) 492 out = self._logpdf(x, mean, psd.U, psd.log_pdet, psd.rank) 493 return _squeeze_output(out) 494 495 def pdf(self, x, mean=None, cov=1, allow_singular=False): 496 """Multivariate normal probability density function. 497 498 Parameters 499 ---------- 500 x : array_like 501 Quantiles, with the last axis of `x` denoting the components. 502 %(_mvn_doc_default_callparams)s 503 504 Returns 505 ------- 506 pdf : ndarray or scalar 507 Probability density function evaluated at `x` 508 509 Notes 510 ----- 511 %(_mvn_doc_callparams_note)s 512 513 """ 514 dim, mean, cov = self._process_parameters(None, mean, cov) 515 x = self._process_quantiles(x, dim) 516 psd = _PSD(cov, allow_singular=allow_singular) 517 out = np.exp(self._logpdf(x, mean, psd.U, psd.log_pdet, psd.rank)) 518 return _squeeze_output(out) 519 520 def _cdf(self, x, mean, cov, maxpts, abseps, releps): 521 """Log of the multivariate normal cumulative distribution function. 522 523 Parameters 524 ---------- 525 x : ndarray 526 Points at which to evaluate the cumulative distribution function. 527 mean : ndarray 528 Mean of the distribution 529 cov : array_like 530 Covariance matrix of the distribution 531 maxpts : integer 532 The maximum number of points to use for integration 533 abseps : float 534 Absolute error tolerance 535 releps : float 536 Relative error tolerance 537 538 Notes 539 ----- 540 As this function does no argument checking, it should not be 541 called directly; use 'cdf' instead. 542 543 .. versionadded:: 1.0.0 544 545 """ 546 lower = np.full(mean.shape, -np.inf) 547 # mvnun expects 1-d arguments, so process points sequentially 548 func1d = lambda x_slice: mvn.mvnun(lower, x_slice, mean, cov, 549 maxpts, abseps, releps)[0] 550 out = np.apply_along_axis(func1d, -1, x) 551 return _squeeze_output(out) 552 553 def logcdf(self, x, mean=None, cov=1, allow_singular=False, maxpts=None, 554 abseps=1e-5, releps=1e-5): 555 """Log of the multivariate normal cumulative distribution function. 556 557 Parameters 558 ---------- 559 x : array_like 560 Quantiles, with the last axis of `x` denoting the components. 561 %(_mvn_doc_default_callparams)s 562 maxpts : integer, optional 563 The maximum number of points to use for integration 564 (default `1000000*dim`) 565 abseps : float, optional 566 Absolute error tolerance (default 1e-5) 567 releps : float, optional 568 Relative error tolerance (default 1e-5) 569 570 Returns 571 ------- 572 cdf : ndarray or scalar 573 Log of the cumulative distribution function evaluated at `x` 574 575 Notes 576 ----- 577 %(_mvn_doc_callparams_note)s 578 579 .. versionadded:: 1.0.0 580 581 """ 582 dim, mean, cov = self._process_parameters(None, mean, cov) 583 x = self._process_quantiles(x, dim) 584 # Use _PSD to check covariance matrix 585 _PSD(cov, allow_singular=allow_singular) 586 if not maxpts: 587 maxpts = 1000000 * dim 588 out = np.log(self._cdf(x, mean, cov, maxpts, abseps, releps)) 589 return out 590 591 def cdf(self, x, mean=None, cov=1, allow_singular=False, maxpts=None, 592 abseps=1e-5, releps=1e-5): 593 """Multivariate normal cumulative distribution function. 594 595 Parameters 596 ---------- 597 x : array_like 598 Quantiles, with the last axis of `x` denoting the components. 599 %(_mvn_doc_default_callparams)s 600 maxpts : integer, optional 601 The maximum number of points to use for integration 602 (default `1000000*dim`) 603 abseps : float, optional 604 Absolute error tolerance (default 1e-5) 605 releps : float, optional 606 Relative error tolerance (default 1e-5) 607 608 Returns 609 ------- 610 cdf : ndarray or scalar 611 Cumulative distribution function evaluated at `x` 612 613 Notes 614 ----- 615 %(_mvn_doc_callparams_note)s 616 617 .. versionadded:: 1.0.0 618 619 """ 620 dim, mean, cov = self._process_parameters(None, mean, cov) 621 x = self._process_quantiles(x, dim) 622 # Use _PSD to check covariance matrix 623 _PSD(cov, allow_singular=allow_singular) 624 if not maxpts: 625 maxpts = 1000000 * dim 626 out = self._cdf(x, mean, cov, maxpts, abseps, releps) 627 return out 628 629 def rvs(self, mean=None, cov=1, size=1, random_state=None): 630 """Draw random samples from a multivariate normal distribution. 631 632 Parameters 633 ---------- 634 %(_mvn_doc_default_callparams)s 635 size : integer, optional 636 Number of samples to draw (default 1). 637 %(_doc_random_state)s 638 639 Returns 640 ------- 641 rvs : ndarray or scalar 642 Random variates of size (`size`, `N`), where `N` is the 643 dimension of the random variable. 644 645 Notes 646 ----- 647 %(_mvn_doc_callparams_note)s 648 649 """ 650 dim, mean, cov = self._process_parameters(None, mean, cov) 651 652 random_state = self._get_random_state(random_state) 653 out = random_state.multivariate_normal(mean, cov, size) 654 return _squeeze_output(out) 655 656 def entropy(self, mean=None, cov=1): 657 """Compute the differential entropy of the multivariate normal. 658 659 Parameters 660 ---------- 661 %(_mvn_doc_default_callparams)s 662 663 Returns 664 ------- 665 h : scalar 666 Entropy of the multivariate normal distribution 667 668 Notes 669 ----- 670 %(_mvn_doc_callparams_note)s 671 672 """ 673 dim, mean, cov = self._process_parameters(None, mean, cov) 674 _, logdet = np.linalg.slogdet(2 * np.pi * np.e * cov) 675 return 0.5 * logdet 676 677 678multivariate_normal = multivariate_normal_gen() 679 680 681class multivariate_normal_frozen(multi_rv_frozen): 682 def __init__(self, mean=None, cov=1, allow_singular=False, seed=None, 683 maxpts=None, abseps=1e-5, releps=1e-5): 684 """Create a frozen multivariate normal distribution. 685 686 Parameters 687 ---------- 688 mean : array_like, optional 689 Mean of the distribution (default zero) 690 cov : array_like, optional 691 Covariance matrix of the distribution (default one) 692 allow_singular : bool, optional 693 If this flag is True then tolerate a singular 694 covariance matrix (default False). 695 seed : {None, int, `numpy.random.Generator`, 696 `numpy.random.RandomState`}, optional 697 698 If `seed` is None (or `np.random`), the `numpy.random.RandomState` 699 singleton is used. 700 If `seed` is an int, a new ``RandomState`` instance is used, 701 seeded with `seed`. 702 If `seed` is already a ``Generator`` or ``RandomState`` instance 703 then that instance is used. 704 maxpts : integer, optional 705 The maximum number of points to use for integration of the 706 cumulative distribution function (default `1000000*dim`) 707 abseps : float, optional 708 Absolute error tolerance for the cumulative distribution function 709 (default 1e-5) 710 releps : float, optional 711 Relative error tolerance for the cumulative distribution function 712 (default 1e-5) 713 714 Examples 715 -------- 716 When called with the default parameters, this will create a 1D random 717 variable with mean 0 and covariance 1: 718 719 >>> from scipy.stats import multivariate_normal 720 >>> r = multivariate_normal() 721 >>> r.mean 722 array([ 0.]) 723 >>> r.cov 724 array([[1.]]) 725 726 """ 727 self._dist = multivariate_normal_gen(seed) 728 self.dim, self.mean, self.cov = self._dist._process_parameters( 729 None, mean, cov) 730 self.cov_info = _PSD(self.cov, allow_singular=allow_singular) 731 if not maxpts: 732 maxpts = 1000000 * self.dim 733 self.maxpts = maxpts 734 self.abseps = abseps 735 self.releps = releps 736 737 def logpdf(self, x): 738 x = self._dist._process_quantiles(x, self.dim) 739 out = self._dist._logpdf(x, self.mean, self.cov_info.U, 740 self.cov_info.log_pdet, self.cov_info.rank) 741 return _squeeze_output(out) 742 743 def pdf(self, x): 744 return np.exp(self.logpdf(x)) 745 746 def logcdf(self, x): 747 return np.log(self.cdf(x)) 748 749 def cdf(self, x): 750 x = self._dist._process_quantiles(x, self.dim) 751 out = self._dist._cdf(x, self.mean, self.cov, self.maxpts, self.abseps, 752 self.releps) 753 return _squeeze_output(out) 754 755 def rvs(self, size=1, random_state=None): 756 return self._dist.rvs(self.mean, self.cov, size, random_state) 757 758 def entropy(self): 759 """Computes the differential entropy of the multivariate normal. 760 761 Returns 762 ------- 763 h : scalar 764 Entropy of the multivariate normal distribution 765 766 """ 767 log_pdet = self.cov_info.log_pdet 768 rank = self.cov_info.rank 769 return 0.5 * (rank * (_LOG_2PI + 1) + log_pdet) 770 771 772# Set frozen generator docstrings from corresponding docstrings in 773# multivariate_normal_gen and fill in default strings in class docstrings 774for name in ['logpdf', 'pdf', 'logcdf', 'cdf', 'rvs']: 775 method = multivariate_normal_gen.__dict__[name] 776 method_frozen = multivariate_normal_frozen.__dict__[name] 777 method_frozen.__doc__ = doccer.docformat(method.__doc__, 778 mvn_docdict_noparams) 779 method.__doc__ = doccer.docformat(method.__doc__, mvn_docdict_params) 780 781_matnorm_doc_default_callparams = """\ 782mean : array_like, optional 783 Mean of the distribution (default: `None`) 784rowcov : array_like, optional 785 Among-row covariance matrix of the distribution (default: `1`) 786colcov : array_like, optional 787 Among-column covariance matrix of the distribution (default: `1`) 788""" 789 790_matnorm_doc_callparams_note = \ 791 """If `mean` is set to `None` then a matrix of zeros is used for the mean. 792 The dimensions of this matrix are inferred from the shape of `rowcov` and 793 `colcov`, if these are provided, or set to `1` if ambiguous. 794 795 `rowcov` and `colcov` can be two-dimensional array_likes specifying the 796 covariance matrices directly. Alternatively, a one-dimensional array will 797 be be interpreted as the entries of a diagonal matrix, and a scalar or 798 zero-dimensional array will be interpreted as this value times the 799 identity matrix. 800 """ 801 802_matnorm_doc_frozen_callparams = "" 803 804_matnorm_doc_frozen_callparams_note = \ 805 """See class definition for a detailed description of parameters.""" 806 807matnorm_docdict_params = { 808 '_matnorm_doc_default_callparams': _matnorm_doc_default_callparams, 809 '_matnorm_doc_callparams_note': _matnorm_doc_callparams_note, 810 '_doc_random_state': _doc_random_state 811} 812 813matnorm_docdict_noparams = { 814 '_matnorm_doc_default_callparams': _matnorm_doc_frozen_callparams, 815 '_matnorm_doc_callparams_note': _matnorm_doc_frozen_callparams_note, 816 '_doc_random_state': _doc_random_state 817} 818 819 820class matrix_normal_gen(multi_rv_generic): 821 r"""A matrix normal random variable. 822 823 The `mean` keyword specifies the mean. The `rowcov` keyword specifies the 824 among-row covariance matrix. The 'colcov' keyword specifies the 825 among-column covariance matrix. 826 827 Methods 828 ------- 829 ``pdf(X, mean=None, rowcov=1, colcov=1)`` 830 Probability density function. 831 ``logpdf(X, mean=None, rowcov=1, colcov=1)`` 832 Log of the probability density function. 833 ``rvs(mean=None, rowcov=1, colcov=1, size=1, random_state=None)`` 834 Draw random samples. 835 836 Parameters 837 ---------- 838 X : array_like 839 Quantiles, with the last two axes of `X` denoting the components. 840 %(_matnorm_doc_default_callparams)s 841 %(_doc_random_state)s 842 843 Alternatively, the object may be called (as a function) to fix the mean 844 and covariance parameters, returning a "frozen" matrix normal 845 random variable: 846 847 rv = matrix_normal(mean=None, rowcov=1, colcov=1) 848 - Frozen object with the same methods but holding the given 849 mean and covariance fixed. 850 851 Notes 852 ----- 853 %(_matnorm_doc_callparams_note)s 854 855 The covariance matrices specified by `rowcov` and `colcov` must be 856 (symmetric) positive definite. If the samples in `X` are 857 :math:`m \times n`, then `rowcov` must be :math:`m \times m` and 858 `colcov` must be :math:`n \times n`. `mean` must be the same shape as `X`. 859 860 The probability density function for `matrix_normal` is 861 862 .. math:: 863 864 f(X) = (2 \pi)^{-\frac{mn}{2}}|U|^{-\frac{n}{2}} |V|^{-\frac{m}{2}} 865 \exp\left( -\frac{1}{2} \mathrm{Tr}\left[ U^{-1} (X-M) V^{-1} 866 (X-M)^T \right] \right), 867 868 where :math:`M` is the mean, :math:`U` the among-row covariance matrix, 869 :math:`V` the among-column covariance matrix. 870 871 The `allow_singular` behaviour of the `multivariate_normal` 872 distribution is not currently supported. Covariance matrices must be 873 full rank. 874 875 The `matrix_normal` distribution is closely related to the 876 `multivariate_normal` distribution. Specifically, :math:`\mathrm{Vec}(X)` 877 (the vector formed by concatenating the columns of :math:`X`) has a 878 multivariate normal distribution with mean :math:`\mathrm{Vec}(M)` 879 and covariance :math:`V \otimes U` (where :math:`\otimes` is the Kronecker 880 product). Sampling and pdf evaluation are 881 :math:`\mathcal{O}(m^3 + n^3 + m^2 n + m n^2)` for the matrix normal, but 882 :math:`\mathcal{O}(m^3 n^3)` for the equivalent multivariate normal, 883 making this equivalent form algorithmically inefficient. 884 885 .. versionadded:: 0.17.0 886 887 Examples 888 -------- 889 890 >>> from scipy.stats import matrix_normal 891 892 >>> M = np.arange(6).reshape(3,2); M 893 array([[0, 1], 894 [2, 3], 895 [4, 5]]) 896 >>> U = np.diag([1,2,3]); U 897 array([[1, 0, 0], 898 [0, 2, 0], 899 [0, 0, 3]]) 900 >>> V = 0.3*np.identity(2); V 901 array([[ 0.3, 0. ], 902 [ 0. , 0.3]]) 903 >>> X = M + 0.1; X 904 array([[ 0.1, 1.1], 905 [ 2.1, 3.1], 906 [ 4.1, 5.1]]) 907 >>> matrix_normal.pdf(X, mean=M, rowcov=U, colcov=V) 908 0.023410202050005054 909 910 >>> # Equivalent multivariate normal 911 >>> from scipy.stats import multivariate_normal 912 >>> vectorised_X = X.T.flatten() 913 >>> equiv_mean = M.T.flatten() 914 >>> equiv_cov = np.kron(V,U) 915 >>> multivariate_normal.pdf(vectorised_X, mean=equiv_mean, cov=equiv_cov) 916 0.023410202050005054 917 918 """ 919 920 def __init__(self, seed=None): 921 super().__init__(seed) 922 self.__doc__ = doccer.docformat(self.__doc__, matnorm_docdict_params) 923 924 def __call__(self, mean=None, rowcov=1, colcov=1, seed=None): 925 """Create a frozen matrix normal distribution. 926 927 See `matrix_normal_frozen` for more information. 928 929 """ 930 return matrix_normal_frozen(mean, rowcov, colcov, seed=seed) 931 932 def _process_parameters(self, mean, rowcov, colcov): 933 """ 934 Infer dimensionality from mean or covariance matrices. Handle 935 defaults. Ensure compatible dimensions. 936 """ 937 938 # Process mean 939 if mean is not None: 940 mean = np.asarray(mean, dtype=float) 941 meanshape = mean.shape 942 if len(meanshape) != 2: 943 raise ValueError("Array `mean` must be two dimensional.") 944 if np.any(meanshape == 0): 945 raise ValueError("Array `mean` has invalid shape.") 946 947 # Process among-row covariance 948 rowcov = np.asarray(rowcov, dtype=float) 949 if rowcov.ndim == 0: 950 if mean is not None: 951 rowcov = rowcov * np.identity(meanshape[0]) 952 else: 953 rowcov = rowcov * np.identity(1) 954 elif rowcov.ndim == 1: 955 rowcov = np.diag(rowcov) 956 rowshape = rowcov.shape 957 if len(rowshape) != 2: 958 raise ValueError("`rowcov` must be a scalar or a 2D array.") 959 if rowshape[0] != rowshape[1]: 960 raise ValueError("Array `rowcov` must be square.") 961 if rowshape[0] == 0: 962 raise ValueError("Array `rowcov` has invalid shape.") 963 numrows = rowshape[0] 964 965 # Process among-column covariance 966 colcov = np.asarray(colcov, dtype=float) 967 if colcov.ndim == 0: 968 if mean is not None: 969 colcov = colcov * np.identity(meanshape[1]) 970 else: 971 colcov = colcov * np.identity(1) 972 elif colcov.ndim == 1: 973 colcov = np.diag(colcov) 974 colshape = colcov.shape 975 if len(colshape) != 2: 976 raise ValueError("`colcov` must be a scalar or a 2D array.") 977 if colshape[0] != colshape[1]: 978 raise ValueError("Array `colcov` must be square.") 979 if colshape[0] == 0: 980 raise ValueError("Array `colcov` has invalid shape.") 981 numcols = colshape[0] 982 983 # Ensure mean and covariances compatible 984 if mean is not None: 985 if meanshape[0] != numrows: 986 raise ValueError("Arrays `mean` and `rowcov` must have the " 987 "same number of rows.") 988 if meanshape[1] != numcols: 989 raise ValueError("Arrays `mean` and `colcov` must have the " 990 "same number of columns.") 991 else: 992 mean = np.zeros((numrows, numcols)) 993 994 dims = (numrows, numcols) 995 996 return dims, mean, rowcov, colcov 997 998 def _process_quantiles(self, X, dims): 999 """ 1000 Adjust quantiles array so that last two axes labels the components of 1001 each data point. 1002 """ 1003 X = np.asarray(X, dtype=float) 1004 if X.ndim == 2: 1005 X = X[np.newaxis, :] 1006 if X.shape[-2:] != dims: 1007 raise ValueError("The shape of array `X` is not compatible " 1008 "with the distribution parameters.") 1009 return X 1010 1011 def _logpdf(self, dims, X, mean, row_prec_rt, log_det_rowcov, 1012 col_prec_rt, log_det_colcov): 1013 """Log of the matrix normal probability density function. 1014 1015 Parameters 1016 ---------- 1017 dims : tuple 1018 Dimensions of the matrix variates 1019 X : ndarray 1020 Points at which to evaluate the log of the probability 1021 density function 1022 mean : ndarray 1023 Mean of the distribution 1024 row_prec_rt : ndarray 1025 A decomposition such that np.dot(row_prec_rt, row_prec_rt.T) 1026 is the inverse of the among-row covariance matrix 1027 log_det_rowcov : float 1028 Logarithm of the determinant of the among-row covariance matrix 1029 col_prec_rt : ndarray 1030 A decomposition such that np.dot(col_prec_rt, col_prec_rt.T) 1031 is the inverse of the among-column covariance matrix 1032 log_det_colcov : float 1033 Logarithm of the determinant of the among-column covariance matrix 1034 1035 Notes 1036 ----- 1037 As this function does no argument checking, it should not be 1038 called directly; use 'logpdf' instead. 1039 1040 """ 1041 numrows, numcols = dims 1042 roll_dev = np.rollaxis(X-mean, axis=-1, start=0) 1043 scale_dev = np.tensordot(col_prec_rt.T, 1044 np.dot(roll_dev, row_prec_rt), 1) 1045 maha = np.sum(np.sum(np.square(scale_dev), axis=-1), axis=0) 1046 return -0.5 * (numrows*numcols*_LOG_2PI + numcols*log_det_rowcov 1047 + numrows*log_det_colcov + maha) 1048 1049 def logpdf(self, X, mean=None, rowcov=1, colcov=1): 1050 """Log of the matrix normal probability density function. 1051 1052 Parameters 1053 ---------- 1054 X : array_like 1055 Quantiles, with the last two axes of `X` denoting the components. 1056 %(_matnorm_doc_default_callparams)s 1057 1058 Returns 1059 ------- 1060 logpdf : ndarray 1061 Log of the probability density function evaluated at `X` 1062 1063 Notes 1064 ----- 1065 %(_matnorm_doc_callparams_note)s 1066 1067 """ 1068 dims, mean, rowcov, colcov = self._process_parameters(mean, rowcov, 1069 colcov) 1070 X = self._process_quantiles(X, dims) 1071 rowpsd = _PSD(rowcov, allow_singular=False) 1072 colpsd = _PSD(colcov, allow_singular=False) 1073 out = self._logpdf(dims, X, mean, rowpsd.U, rowpsd.log_pdet, colpsd.U, 1074 colpsd.log_pdet) 1075 return _squeeze_output(out) 1076 1077 def pdf(self, X, mean=None, rowcov=1, colcov=1): 1078 """Matrix normal probability density function. 1079 1080 Parameters 1081 ---------- 1082 X : array_like 1083 Quantiles, with the last two axes of `X` denoting the components. 1084 %(_matnorm_doc_default_callparams)s 1085 1086 Returns 1087 ------- 1088 pdf : ndarray 1089 Probability density function evaluated at `X` 1090 1091 Notes 1092 ----- 1093 %(_matnorm_doc_callparams_note)s 1094 1095 """ 1096 return np.exp(self.logpdf(X, mean, rowcov, colcov)) 1097 1098 def rvs(self, mean=None, rowcov=1, colcov=1, size=1, random_state=None): 1099 """Draw random samples from a matrix normal distribution. 1100 1101 Parameters 1102 ---------- 1103 %(_matnorm_doc_default_callparams)s 1104 size : integer, optional 1105 Number of samples to draw (default 1). 1106 %(_doc_random_state)s 1107 1108 Returns 1109 ------- 1110 rvs : ndarray or scalar 1111 Random variates of size (`size`, `dims`), where `dims` is the 1112 dimension of the random matrices. 1113 1114 Notes 1115 ----- 1116 %(_matnorm_doc_callparams_note)s 1117 1118 """ 1119 size = int(size) 1120 dims, mean, rowcov, colcov = self._process_parameters(mean, rowcov, 1121 colcov) 1122 rowchol = scipy.linalg.cholesky(rowcov, lower=True) 1123 colchol = scipy.linalg.cholesky(colcov, lower=True) 1124 random_state = self._get_random_state(random_state) 1125 std_norm = random_state.standard_normal(size=(dims[1], size, dims[0])) 1126 roll_rvs = np.tensordot(colchol, np.dot(std_norm, rowchol.T), 1) 1127 out = np.rollaxis(roll_rvs.T, axis=1, start=0) + mean[np.newaxis, :, :] 1128 if size == 1: 1129 out = out.reshape(mean.shape) 1130 return out 1131 1132 1133matrix_normal = matrix_normal_gen() 1134 1135 1136class matrix_normal_frozen(multi_rv_frozen): 1137 """Create a frozen matrix normal distribution. 1138 1139 Parameters 1140 ---------- 1141 %(_matnorm_doc_default_callparams)s 1142 seed : {None, int, `numpy.random.Generator`, 1143 `numpy.random.RandomState`}, optional 1144 1145 If `seed` is None (or `np.random`), the `numpy.random.RandomState` 1146 singleton is used. 1147 If `seed` is an int, a new ``RandomState`` instance is used, 1148 seeded with `seed`. 1149 If `seed` is already a ``Generator`` or ``RandomState`` instance 1150 then that instance is used. 1151 1152 Examples 1153 -------- 1154 >>> from scipy.stats import matrix_normal 1155 1156 >>> distn = matrix_normal(mean=np.zeros((3,3))) 1157 >>> X = distn.rvs(); X 1158 array([[-0.02976962, 0.93339138, -0.09663178], 1159 [ 0.67405524, 0.28250467, -0.93308929], 1160 [-0.31144782, 0.74535536, 1.30412916]]) 1161 >>> distn.pdf(X) 1162 2.5160642368346784e-05 1163 >>> distn.logpdf(X) 1164 -10.590229595124615 1165 """ 1166 1167 def __init__(self, mean=None, rowcov=1, colcov=1, seed=None): 1168 1169 self._dist = matrix_normal_gen(seed) 1170 self.dims, self.mean, self.rowcov, self.colcov = \ 1171 self._dist._process_parameters(mean, rowcov, colcov) 1172 self.rowpsd = _PSD(self.rowcov, allow_singular=False) 1173 self.colpsd = _PSD(self.colcov, allow_singular=False) 1174 1175 def logpdf(self, X): 1176 X = self._dist._process_quantiles(X, self.dims) 1177 out = self._dist._logpdf(self.dims, X, self.mean, self.rowpsd.U, 1178 self.rowpsd.log_pdet, self.colpsd.U, 1179 self.colpsd.log_pdet) 1180 return _squeeze_output(out) 1181 1182 def pdf(self, X): 1183 return np.exp(self.logpdf(X)) 1184 1185 def rvs(self, size=1, random_state=None): 1186 return self._dist.rvs(self.mean, self.rowcov, self.colcov, size, 1187 random_state) 1188 1189 1190# Set frozen generator docstrings from corresponding docstrings in 1191# matrix_normal_gen and fill in default strings in class docstrings 1192for name in ['logpdf', 'pdf', 'rvs']: 1193 method = matrix_normal_gen.__dict__[name] 1194 method_frozen = matrix_normal_frozen.__dict__[name] 1195 method_frozen.__doc__ = doccer.docformat(method.__doc__, 1196 matnorm_docdict_noparams) 1197 method.__doc__ = doccer.docformat(method.__doc__, matnorm_docdict_params) 1198 1199_dirichlet_doc_default_callparams = """\ 1200alpha : array_like 1201 The concentration parameters. The number of entries determines the 1202 dimensionality of the distribution. 1203""" 1204_dirichlet_doc_frozen_callparams = "" 1205 1206_dirichlet_doc_frozen_callparams_note = \ 1207 """See class definition for a detailed description of parameters.""" 1208 1209dirichlet_docdict_params = { 1210 '_dirichlet_doc_default_callparams': _dirichlet_doc_default_callparams, 1211 '_doc_random_state': _doc_random_state 1212} 1213 1214dirichlet_docdict_noparams = { 1215 '_dirichlet_doc_default_callparams': _dirichlet_doc_frozen_callparams, 1216 '_doc_random_state': _doc_random_state 1217} 1218 1219 1220def _dirichlet_check_parameters(alpha): 1221 alpha = np.asarray(alpha) 1222 if np.min(alpha) <= 0: 1223 raise ValueError("All parameters must be greater than 0") 1224 elif alpha.ndim != 1: 1225 raise ValueError("Parameter vector 'a' must be one dimensional, " 1226 "but a.shape = %s." % (alpha.shape, )) 1227 return alpha 1228 1229 1230def _dirichlet_check_input(alpha, x): 1231 x = np.asarray(x) 1232 1233 if x.shape[0] + 1 != alpha.shape[0] and x.shape[0] != alpha.shape[0]: 1234 raise ValueError("Vector 'x' must have either the same number " 1235 "of entries as, or one entry fewer than, " 1236 "parameter vector 'a', but alpha.shape = %s " 1237 "and x.shape = %s." % (alpha.shape, x.shape)) 1238 1239 if x.shape[0] != alpha.shape[0]: 1240 xk = np.array([1 - np.sum(x, 0)]) 1241 if xk.ndim == 1: 1242 x = np.append(x, xk) 1243 elif xk.ndim == 2: 1244 x = np.vstack((x, xk)) 1245 else: 1246 raise ValueError("The input must be one dimensional or a two " 1247 "dimensional matrix containing the entries.") 1248 1249 if np.min(x) < 0: 1250 raise ValueError("Each entry in 'x' must be greater than or equal " 1251 "to zero.") 1252 1253 if np.max(x) > 1: 1254 raise ValueError("Each entry in 'x' must be smaller or equal one.") 1255 1256 # Check x_i > 0 or alpha_i > 1 1257 xeq0 = (x == 0) 1258 alphalt1 = (alpha < 1) 1259 if x.shape != alpha.shape: 1260 alphalt1 = np.repeat(alphalt1, x.shape[-1], axis=-1).reshape(x.shape) 1261 chk = np.logical_and(xeq0, alphalt1) 1262 1263 if np.sum(chk): 1264 raise ValueError("Each entry in 'x' must be greater than zero if its " 1265 "alpha is less than one.") 1266 1267 if (np.abs(np.sum(x, 0) - 1.0) > 10e-10).any(): 1268 raise ValueError("The input vector 'x' must lie within the normal " 1269 "simplex. but np.sum(x, 0) = %s." % np.sum(x, 0)) 1270 1271 return x 1272 1273 1274def _lnB(alpha): 1275 r"""Internal helper function to compute the log of the useful quotient. 1276 1277 .. math:: 1278 1279 B(\alpha) = \frac{\prod_{i=1}{K}\Gamma(\alpha_i)} 1280 {\Gamma\left(\sum_{i=1}^{K} \alpha_i \right)} 1281 1282 Parameters 1283 ---------- 1284 %(_dirichlet_doc_default_callparams)s 1285 1286 Returns 1287 ------- 1288 B : scalar 1289 Helper quotient, internal use only 1290 1291 """ 1292 return np.sum(gammaln(alpha)) - gammaln(np.sum(alpha)) 1293 1294 1295class dirichlet_gen(multi_rv_generic): 1296 r"""A Dirichlet random variable. 1297 1298 The `alpha` keyword specifies the concentration parameters of the 1299 distribution. 1300 1301 .. versionadded:: 0.15.0 1302 1303 Methods 1304 ------- 1305 ``pdf(x, alpha)`` 1306 Probability density function. 1307 ``logpdf(x, alpha)`` 1308 Log of the probability density function. 1309 ``rvs(alpha, size=1, random_state=None)`` 1310 Draw random samples from a Dirichlet distribution. 1311 ``mean(alpha)`` 1312 The mean of the Dirichlet distribution 1313 ``var(alpha)`` 1314 The variance of the Dirichlet distribution 1315 ``entropy(alpha)`` 1316 Compute the differential entropy of the Dirichlet distribution. 1317 1318 Parameters 1319 ---------- 1320 x : array_like 1321 Quantiles, with the last axis of `x` denoting the components. 1322 %(_dirichlet_doc_default_callparams)s 1323 %(_doc_random_state)s 1324 1325 Alternatively, the object may be called (as a function) to fix 1326 concentration parameters, returning a "frozen" Dirichlet 1327 random variable: 1328 1329 rv = dirichlet(alpha) 1330 - Frozen object with the same methods but holding the given 1331 concentration parameters fixed. 1332 1333 Notes 1334 ----- 1335 Each :math:`\alpha` entry must be positive. The distribution has only 1336 support on the simplex defined by 1337 1338 .. math:: 1339 \sum_{i=1}^{K} x_i = 1 1340 1341 where 0 < x_i < 1. 1342 1343 If the quantiles don't lie within the simplex, a ValueError is raised. 1344 1345 The probability density function for `dirichlet` is 1346 1347 .. math:: 1348 1349 f(x) = \frac{1}{\mathrm{B}(\boldsymbol\alpha)} \prod_{i=1}^K x_i^{\alpha_i - 1} 1350 1351 where 1352 1353 .. math:: 1354 1355 \mathrm{B}(\boldsymbol\alpha) = \frac{\prod_{i=1}^K \Gamma(\alpha_i)} 1356 {\Gamma\bigl(\sum_{i=1}^K \alpha_i\bigr)} 1357 1358 and :math:`\boldsymbol\alpha=(\alpha_1,\ldots,\alpha_K)`, the 1359 concentration parameters and :math:`K` is the dimension of the space 1360 where :math:`x` takes values. 1361 1362 Note that the dirichlet interface is somewhat inconsistent. 1363 The array returned by the rvs function is transposed 1364 with respect to the format expected by the pdf and logpdf. 1365 1366 Examples 1367 -------- 1368 >>> from scipy.stats import dirichlet 1369 1370 Generate a dirichlet random variable 1371 1372 >>> quantiles = np.array([0.2, 0.2, 0.6]) # specify quantiles 1373 >>> alpha = np.array([0.4, 5, 15]) # specify concentration parameters 1374 >>> dirichlet.pdf(quantiles, alpha) 1375 0.2843831684937255 1376 1377 The same PDF but following a log scale 1378 1379 >>> dirichlet.logpdf(quantiles, alpha) 1380 -1.2574327653159187 1381 1382 Once we specify the dirichlet distribution 1383 we can then calculate quantities of interest 1384 1385 >>> dirichlet.mean(alpha) # get the mean of the distribution 1386 array([0.01960784, 0.24509804, 0.73529412]) 1387 >>> dirichlet.var(alpha) # get variance 1388 array([0.00089829, 0.00864603, 0.00909517]) 1389 >>> dirichlet.entropy(alpha) # calculate the differential entropy 1390 -4.3280162474082715 1391 1392 We can also return random samples from the distribution 1393 1394 >>> dirichlet.rvs(alpha, size=1, random_state=1) 1395 array([[0.00766178, 0.24670518, 0.74563305]]) 1396 >>> dirichlet.rvs(alpha, size=2, random_state=2) 1397 array([[0.01639427, 0.1292273 , 0.85437844], 1398 [0.00156917, 0.19033695, 0.80809388]]) 1399 1400 """ 1401 1402 def __init__(self, seed=None): 1403 super().__init__(seed) 1404 self.__doc__ = doccer.docformat(self.__doc__, dirichlet_docdict_params) 1405 1406 def __call__(self, alpha, seed=None): 1407 return dirichlet_frozen(alpha, seed=seed) 1408 1409 def _logpdf(self, x, alpha): 1410 """Log of the Dirichlet probability density function. 1411 1412 Parameters 1413 ---------- 1414 x : ndarray 1415 Points at which to evaluate the log of the probability 1416 density function 1417 %(_dirichlet_doc_default_callparams)s 1418 1419 Notes 1420 ----- 1421 As this function does no argument checking, it should not be 1422 called directly; use 'logpdf' instead. 1423 1424 """ 1425 lnB = _lnB(alpha) 1426 return - lnB + np.sum((xlogy(alpha - 1, x.T)).T, 0) 1427 1428 def logpdf(self, x, alpha): 1429 """Log of the Dirichlet probability density function. 1430 1431 Parameters 1432 ---------- 1433 x : array_like 1434 Quantiles, with the last axis of `x` denoting the components. 1435 %(_dirichlet_doc_default_callparams)s 1436 1437 Returns 1438 ------- 1439 pdf : ndarray or scalar 1440 Log of the probability density function evaluated at `x`. 1441 1442 """ 1443 alpha = _dirichlet_check_parameters(alpha) 1444 x = _dirichlet_check_input(alpha, x) 1445 1446 out = self._logpdf(x, alpha) 1447 return _squeeze_output(out) 1448 1449 def pdf(self, x, alpha): 1450 """The Dirichlet probability density function. 1451 1452 Parameters 1453 ---------- 1454 x : array_like 1455 Quantiles, with the last axis of `x` denoting the components. 1456 %(_dirichlet_doc_default_callparams)s 1457 1458 Returns 1459 ------- 1460 pdf : ndarray or scalar 1461 The probability density function evaluated at `x`. 1462 1463 """ 1464 alpha = _dirichlet_check_parameters(alpha) 1465 x = _dirichlet_check_input(alpha, x) 1466 1467 out = np.exp(self._logpdf(x, alpha)) 1468 return _squeeze_output(out) 1469 1470 def mean(self, alpha): 1471 """Compute the mean of the dirichlet distribution. 1472 1473 Parameters 1474 ---------- 1475 %(_dirichlet_doc_default_callparams)s 1476 1477 Returns 1478 ------- 1479 mu : ndarray or scalar 1480 Mean of the Dirichlet distribution. 1481 1482 """ 1483 alpha = _dirichlet_check_parameters(alpha) 1484 1485 out = alpha / (np.sum(alpha)) 1486 return _squeeze_output(out) 1487 1488 def var(self, alpha): 1489 """Compute the variance of the dirichlet distribution. 1490 1491 Parameters 1492 ---------- 1493 %(_dirichlet_doc_default_callparams)s 1494 1495 Returns 1496 ------- 1497 v : ndarray or scalar 1498 Variance of the Dirichlet distribution. 1499 1500 """ 1501 1502 alpha = _dirichlet_check_parameters(alpha) 1503 1504 alpha0 = np.sum(alpha) 1505 out = (alpha * (alpha0 - alpha)) / ((alpha0 * alpha0) * (alpha0 + 1)) 1506 return _squeeze_output(out) 1507 1508 def entropy(self, alpha): 1509 """Compute the differential entropy of the dirichlet distribution. 1510 1511 Parameters 1512 ---------- 1513 %(_dirichlet_doc_default_callparams)s 1514 1515 Returns 1516 ------- 1517 h : scalar 1518 Entropy of the Dirichlet distribution 1519 1520 """ 1521 1522 alpha = _dirichlet_check_parameters(alpha) 1523 1524 alpha0 = np.sum(alpha) 1525 lnB = _lnB(alpha) 1526 K = alpha.shape[0] 1527 1528 out = lnB + (alpha0 - K) * scipy.special.psi(alpha0) - np.sum( 1529 (alpha - 1) * scipy.special.psi(alpha)) 1530 return _squeeze_output(out) 1531 1532 def rvs(self, alpha, size=1, random_state=None): 1533 """Draw random samples from a Dirichlet distribution. 1534 1535 Parameters 1536 ---------- 1537 %(_dirichlet_doc_default_callparams)s 1538 size : int, optional 1539 Number of samples to draw (default 1). 1540 %(_doc_random_state)s 1541 1542 Returns 1543 ------- 1544 rvs : ndarray or scalar 1545 Random variates of size (`size`, `N`), where `N` is the 1546 dimension of the random variable. 1547 1548 """ 1549 alpha = _dirichlet_check_parameters(alpha) 1550 random_state = self._get_random_state(random_state) 1551 return random_state.dirichlet(alpha, size=size) 1552 1553 1554dirichlet = dirichlet_gen() 1555 1556 1557class dirichlet_frozen(multi_rv_frozen): 1558 def __init__(self, alpha, seed=None): 1559 self.alpha = _dirichlet_check_parameters(alpha) 1560 self._dist = dirichlet_gen(seed) 1561 1562 def logpdf(self, x): 1563 return self._dist.logpdf(x, self.alpha) 1564 1565 def pdf(self, x): 1566 return self._dist.pdf(x, self.alpha) 1567 1568 def mean(self): 1569 return self._dist.mean(self.alpha) 1570 1571 def var(self): 1572 return self._dist.var(self.alpha) 1573 1574 def entropy(self): 1575 return self._dist.entropy(self.alpha) 1576 1577 def rvs(self, size=1, random_state=None): 1578 return self._dist.rvs(self.alpha, size, random_state) 1579 1580 1581# Set frozen generator docstrings from corresponding docstrings in 1582# multivariate_normal_gen and fill in default strings in class docstrings 1583for name in ['logpdf', 'pdf', 'rvs', 'mean', 'var', 'entropy']: 1584 method = dirichlet_gen.__dict__[name] 1585 method_frozen = dirichlet_frozen.__dict__[name] 1586 method_frozen.__doc__ = doccer.docformat( 1587 method.__doc__, dirichlet_docdict_noparams) 1588 method.__doc__ = doccer.docformat(method.__doc__, dirichlet_docdict_params) 1589 1590 1591_wishart_doc_default_callparams = """\ 1592df : int 1593 Degrees of freedom, must be greater than or equal to dimension of the 1594 scale matrix 1595scale : array_like 1596 Symmetric positive definite scale matrix of the distribution 1597""" 1598 1599_wishart_doc_callparams_note = "" 1600 1601_wishart_doc_frozen_callparams = "" 1602 1603_wishart_doc_frozen_callparams_note = \ 1604 """See class definition for a detailed description of parameters.""" 1605 1606wishart_docdict_params = { 1607 '_doc_default_callparams': _wishart_doc_default_callparams, 1608 '_doc_callparams_note': _wishart_doc_callparams_note, 1609 '_doc_random_state': _doc_random_state 1610} 1611 1612wishart_docdict_noparams = { 1613 '_doc_default_callparams': _wishart_doc_frozen_callparams, 1614 '_doc_callparams_note': _wishart_doc_frozen_callparams_note, 1615 '_doc_random_state': _doc_random_state 1616} 1617 1618 1619class wishart_gen(multi_rv_generic): 1620 r"""A Wishart random variable. 1621 1622 The `df` keyword specifies the degrees of freedom. The `scale` keyword 1623 specifies the scale matrix, which must be symmetric and positive definite. 1624 In this context, the scale matrix is often interpreted in terms of a 1625 multivariate normal precision matrix (the inverse of the covariance 1626 matrix). These arguments must satisfy the relationship 1627 ``df > scale.ndim - 1``, but see notes on using the `rvs` method with 1628 ``df < scale.ndim``. 1629 1630 Methods 1631 ------- 1632 ``pdf(x, df, scale)`` 1633 Probability density function. 1634 ``logpdf(x, df, scale)`` 1635 Log of the probability density function. 1636 ``rvs(df, scale, size=1, random_state=None)`` 1637 Draw random samples from a Wishart distribution. 1638 ``entropy()`` 1639 Compute the differential entropy of the Wishart distribution. 1640 1641 Parameters 1642 ---------- 1643 x : array_like 1644 Quantiles, with the last axis of `x` denoting the components. 1645 %(_doc_default_callparams)s 1646 %(_doc_random_state)s 1647 1648 Alternatively, the object may be called (as a function) to fix the degrees 1649 of freedom and scale parameters, returning a "frozen" Wishart random 1650 variable: 1651 1652 rv = wishart(df=1, scale=1) 1653 - Frozen object with the same methods but holding the given 1654 degrees of freedom and scale fixed. 1655 1656 See Also 1657 -------- 1658 invwishart, chi2 1659 1660 Notes 1661 ----- 1662 %(_doc_callparams_note)s 1663 1664 The scale matrix `scale` must be a symmetric positive definite 1665 matrix. Singular matrices, including the symmetric positive semi-definite 1666 case, are not supported. 1667 1668 The Wishart distribution is often denoted 1669 1670 .. math:: 1671 1672 W_p(\nu, \Sigma) 1673 1674 where :math:`\nu` is the degrees of freedom and :math:`\Sigma` is the 1675 :math:`p \times p` scale matrix. 1676 1677 The probability density function for `wishart` has support over positive 1678 definite matrices :math:`S`; if :math:`S \sim W_p(\nu, \Sigma)`, then 1679 its PDF is given by: 1680 1681 .. math:: 1682 1683 f(S) = \frac{|S|^{\frac{\nu - p - 1}{2}}}{2^{ \frac{\nu p}{2} } 1684 |\Sigma|^\frac{\nu}{2} \Gamma_p \left ( \frac{\nu}{2} \right )} 1685 \exp\left( -tr(\Sigma^{-1} S) / 2 \right) 1686 1687 If :math:`S \sim W_p(\nu, \Sigma)` (Wishart) then 1688 :math:`S^{-1} \sim W_p^{-1}(\nu, \Sigma^{-1})` (inverse Wishart). 1689 1690 If the scale matrix is 1-dimensional and equal to one, then the Wishart 1691 distribution :math:`W_1(\nu, 1)` collapses to the :math:`\chi^2(\nu)` 1692 distribution. 1693 1694 The algorithm [2]_ implemented by the `rvs` method may 1695 produce numerically singular matrices with :math:`p - 1 < \nu < p`; the 1696 user may wish to check for this condition and generate replacement samples 1697 as necessary. 1698 1699 1700 .. versionadded:: 0.16.0 1701 1702 References 1703 ---------- 1704 .. [1] M.L. Eaton, "Multivariate Statistics: A Vector Space Approach", 1705 Wiley, 1983. 1706 .. [2] W.B. Smith and R.R. Hocking, "Algorithm AS 53: Wishart Variate 1707 Generator", Applied Statistics, vol. 21, pp. 341-345, 1972. 1708 1709 Examples 1710 -------- 1711 >>> import matplotlib.pyplot as plt 1712 >>> from scipy.stats import wishart, chi2 1713 >>> x = np.linspace(1e-5, 8, 100) 1714 >>> w = wishart.pdf(x, df=3, scale=1); w[:5] 1715 array([ 0.00126156, 0.10892176, 0.14793434, 0.17400548, 0.1929669 ]) 1716 >>> c = chi2.pdf(x, 3); c[:5] 1717 array([ 0.00126156, 0.10892176, 0.14793434, 0.17400548, 0.1929669 ]) 1718 >>> plt.plot(x, w) 1719 1720 The input quantiles can be any shape of array, as long as the last 1721 axis labels the components. 1722 1723 """ 1724 1725 def __init__(self, seed=None): 1726 super().__init__(seed) 1727 self.__doc__ = doccer.docformat(self.__doc__, wishart_docdict_params) 1728 1729 def __call__(self, df=None, scale=None, seed=None): 1730 """Create a frozen Wishart distribution. 1731 1732 See `wishart_frozen` for more information. 1733 """ 1734 return wishart_frozen(df, scale, seed) 1735 1736 def _process_parameters(self, df, scale): 1737 if scale is None: 1738 scale = 1.0 1739 scale = np.asarray(scale, dtype=float) 1740 1741 if scale.ndim == 0: 1742 scale = scale[np.newaxis, np.newaxis] 1743 elif scale.ndim == 1: 1744 scale = np.diag(scale) 1745 elif scale.ndim == 2 and not scale.shape[0] == scale.shape[1]: 1746 raise ValueError("Array 'scale' must be square if it is two" 1747 " dimensional, but scale.scale = %s." 1748 % str(scale.shape)) 1749 elif scale.ndim > 2: 1750 raise ValueError("Array 'scale' must be at most two-dimensional," 1751 " but scale.ndim = %d" % scale.ndim) 1752 1753 dim = scale.shape[0] 1754 1755 if df is None: 1756 df = dim 1757 elif not np.isscalar(df): 1758 raise ValueError("Degrees of freedom must be a scalar.") 1759 elif df <= dim - 1: 1760 raise ValueError("Degrees of freedom must be greater than the " 1761 "dimension of scale matrix minus 1.") 1762 1763 return dim, df, scale 1764 1765 def _process_quantiles(self, x, dim): 1766 """ 1767 Adjust quantiles array so that last axis labels the components of 1768 each data point. 1769 """ 1770 x = np.asarray(x, dtype=float) 1771 1772 if x.ndim == 0: 1773 x = x * np.eye(dim)[:, :, np.newaxis] 1774 if x.ndim == 1: 1775 if dim == 1: 1776 x = x[np.newaxis, np.newaxis, :] 1777 else: 1778 x = np.diag(x)[:, :, np.newaxis] 1779 elif x.ndim == 2: 1780 if not x.shape[0] == x.shape[1]: 1781 raise ValueError("Quantiles must be square if they are two" 1782 " dimensional, but x.shape = %s." 1783 % str(x.shape)) 1784 x = x[:, :, np.newaxis] 1785 elif x.ndim == 3: 1786 if not x.shape[0] == x.shape[1]: 1787 raise ValueError("Quantiles must be square in the first two" 1788 " dimensions if they are three dimensional" 1789 ", but x.shape = %s." % str(x.shape)) 1790 elif x.ndim > 3: 1791 raise ValueError("Quantiles must be at most two-dimensional with" 1792 " an additional dimension for multiple" 1793 "components, but x.ndim = %d" % x.ndim) 1794 1795 # Now we have 3-dim array; should have shape [dim, dim, *] 1796 if not x.shape[0:2] == (dim, dim): 1797 raise ValueError('Quantiles have incompatible dimensions: should' 1798 ' be %s, got %s.' % ((dim, dim), x.shape[0:2])) 1799 1800 return x 1801 1802 def _process_size(self, size): 1803 size = np.asarray(size) 1804 1805 if size.ndim == 0: 1806 size = size[np.newaxis] 1807 elif size.ndim > 1: 1808 raise ValueError('Size must be an integer or tuple of integers;' 1809 ' thus must have dimension <= 1.' 1810 ' Got size.ndim = %s' % str(tuple(size))) 1811 n = size.prod() 1812 shape = tuple(size) 1813 1814 return n, shape 1815 1816 def _logpdf(self, x, dim, df, scale, log_det_scale, C): 1817 """Log of the Wishart probability density function. 1818 1819 Parameters 1820 ---------- 1821 x : ndarray 1822 Points at which to evaluate the log of the probability 1823 density function 1824 dim : int 1825 Dimension of the scale matrix 1826 df : int 1827 Degrees of freedom 1828 scale : ndarray 1829 Scale matrix 1830 log_det_scale : float 1831 Logarithm of the determinant of the scale matrix 1832 C : ndarray 1833 Cholesky factorization of the scale matrix, lower triagular. 1834 1835 Notes 1836 ----- 1837 As this function does no argument checking, it should not be 1838 called directly; use 'logpdf' instead. 1839 1840 """ 1841 # log determinant of x 1842 # Note: x has components along the last axis, so that x.T has 1843 # components alone the 0-th axis. Then since det(A) = det(A'), this 1844 # gives us a 1-dim vector of determinants 1845 1846 # Retrieve tr(scale^{-1} x) 1847 log_det_x = np.empty(x.shape[-1]) 1848 scale_inv_x = np.empty(x.shape) 1849 tr_scale_inv_x = np.empty(x.shape[-1]) 1850 for i in range(x.shape[-1]): 1851 _, log_det_x[i] = self._cholesky_logdet(x[:, :, i]) 1852 scale_inv_x[:, :, i] = scipy.linalg.cho_solve((C, True), x[:, :, i]) 1853 tr_scale_inv_x[i] = scale_inv_x[:, :, i].trace() 1854 1855 # Log PDF 1856 out = ((0.5 * (df - dim - 1) * log_det_x - 0.5 * tr_scale_inv_x) - 1857 (0.5 * df * dim * _LOG_2 + 0.5 * df * log_det_scale + 1858 multigammaln(0.5*df, dim))) 1859 1860 return out 1861 1862 def logpdf(self, x, df, scale): 1863 """Log of the Wishart probability density function. 1864 1865 Parameters 1866 ---------- 1867 x : array_like 1868 Quantiles, with the last axis of `x` denoting the components. 1869 Each quantile must be a symmetric positive definite matrix. 1870 %(_doc_default_callparams)s 1871 1872 Returns 1873 ------- 1874 pdf : ndarray 1875 Log of the probability density function evaluated at `x` 1876 1877 Notes 1878 ----- 1879 %(_doc_callparams_note)s 1880 1881 """ 1882 dim, df, scale = self._process_parameters(df, scale) 1883 x = self._process_quantiles(x, dim) 1884 1885 # Cholesky decomposition of scale, get log(det(scale)) 1886 C, log_det_scale = self._cholesky_logdet(scale) 1887 1888 out = self._logpdf(x, dim, df, scale, log_det_scale, C) 1889 return _squeeze_output(out) 1890 1891 def pdf(self, x, df, scale): 1892 """Wishart probability density function. 1893 1894 Parameters 1895 ---------- 1896 x : array_like 1897 Quantiles, with the last axis of `x` denoting the components. 1898 Each quantile must be a symmetric positive definite matrix. 1899 %(_doc_default_callparams)s 1900 1901 Returns 1902 ------- 1903 pdf : ndarray 1904 Probability density function evaluated at `x` 1905 1906 Notes 1907 ----- 1908 %(_doc_callparams_note)s 1909 1910 """ 1911 return np.exp(self.logpdf(x, df, scale)) 1912 1913 def _mean(self, dim, df, scale): 1914 """Mean of the Wishart distribution. 1915 1916 Parameters 1917 ---------- 1918 dim : int 1919 Dimension of the scale matrix 1920 %(_doc_default_callparams)s 1921 1922 Notes 1923 ----- 1924 As this function does no argument checking, it should not be 1925 called directly; use 'mean' instead. 1926 1927 """ 1928 return df * scale 1929 1930 def mean(self, df, scale): 1931 """Mean of the Wishart distribution. 1932 1933 Parameters 1934 ---------- 1935 %(_doc_default_callparams)s 1936 1937 Returns 1938 ------- 1939 mean : float 1940 The mean of the distribution 1941 """ 1942 dim, df, scale = self._process_parameters(df, scale) 1943 out = self._mean(dim, df, scale) 1944 return _squeeze_output(out) 1945 1946 def _mode(self, dim, df, scale): 1947 """Mode of the Wishart distribution. 1948 1949 Parameters 1950 ---------- 1951 dim : int 1952 Dimension of the scale matrix 1953 %(_doc_default_callparams)s 1954 1955 Notes 1956 ----- 1957 As this function does no argument checking, it should not be 1958 called directly; use 'mode' instead. 1959 1960 """ 1961 if df >= dim + 1: 1962 out = (df-dim-1) * scale 1963 else: 1964 out = None 1965 return out 1966 1967 def mode(self, df, scale): 1968 """Mode of the Wishart distribution 1969 1970 Only valid if the degrees of freedom are greater than the dimension of 1971 the scale matrix. 1972 1973 Parameters 1974 ---------- 1975 %(_doc_default_callparams)s 1976 1977 Returns 1978 ------- 1979 mode : float or None 1980 The Mode of the distribution 1981 """ 1982 dim, df, scale = self._process_parameters(df, scale) 1983 out = self._mode(dim, df, scale) 1984 return _squeeze_output(out) if out is not None else out 1985 1986 def _var(self, dim, df, scale): 1987 """Variance of the Wishart distribution. 1988 1989 Parameters 1990 ---------- 1991 dim : int 1992 Dimension of the scale matrix 1993 %(_doc_default_callparams)s 1994 1995 Notes 1996 ----- 1997 As this function does no argument checking, it should not be 1998 called directly; use 'var' instead. 1999 2000 """ 2001 var = scale**2 2002 diag = scale.diagonal() # 1 x dim array 2003 var += np.outer(diag, diag) 2004 var *= df 2005 return var 2006 2007 def var(self, df, scale): 2008 """Variance of the Wishart distribution. 2009 2010 Parameters 2011 ---------- 2012 %(_doc_default_callparams)s 2013 2014 Returns 2015 ------- 2016 var : float 2017 The variance of the distribution 2018 """ 2019 dim, df, scale = self._process_parameters(df, scale) 2020 out = self._var(dim, df, scale) 2021 return _squeeze_output(out) 2022 2023 def _standard_rvs(self, n, shape, dim, df, random_state): 2024 """ 2025 Parameters 2026 ---------- 2027 n : integer 2028 Number of variates to generate 2029 shape : iterable 2030 Shape of the variates to generate 2031 dim : int 2032 Dimension of the scale matrix 2033 df : int 2034 Degrees of freedom 2035 random_state : {None, int, `numpy.random.Generator`, 2036 `numpy.random.RandomState`}, optional 2037 2038 If `seed` is None (or `np.random`), the `numpy.random.RandomState` 2039 singleton is used. 2040 If `seed` is an int, a new ``RandomState`` instance is used, 2041 seeded with `seed`. 2042 If `seed` is already a ``Generator`` or ``RandomState`` instance 2043 then that instance is used. 2044 2045 Notes 2046 ----- 2047 As this function does no argument checking, it should not be 2048 called directly; use 'rvs' instead. 2049 2050 """ 2051 # Random normal variates for off-diagonal elements 2052 n_tril = dim * (dim-1) // 2 2053 covariances = random_state.normal( 2054 size=n*n_tril).reshape(shape+(n_tril,)) 2055 2056 # Random chi-square variates for diagonal elements 2057 variances = (np.r_[[random_state.chisquare(df-(i+1)+1, size=n)**0.5 2058 for i in range(dim)]].reshape((dim,) + 2059 shape[::-1]).T) 2060 2061 # Create the A matri(ces) - lower triangular 2062 A = np.zeros(shape + (dim, dim)) 2063 2064 # Input the covariances 2065 size_idx = tuple([slice(None, None, None)]*len(shape)) 2066 tril_idx = np.tril_indices(dim, k=-1) 2067 A[size_idx + tril_idx] = covariances 2068 2069 # Input the variances 2070 diag_idx = np.diag_indices(dim) 2071 A[size_idx + diag_idx] = variances 2072 2073 return A 2074 2075 def _rvs(self, n, shape, dim, df, C, random_state): 2076 """Draw random samples from a Wishart distribution. 2077 2078 Parameters 2079 ---------- 2080 n : integer 2081 Number of variates to generate 2082 shape : iterable 2083 Shape of the variates to generate 2084 dim : int 2085 Dimension of the scale matrix 2086 df : int 2087 Degrees of freedom 2088 C : ndarray 2089 Cholesky factorization of the scale matrix, lower triangular. 2090 %(_doc_random_state)s 2091 2092 Notes 2093 ----- 2094 As this function does no argument checking, it should not be 2095 called directly; use 'rvs' instead. 2096 2097 """ 2098 random_state = self._get_random_state(random_state) 2099 # Calculate the matrices A, which are actually lower triangular 2100 # Cholesky factorizations of a matrix B such that B ~ W(df, I) 2101 A = self._standard_rvs(n, shape, dim, df, random_state) 2102 2103 # Calculate SA = C A A' C', where SA ~ W(df, scale) 2104 # Note: this is the product of a (lower) (lower) (lower)' (lower)' 2105 # or, denoting B = AA', it is C B C' where C is the lower 2106 # triangular Cholesky factorization of the scale matrix. 2107 # this appears to conflict with the instructions in [1]_, which 2108 # suggest that it should be D' B D where D is the lower 2109 # triangular factorization of the scale matrix. However, it is 2110 # meant to refer to the Bartlett (1933) representation of a 2111 # Wishart random variate as L A A' L' where L is lower triangular 2112 # so it appears that understanding D' to be upper triangular 2113 # is either a typo in or misreading of [1]_. 2114 for index in np.ndindex(shape): 2115 CA = np.dot(C, A[index]) 2116 A[index] = np.dot(CA, CA.T) 2117 2118 return A 2119 2120 def rvs(self, df, scale, size=1, random_state=None): 2121 """Draw random samples from a Wishart distribution. 2122 2123 Parameters 2124 ---------- 2125 %(_doc_default_callparams)s 2126 size : integer or iterable of integers, optional 2127 Number of samples to draw (default 1). 2128 %(_doc_random_state)s 2129 2130 Returns 2131 ------- 2132 rvs : ndarray 2133 Random variates of shape (`size`) + (`dim`, `dim), where `dim` is 2134 the dimension of the scale matrix. 2135 2136 Notes 2137 ----- 2138 %(_doc_callparams_note)s 2139 2140 """ 2141 n, shape = self._process_size(size) 2142 dim, df, scale = self._process_parameters(df, scale) 2143 2144 # Cholesky decomposition of scale 2145 C = scipy.linalg.cholesky(scale, lower=True) 2146 2147 out = self._rvs(n, shape, dim, df, C, random_state) 2148 2149 return _squeeze_output(out) 2150 2151 def _entropy(self, dim, df, log_det_scale): 2152 """Compute the differential entropy of the Wishart. 2153 2154 Parameters 2155 ---------- 2156 dim : int 2157 Dimension of the scale matrix 2158 df : int 2159 Degrees of freedom 2160 log_det_scale : float 2161 Logarithm of the determinant of the scale matrix 2162 2163 Notes 2164 ----- 2165 As this function does no argument checking, it should not be 2166 called directly; use 'entropy' instead. 2167 2168 """ 2169 return ( 2170 0.5 * (dim+1) * log_det_scale + 2171 0.5 * dim * (dim+1) * _LOG_2 + 2172 multigammaln(0.5*df, dim) - 2173 0.5 * (df - dim - 1) * np.sum( 2174 [psi(0.5*(df + 1 - (i+1))) for i in range(dim)] 2175 ) + 2176 0.5 * df * dim 2177 ) 2178 2179 def entropy(self, df, scale): 2180 """Compute the differential entropy of the Wishart. 2181 2182 Parameters 2183 ---------- 2184 %(_doc_default_callparams)s 2185 2186 Returns 2187 ------- 2188 h : scalar 2189 Entropy of the Wishart distribution 2190 2191 Notes 2192 ----- 2193 %(_doc_callparams_note)s 2194 2195 """ 2196 dim, df, scale = self._process_parameters(df, scale) 2197 _, log_det_scale = self._cholesky_logdet(scale) 2198 return self._entropy(dim, df, log_det_scale) 2199 2200 def _cholesky_logdet(self, scale): 2201 """Compute Cholesky decomposition and determine (log(det(scale)). 2202 2203 Parameters 2204 ---------- 2205 scale : ndarray 2206 Scale matrix. 2207 2208 Returns 2209 ------- 2210 c_decomp : ndarray 2211 The Cholesky decomposition of `scale`. 2212 logdet : scalar 2213 The log of the determinant of `scale`. 2214 2215 Notes 2216 ----- 2217 This computation of ``logdet`` is equivalent to 2218 ``np.linalg.slogdet(scale)``. It is ~2x faster though. 2219 2220 """ 2221 c_decomp = scipy.linalg.cholesky(scale, lower=True) 2222 logdet = 2 * np.sum(np.log(c_decomp.diagonal())) 2223 return c_decomp, logdet 2224 2225 2226wishart = wishart_gen() 2227 2228 2229class wishart_frozen(multi_rv_frozen): 2230 """Create a frozen Wishart distribution. 2231 2232 Parameters 2233 ---------- 2234 df : array_like 2235 Degrees of freedom of the distribution 2236 scale : array_like 2237 Scale matrix of the distribution 2238 seed : {None, int, `numpy.random.Generator`, 2239 `numpy.random.RandomState`}, optional 2240 2241 If `seed` is None (or `np.random`), the `numpy.random.RandomState` 2242 singleton is used. 2243 If `seed` is an int, a new ``RandomState`` instance is used, 2244 seeded with `seed`. 2245 If `seed` is already a ``Generator`` or ``RandomState`` instance then 2246 that instance is used. 2247 2248 """ 2249 def __init__(self, df, scale, seed=None): 2250 self._dist = wishart_gen(seed) 2251 self.dim, self.df, self.scale = self._dist._process_parameters( 2252 df, scale) 2253 self.C, self.log_det_scale = self._dist._cholesky_logdet(self.scale) 2254 2255 def logpdf(self, x): 2256 x = self._dist._process_quantiles(x, self.dim) 2257 2258 out = self._dist._logpdf(x, self.dim, self.df, self.scale, 2259 self.log_det_scale, self.C) 2260 return _squeeze_output(out) 2261 2262 def pdf(self, x): 2263 return np.exp(self.logpdf(x)) 2264 2265 def mean(self): 2266 out = self._dist._mean(self.dim, self.df, self.scale) 2267 return _squeeze_output(out) 2268 2269 def mode(self): 2270 out = self._dist._mode(self.dim, self.df, self.scale) 2271 return _squeeze_output(out) if out is not None else out 2272 2273 def var(self): 2274 out = self._dist._var(self.dim, self.df, self.scale) 2275 return _squeeze_output(out) 2276 2277 def rvs(self, size=1, random_state=None): 2278 n, shape = self._dist._process_size(size) 2279 out = self._dist._rvs(n, shape, self.dim, self.df, 2280 self.C, random_state) 2281 return _squeeze_output(out) 2282 2283 def entropy(self): 2284 return self._dist._entropy(self.dim, self.df, self.log_det_scale) 2285 2286 2287# Set frozen generator docstrings from corresponding docstrings in 2288# Wishart and fill in default strings in class docstrings 2289for name in ['logpdf', 'pdf', 'mean', 'mode', 'var', 'rvs', 'entropy']: 2290 method = wishart_gen.__dict__[name] 2291 method_frozen = wishart_frozen.__dict__[name] 2292 method_frozen.__doc__ = doccer.docformat( 2293 method.__doc__, wishart_docdict_noparams) 2294 method.__doc__ = doccer.docformat(method.__doc__, wishart_docdict_params) 2295 2296 2297def _cho_inv_batch(a, check_finite=True): 2298 """ 2299 Invert the matrices a_i, using a Cholesky factorization of A, where 2300 a_i resides in the last two dimensions of a and the other indices describe 2301 the index i. 2302 2303 Overwrites the data in a. 2304 2305 Parameters 2306 ---------- 2307 a : array 2308 Array of matrices to invert, where the matrices themselves are stored 2309 in the last two dimensions. 2310 check_finite : bool, optional 2311 Whether to check that the input matrices contain only finite numbers. 2312 Disabling may give a performance gain, but may result in problems 2313 (crashes, non-termination) if the inputs do contain infinities or NaNs. 2314 2315 Returns 2316 ------- 2317 x : array 2318 Array of inverses of the matrices ``a_i``. 2319 2320 See Also 2321 -------- 2322 scipy.linalg.cholesky : Cholesky factorization of a matrix 2323 2324 """ 2325 if check_finite: 2326 a1 = asarray_chkfinite(a) 2327 else: 2328 a1 = asarray(a) 2329 if len(a1.shape) < 2 or a1.shape[-2] != a1.shape[-1]: 2330 raise ValueError('expected square matrix in last two dimensions') 2331 2332 potrf, potri = get_lapack_funcs(('potrf', 'potri'), (a1,)) 2333 2334 triu_rows, triu_cols = np.triu_indices(a.shape[-2], k=1) 2335 for index in np.ndindex(a1.shape[:-2]): 2336 2337 # Cholesky decomposition 2338 a1[index], info = potrf(a1[index], lower=True, overwrite_a=False, 2339 clean=False) 2340 if info > 0: 2341 raise LinAlgError("%d-th leading minor not positive definite" 2342 % info) 2343 if info < 0: 2344 raise ValueError('illegal value in %d-th argument of internal' 2345 ' potrf' % -info) 2346 # Inversion 2347 a1[index], info = potri(a1[index], lower=True, overwrite_c=False) 2348 if info > 0: 2349 raise LinAlgError("the inverse could not be computed") 2350 if info < 0: 2351 raise ValueError('illegal value in %d-th argument of internal' 2352 ' potrf' % -info) 2353 2354 # Make symmetric (dpotri only fills in the lower triangle) 2355 a1[index][triu_rows, triu_cols] = a1[index][triu_cols, triu_rows] 2356 2357 return a1 2358 2359 2360class invwishart_gen(wishart_gen): 2361 r"""An inverse Wishart random variable. 2362 2363 The `df` keyword specifies the degrees of freedom. The `scale` keyword 2364 specifies the scale matrix, which must be symmetric and positive definite. 2365 In this context, the scale matrix is often interpreted in terms of a 2366 multivariate normal covariance matrix. 2367 2368 Methods 2369 ------- 2370 ``pdf(x, df, scale)`` 2371 Probability density function. 2372 ``logpdf(x, df, scale)`` 2373 Log of the probability density function. 2374 ``rvs(df, scale, size=1, random_state=None)`` 2375 Draw random samples from an inverse Wishart distribution. 2376 2377 Parameters 2378 ---------- 2379 x : array_like 2380 Quantiles, with the last axis of `x` denoting the components. 2381 %(_doc_default_callparams)s 2382 %(_doc_random_state)s 2383 2384 Alternatively, the object may be called (as a function) to fix the degrees 2385 of freedom and scale parameters, returning a "frozen" inverse Wishart 2386 random variable: 2387 2388 rv = invwishart(df=1, scale=1) 2389 - Frozen object with the same methods but holding the given 2390 degrees of freedom and scale fixed. 2391 2392 See Also 2393 -------- 2394 wishart 2395 2396 Notes 2397 ----- 2398 %(_doc_callparams_note)s 2399 2400 The scale matrix `scale` must be a symmetric positive definite 2401 matrix. Singular matrices, including the symmetric positive semi-definite 2402 case, are not supported. 2403 2404 The inverse Wishart distribution is often denoted 2405 2406 .. math:: 2407 2408 W_p^{-1}(\nu, \Psi) 2409 2410 where :math:`\nu` is the degrees of freedom and :math:`\Psi` is the 2411 :math:`p \times p` scale matrix. 2412 2413 The probability density function for `invwishart` has support over positive 2414 definite matrices :math:`S`; if :math:`S \sim W^{-1}_p(\nu, \Sigma)`, 2415 then its PDF is given by: 2416 2417 .. math:: 2418 2419 f(S) = \frac{|\Sigma|^\frac{\nu}{2}}{2^{ \frac{\nu p}{2} } 2420 |S|^{\frac{\nu + p + 1}{2}} \Gamma_p \left(\frac{\nu}{2} \right)} 2421 \exp\left( -tr(\Sigma S^{-1}) / 2 \right) 2422 2423 If :math:`S \sim W_p^{-1}(\nu, \Psi)` (inverse Wishart) then 2424 :math:`S^{-1} \sim W_p(\nu, \Psi^{-1})` (Wishart). 2425 2426 If the scale matrix is 1-dimensional and equal to one, then the inverse 2427 Wishart distribution :math:`W_1(\nu, 1)` collapses to the 2428 inverse Gamma distribution with parameters shape = :math:`\frac{\nu}{2}` 2429 and scale = :math:`\frac{1}{2}`. 2430 2431 .. versionadded:: 0.16.0 2432 2433 References 2434 ---------- 2435 .. [1] M.L. Eaton, "Multivariate Statistics: A Vector Space Approach", 2436 Wiley, 1983. 2437 .. [2] M.C. Jones, "Generating Inverse Wishart Matrices", Communications 2438 in Statistics - Simulation and Computation, vol. 14.2, pp.511-514, 2439 1985. 2440 2441 Examples 2442 -------- 2443 >>> import matplotlib.pyplot as plt 2444 >>> from scipy.stats import invwishart, invgamma 2445 >>> x = np.linspace(0.01, 1, 100) 2446 >>> iw = invwishart.pdf(x, df=6, scale=1) 2447 >>> iw[:3] 2448 array([ 1.20546865e-15, 5.42497807e-06, 4.45813929e-03]) 2449 >>> ig = invgamma.pdf(x, 6/2., scale=1./2) 2450 >>> ig[:3] 2451 array([ 1.20546865e-15, 5.42497807e-06, 4.45813929e-03]) 2452 >>> plt.plot(x, iw) 2453 2454 The input quantiles can be any shape of array, as long as the last 2455 axis labels the components. 2456 2457 """ 2458 2459 def __init__(self, seed=None): 2460 super().__init__(seed) 2461 self.__doc__ = doccer.docformat(self.__doc__, wishart_docdict_params) 2462 2463 def __call__(self, df=None, scale=None, seed=None): 2464 """Create a frozen inverse Wishart distribution. 2465 2466 See `invwishart_frozen` for more information. 2467 2468 """ 2469 return invwishart_frozen(df, scale, seed) 2470 2471 def _logpdf(self, x, dim, df, scale, log_det_scale): 2472 """Log of the inverse Wishart probability density function. 2473 2474 Parameters 2475 ---------- 2476 x : ndarray 2477 Points at which to evaluate the log of the probability 2478 density function. 2479 dim : int 2480 Dimension of the scale matrix 2481 df : int 2482 Degrees of freedom 2483 scale : ndarray 2484 Scale matrix 2485 log_det_scale : float 2486 Logarithm of the determinant of the scale matrix 2487 2488 Notes 2489 ----- 2490 As this function does no argument checking, it should not be 2491 called directly; use 'logpdf' instead. 2492 2493 """ 2494 log_det_x = np.empty(x.shape[-1]) 2495 x_inv = np.copy(x).T 2496 if dim > 1: 2497 _cho_inv_batch(x_inv) # works in-place 2498 else: 2499 x_inv = 1./x_inv 2500 tr_scale_x_inv = np.empty(x.shape[-1]) 2501 2502 for i in range(x.shape[-1]): 2503 C, lower = scipy.linalg.cho_factor(x[:, :, i], lower=True) 2504 2505 log_det_x[i] = 2 * np.sum(np.log(C.diagonal())) 2506 2507 tr_scale_x_inv[i] = np.dot(scale, x_inv[i]).trace() 2508 2509 # Log PDF 2510 out = ((0.5 * df * log_det_scale - 0.5 * tr_scale_x_inv) - 2511 (0.5 * df * dim * _LOG_2 + 0.5 * (df + dim + 1) * log_det_x) - 2512 multigammaln(0.5*df, dim)) 2513 2514 return out 2515 2516 def logpdf(self, x, df, scale): 2517 """Log of the inverse Wishart probability density function. 2518 2519 Parameters 2520 ---------- 2521 x : array_like 2522 Quantiles, with the last axis of `x` denoting the components. 2523 Each quantile must be a symmetric positive definite matrix. 2524 %(_doc_default_callparams)s 2525 2526 Returns 2527 ------- 2528 pdf : ndarray 2529 Log of the probability density function evaluated at `x` 2530 2531 Notes 2532 ----- 2533 %(_doc_callparams_note)s 2534 2535 """ 2536 dim, df, scale = self._process_parameters(df, scale) 2537 x = self._process_quantiles(x, dim) 2538 _, log_det_scale = self._cholesky_logdet(scale) 2539 out = self._logpdf(x, dim, df, scale, log_det_scale) 2540 return _squeeze_output(out) 2541 2542 def pdf(self, x, df, scale): 2543 """Inverse Wishart probability density function. 2544 2545 Parameters 2546 ---------- 2547 x : array_like 2548 Quantiles, with the last axis of `x` denoting the components. 2549 Each quantile must be a symmetric positive definite matrix. 2550 %(_doc_default_callparams)s 2551 2552 Returns 2553 ------- 2554 pdf : ndarray 2555 Probability density function evaluated at `x` 2556 2557 Notes 2558 ----- 2559 %(_doc_callparams_note)s 2560 2561 """ 2562 return np.exp(self.logpdf(x, df, scale)) 2563 2564 def _mean(self, dim, df, scale): 2565 """Mean of the inverse Wishart distribution. 2566 2567 Parameters 2568 ---------- 2569 dim : int 2570 Dimension of the scale matrix 2571 %(_doc_default_callparams)s 2572 2573 Notes 2574 ----- 2575 As this function does no argument checking, it should not be 2576 called directly; use 'mean' instead. 2577 2578 """ 2579 if df > dim + 1: 2580 out = scale / (df - dim - 1) 2581 else: 2582 out = None 2583 return out 2584 2585 def mean(self, df, scale): 2586 """Mean of the inverse Wishart distribution. 2587 2588 Only valid if the degrees of freedom are greater than the dimension of 2589 the scale matrix plus one. 2590 2591 Parameters 2592 ---------- 2593 %(_doc_default_callparams)s 2594 2595 Returns 2596 ------- 2597 mean : float or None 2598 The mean of the distribution 2599 2600 """ 2601 dim, df, scale = self._process_parameters(df, scale) 2602 out = self._mean(dim, df, scale) 2603 return _squeeze_output(out) if out is not None else out 2604 2605 def _mode(self, dim, df, scale): 2606 """Mode of the inverse Wishart distribution. 2607 2608 Parameters 2609 ---------- 2610 dim : int 2611 Dimension of the scale matrix 2612 %(_doc_default_callparams)s 2613 2614 Notes 2615 ----- 2616 As this function does no argument checking, it should not be 2617 called directly; use 'mode' instead. 2618 2619 """ 2620 return scale / (df + dim + 1) 2621 2622 def mode(self, df, scale): 2623 """Mode of the inverse Wishart distribution. 2624 2625 Parameters 2626 ---------- 2627 %(_doc_default_callparams)s 2628 2629 Returns 2630 ------- 2631 mode : float 2632 The Mode of the distribution 2633 2634 """ 2635 dim, df, scale = self._process_parameters(df, scale) 2636 out = self._mode(dim, df, scale) 2637 return _squeeze_output(out) 2638 2639 def _var(self, dim, df, scale): 2640 """Variance of the inverse Wishart distribution. 2641 2642 Parameters 2643 ---------- 2644 dim : int 2645 Dimension of the scale matrix 2646 %(_doc_default_callparams)s 2647 2648 Notes 2649 ----- 2650 As this function does no argument checking, it should not be 2651 called directly; use 'var' instead. 2652 2653 """ 2654 if df > dim + 3: 2655 var = (df - dim + 1) * scale**2 2656 diag = scale.diagonal() # 1 x dim array 2657 var += (df - dim - 1) * np.outer(diag, diag) 2658 var /= (df - dim) * (df - dim - 1)**2 * (df - dim - 3) 2659 else: 2660 var = None 2661 return var 2662 2663 def var(self, df, scale): 2664 """Variance of the inverse Wishart distribution. 2665 2666 Only valid if the degrees of freedom are greater than the dimension of 2667 the scale matrix plus three. 2668 2669 Parameters 2670 ---------- 2671 %(_doc_default_callparams)s 2672 2673 Returns 2674 ------- 2675 var : float 2676 The variance of the distribution 2677 """ 2678 dim, df, scale = self._process_parameters(df, scale) 2679 out = self._var(dim, df, scale) 2680 return _squeeze_output(out) if out is not None else out 2681 2682 def _rvs(self, n, shape, dim, df, C, random_state): 2683 """Draw random samples from an inverse Wishart distribution. 2684 2685 Parameters 2686 ---------- 2687 n : integer 2688 Number of variates to generate 2689 shape : iterable 2690 Shape of the variates to generate 2691 dim : int 2692 Dimension of the scale matrix 2693 df : int 2694 Degrees of freedom 2695 C : ndarray 2696 Cholesky factorization of the scale matrix, lower triagular. 2697 %(_doc_random_state)s 2698 2699 Notes 2700 ----- 2701 As this function does no argument checking, it should not be 2702 called directly; use 'rvs' instead. 2703 2704 """ 2705 random_state = self._get_random_state(random_state) 2706 # Get random draws A such that A ~ W(df, I) 2707 A = super()._standard_rvs(n, shape, dim, df, random_state) 2708 2709 # Calculate SA = (CA)'^{-1} (CA)^{-1} ~ iW(df, scale) 2710 eye = np.eye(dim) 2711 trtrs = get_lapack_funcs(('trtrs'), (A,)) 2712 2713 for index in np.ndindex(A.shape[:-2]): 2714 # Calculate CA 2715 CA = np.dot(C, A[index]) 2716 # Get (C A)^{-1} via triangular solver 2717 if dim > 1: 2718 CA, info = trtrs(CA, eye, lower=True) 2719 if info > 0: 2720 raise LinAlgError("Singular matrix.") 2721 if info < 0: 2722 raise ValueError('Illegal value in %d-th argument of' 2723 ' internal trtrs' % -info) 2724 else: 2725 CA = 1. / CA 2726 # Get SA 2727 A[index] = np.dot(CA.T, CA) 2728 2729 return A 2730 2731 def rvs(self, df, scale, size=1, random_state=None): 2732 """Draw random samples from an inverse Wishart distribution. 2733 2734 Parameters 2735 ---------- 2736 %(_doc_default_callparams)s 2737 size : integer or iterable of integers, optional 2738 Number of samples to draw (default 1). 2739 %(_doc_random_state)s 2740 2741 Returns 2742 ------- 2743 rvs : ndarray 2744 Random variates of shape (`size`) + (`dim`, `dim), where `dim` is 2745 the dimension of the scale matrix. 2746 2747 Notes 2748 ----- 2749 %(_doc_callparams_note)s 2750 2751 """ 2752 n, shape = self._process_size(size) 2753 dim, df, scale = self._process_parameters(df, scale) 2754 2755 # Invert the scale 2756 eye = np.eye(dim) 2757 L, lower = scipy.linalg.cho_factor(scale, lower=True) 2758 inv_scale = scipy.linalg.cho_solve((L, lower), eye) 2759 # Cholesky decomposition of inverted scale 2760 C = scipy.linalg.cholesky(inv_scale, lower=True) 2761 2762 out = self._rvs(n, shape, dim, df, C, random_state) 2763 2764 return _squeeze_output(out) 2765 2766 def entropy(self): 2767 # Need to find reference for inverse Wishart entropy 2768 raise AttributeError 2769 2770 2771invwishart = invwishart_gen() 2772 2773 2774class invwishart_frozen(multi_rv_frozen): 2775 def __init__(self, df, scale, seed=None): 2776 """Create a frozen inverse Wishart distribution. 2777 2778 Parameters 2779 ---------- 2780 df : array_like 2781 Degrees of freedom of the distribution 2782 scale : array_like 2783 Scale matrix of the distribution 2784 seed : {None, int, `numpy.random.Generator`}, optional 2785 If `seed` is None the `numpy.random.Generator` singleton is used. 2786 If `seed` is an int, a new ``Generator`` instance is used, 2787 seeded with `seed`. 2788 If `seed` is already a ``Generator`` instance then that instance is 2789 used. 2790 2791 """ 2792 self._dist = invwishart_gen(seed) 2793 self.dim, self.df, self.scale = self._dist._process_parameters( 2794 df, scale 2795 ) 2796 2797 # Get the determinant via Cholesky factorization 2798 C, lower = scipy.linalg.cho_factor(self.scale, lower=True) 2799 self.log_det_scale = 2 * np.sum(np.log(C.diagonal())) 2800 2801 # Get the inverse using the Cholesky factorization 2802 eye = np.eye(self.dim) 2803 self.inv_scale = scipy.linalg.cho_solve((C, lower), eye) 2804 2805 # Get the Cholesky factorization of the inverse scale 2806 self.C = scipy.linalg.cholesky(self.inv_scale, lower=True) 2807 2808 def logpdf(self, x): 2809 x = self._dist._process_quantiles(x, self.dim) 2810 out = self._dist._logpdf(x, self.dim, self.df, self.scale, 2811 self.log_det_scale) 2812 return _squeeze_output(out) 2813 2814 def pdf(self, x): 2815 return np.exp(self.logpdf(x)) 2816 2817 def mean(self): 2818 out = self._dist._mean(self.dim, self.df, self.scale) 2819 return _squeeze_output(out) if out is not None else out 2820 2821 def mode(self): 2822 out = self._dist._mode(self.dim, self.df, self.scale) 2823 return _squeeze_output(out) 2824 2825 def var(self): 2826 out = self._dist._var(self.dim, self.df, self.scale) 2827 return _squeeze_output(out) if out is not None else out 2828 2829 def rvs(self, size=1, random_state=None): 2830 n, shape = self._dist._process_size(size) 2831 2832 out = self._dist._rvs(n, shape, self.dim, self.df, 2833 self.C, random_state) 2834 2835 return _squeeze_output(out) 2836 2837 def entropy(self): 2838 # Need to find reference for inverse Wishart entropy 2839 raise AttributeError 2840 2841 2842# Set frozen generator docstrings from corresponding docstrings in 2843# inverse Wishart and fill in default strings in class docstrings 2844for name in ['logpdf', 'pdf', 'mean', 'mode', 'var', 'rvs']: 2845 method = invwishart_gen.__dict__[name] 2846 method_frozen = wishart_frozen.__dict__[name] 2847 method_frozen.__doc__ = doccer.docformat( 2848 method.__doc__, wishart_docdict_noparams) 2849 method.__doc__ = doccer.docformat(method.__doc__, wishart_docdict_params) 2850 2851_multinomial_doc_default_callparams = """\ 2852n : int 2853 Number of trials 2854p : array_like 2855 Probability of a trial falling into each category; should sum to 1 2856""" 2857 2858_multinomial_doc_callparams_note = \ 2859"""`n` should be a positive integer. Each element of `p` should be in the 2860interval :math:`[0,1]` and the elements should sum to 1. If they do not sum to 28611, the last element of the `p` array is not used and is replaced with the 2862remaining probability left over from the earlier elements. 2863""" 2864 2865_multinomial_doc_frozen_callparams = "" 2866 2867_multinomial_doc_frozen_callparams_note = \ 2868 """See class definition for a detailed description of parameters.""" 2869 2870multinomial_docdict_params = { 2871 '_doc_default_callparams': _multinomial_doc_default_callparams, 2872 '_doc_callparams_note': _multinomial_doc_callparams_note, 2873 '_doc_random_state': _doc_random_state 2874} 2875 2876multinomial_docdict_noparams = { 2877 '_doc_default_callparams': _multinomial_doc_frozen_callparams, 2878 '_doc_callparams_note': _multinomial_doc_frozen_callparams_note, 2879 '_doc_random_state': _doc_random_state 2880} 2881 2882 2883class multinomial_gen(multi_rv_generic): 2884 r"""A multinomial random variable. 2885 2886 Methods 2887 ------- 2888 ``pmf(x, n, p)`` 2889 Probability mass function. 2890 ``logpmf(x, n, p)`` 2891 Log of the probability mass function. 2892 ``rvs(n, p, size=1, random_state=None)`` 2893 Draw random samples from a multinomial distribution. 2894 ``entropy(n, p)`` 2895 Compute the entropy of the multinomial distribution. 2896 ``cov(n, p)`` 2897 Compute the covariance matrix of the multinomial distribution. 2898 2899 Parameters 2900 ---------- 2901 x : array_like 2902 Quantiles, with the last axis of `x` denoting the components. 2903 %(_doc_default_callparams)s 2904 %(_doc_random_state)s 2905 2906 Notes 2907 ----- 2908 %(_doc_callparams_note)s 2909 2910 Alternatively, the object may be called (as a function) to fix the `n` and 2911 `p` parameters, returning a "frozen" multinomial random variable: 2912 2913 The probability mass function for `multinomial` is 2914 2915 .. math:: 2916 2917 f(x) = \frac{n!}{x_1! \cdots x_k!} p_1^{x_1} \cdots p_k^{x_k}, 2918 2919 supported on :math:`x=(x_1, \ldots, x_k)` where each :math:`x_i` is a 2920 nonnegative integer and their sum is :math:`n`. 2921 2922 .. versionadded:: 0.19.0 2923 2924 Examples 2925 -------- 2926 2927 >>> from scipy.stats import multinomial 2928 >>> rv = multinomial(8, [0.3, 0.2, 0.5]) 2929 >>> rv.pmf([1, 3, 4]) 2930 0.042000000000000072 2931 2932 The multinomial distribution for :math:`k=2` is identical to the 2933 corresponding binomial distribution (tiny numerical differences 2934 notwithstanding): 2935 2936 >>> from scipy.stats import binom 2937 >>> multinomial.pmf([3, 4], n=7, p=[0.4, 0.6]) 2938 0.29030399999999973 2939 >>> binom.pmf(3, 7, 0.4) 2940 0.29030400000000012 2941 2942 The functions ``pmf``, ``logpmf``, ``entropy``, and ``cov`` support 2943 broadcasting, under the convention that the vector parameters (``x`` and 2944 ``p``) are interpreted as if each row along the last axis is a single 2945 object. For instance: 2946 2947 >>> multinomial.pmf([[3, 4], [3, 5]], n=[7, 8], p=[.3, .7]) 2948 array([0.2268945, 0.25412184]) 2949 2950 Here, ``x.shape == (2, 2)``, ``n.shape == (2,)``, and ``p.shape == (2,)``, 2951 but following the rules mentioned above they behave as if the rows 2952 ``[3, 4]`` and ``[3, 5]`` in ``x`` and ``[.3, .7]`` in ``p`` were a single 2953 object, and as if we had ``x.shape = (2,)``, ``n.shape = (2,)``, and 2954 ``p.shape = ()``. To obtain the individual elements without broadcasting, 2955 we would do this: 2956 2957 >>> multinomial.pmf([3, 4], n=7, p=[.3, .7]) 2958 0.2268945 2959 >>> multinomial.pmf([3, 5], 8, p=[.3, .7]) 2960 0.25412184 2961 2962 This broadcasting also works for ``cov``, where the output objects are 2963 square matrices of size ``p.shape[-1]``. For example: 2964 2965 >>> multinomial.cov([4, 5], [[.3, .7], [.4, .6]]) 2966 array([[[ 0.84, -0.84], 2967 [-0.84, 0.84]], 2968 [[ 1.2 , -1.2 ], 2969 [-1.2 , 1.2 ]]]) 2970 2971 In this example, ``n.shape == (2,)`` and ``p.shape == (2, 2)``, and 2972 following the rules above, these broadcast as if ``p.shape == (2,)``. 2973 Thus the result should also be of shape ``(2,)``, but since each output is 2974 a :math:`2 \times 2` matrix, the result in fact has shape ``(2, 2, 2)``, 2975 where ``result[0]`` is equal to ``multinomial.cov(n=4, p=[.3, .7])`` and 2976 ``result[1]`` is equal to ``multinomial.cov(n=5, p=[.4, .6])``. 2977 2978 See also 2979 -------- 2980 scipy.stats.binom : The binomial distribution. 2981 numpy.random.Generator.multinomial : Sampling from the multinomial distribution. 2982 scipy.stats.multivariate_hypergeom : 2983 The multivariate hypergeometric distribution. 2984 """ # noqa: E501 2985 2986 def __init__(self, seed=None): 2987 super().__init__(seed) 2988 self.__doc__ = \ 2989 doccer.docformat(self.__doc__, multinomial_docdict_params) 2990 2991 def __call__(self, n, p, seed=None): 2992 """Create a frozen multinomial distribution. 2993 2994 See `multinomial_frozen` for more information. 2995 """ 2996 return multinomial_frozen(n, p, seed) 2997 2998 def _process_parameters(self, n, p): 2999 """Returns: n_, p_, npcond. 3000 3001 n_ and p_ are arrays of the correct shape; npcond is a boolean array 3002 flagging values out of the domain. 3003 """ 3004 p = np.array(p, dtype=np.float64, copy=True) 3005 p[..., -1] = 1. - p[..., :-1].sum(axis=-1) 3006 3007 # true for bad p 3008 pcond = np.any(p < 0, axis=-1) 3009 pcond |= np.any(p > 1, axis=-1) 3010 3011 n = np.array(n, dtype=np.int_, copy=True) 3012 3013 # true for bad n 3014 ncond = n <= 0 3015 3016 return n, p, ncond | pcond 3017 3018 def _process_quantiles(self, x, n, p): 3019 """Returns: x_, xcond. 3020 3021 x_ is an int array; xcond is a boolean array flagging values out of the 3022 domain. 3023 """ 3024 xx = np.asarray(x, dtype=np.int_) 3025 3026 if xx.ndim == 0: 3027 raise ValueError("x must be an array.") 3028 3029 if xx.size != 0 and not xx.shape[-1] == p.shape[-1]: 3030 raise ValueError("Size of each quantile should be size of p: " 3031 "received %d, but expected %d." % 3032 (xx.shape[-1], p.shape[-1])) 3033 3034 # true for x out of the domain 3035 cond = np.any(xx != x, axis=-1) 3036 cond |= np.any(xx < 0, axis=-1) 3037 cond = cond | (np.sum(xx, axis=-1) != n) 3038 3039 return xx, cond 3040 3041 def _checkresult(self, result, cond, bad_value): 3042 result = np.asarray(result) 3043 3044 if cond.ndim != 0: 3045 result[cond] = bad_value 3046 elif cond: 3047 if result.ndim == 0: 3048 return bad_value 3049 result[...] = bad_value 3050 return result 3051 3052 def _logpmf(self, x, n, p): 3053 return gammaln(n+1) + np.sum(xlogy(x, p) - gammaln(x+1), axis=-1) 3054 3055 def logpmf(self, x, n, p): 3056 """Log of the Multinomial probability mass function. 3057 3058 Parameters 3059 ---------- 3060 x : array_like 3061 Quantiles, with the last axis of `x` denoting the components. 3062 %(_doc_default_callparams)s 3063 3064 Returns 3065 ------- 3066 logpmf : ndarray or scalar 3067 Log of the probability mass function evaluated at `x` 3068 3069 Notes 3070 ----- 3071 %(_doc_callparams_note)s 3072 """ 3073 n, p, npcond = self._process_parameters(n, p) 3074 x, xcond = self._process_quantiles(x, n, p) 3075 3076 result = self._logpmf(x, n, p) 3077 3078 # replace values for which x was out of the domain; broadcast 3079 # xcond to the right shape 3080 xcond_ = xcond | np.zeros(npcond.shape, dtype=np.bool_) 3081 result = self._checkresult(result, xcond_, np.NINF) 3082 3083 # replace values bad for n or p; broadcast npcond to the right shape 3084 npcond_ = npcond | np.zeros(xcond.shape, dtype=np.bool_) 3085 return self._checkresult(result, npcond_, np.NAN) 3086 3087 def pmf(self, x, n, p): 3088 """Multinomial probability mass function. 3089 3090 Parameters 3091 ---------- 3092 x : array_like 3093 Quantiles, with the last axis of `x` denoting the components. 3094 %(_doc_default_callparams)s 3095 3096 Returns 3097 ------- 3098 pmf : ndarray or scalar 3099 Probability density function evaluated at `x` 3100 3101 Notes 3102 ----- 3103 %(_doc_callparams_note)s 3104 """ 3105 return np.exp(self.logpmf(x, n, p)) 3106 3107 def mean(self, n, p): 3108 """Mean of the Multinomial distribution. 3109 3110 Parameters 3111 ---------- 3112 %(_doc_default_callparams)s 3113 3114 Returns 3115 ------- 3116 mean : float 3117 The mean of the distribution 3118 """ 3119 n, p, npcond = self._process_parameters(n, p) 3120 result = n[..., np.newaxis]*p 3121 return self._checkresult(result, npcond, np.NAN) 3122 3123 def cov(self, n, p): 3124 """Covariance matrix of the multinomial distribution. 3125 3126 Parameters 3127 ---------- 3128 %(_doc_default_callparams)s 3129 3130 Returns 3131 ------- 3132 cov : ndarray 3133 The covariance matrix of the distribution 3134 """ 3135 n, p, npcond = self._process_parameters(n, p) 3136 3137 nn = n[..., np.newaxis, np.newaxis] 3138 result = nn * np.einsum('...j,...k->...jk', -p, p) 3139 3140 # change the diagonal 3141 for i in range(p.shape[-1]): 3142 result[..., i, i] += n*p[..., i] 3143 3144 return self._checkresult(result, npcond, np.nan) 3145 3146 def entropy(self, n, p): 3147 r"""Compute the entropy of the multinomial distribution. 3148 3149 The entropy is computed using this expression: 3150 3151 .. math:: 3152 3153 f(x) = - \log n! - n\sum_{i=1}^k p_i \log p_i + 3154 \sum_{i=1}^k \sum_{x=0}^n \binom n x p_i^x(1-p_i)^{n-x} \log x! 3155 3156 Parameters 3157 ---------- 3158 %(_doc_default_callparams)s 3159 3160 Returns 3161 ------- 3162 h : scalar 3163 Entropy of the Multinomial distribution 3164 3165 Notes 3166 ----- 3167 %(_doc_callparams_note)s 3168 """ 3169 n, p, npcond = self._process_parameters(n, p) 3170 3171 x = np.r_[1:np.max(n)+1] 3172 3173 term1 = n*np.sum(entr(p), axis=-1) 3174 term1 -= gammaln(n+1) 3175 3176 n = n[..., np.newaxis] 3177 new_axes_needed = max(p.ndim, n.ndim) - x.ndim + 1 3178 x.shape += (1,)*new_axes_needed 3179 3180 term2 = np.sum(binom.pmf(x, n, p)*gammaln(x+1), 3181 axis=(-1, -1-new_axes_needed)) 3182 3183 return self._checkresult(term1 + term2, npcond, np.nan) 3184 3185 def rvs(self, n, p, size=None, random_state=None): 3186 """Draw random samples from a Multinomial distribution. 3187 3188 Parameters 3189 ---------- 3190 %(_doc_default_callparams)s 3191 size : integer or iterable of integers, optional 3192 Number of samples to draw (default 1). 3193 %(_doc_random_state)s 3194 3195 Returns 3196 ------- 3197 rvs : ndarray or scalar 3198 Random variates of shape (`size`, `len(p)`) 3199 3200 Notes 3201 ----- 3202 %(_doc_callparams_note)s 3203 """ 3204 n, p, npcond = self._process_parameters(n, p) 3205 random_state = self._get_random_state(random_state) 3206 return random_state.multinomial(n, p, size) 3207 3208 3209multinomial = multinomial_gen() 3210 3211 3212class multinomial_frozen(multi_rv_frozen): 3213 r"""Create a frozen Multinomial distribution. 3214 3215 Parameters 3216 ---------- 3217 n : int 3218 number of trials 3219 p: array_like 3220 probability of a trial falling into each category; should sum to 1 3221 seed : {None, int, `numpy.random.Generator`, 3222 `numpy.random.RandomState`}, optional 3223 3224 If `seed` is None (or `np.random`), the `numpy.random.RandomState` 3225 singleton is used. 3226 If `seed` is an int, a new ``RandomState`` instance is used, 3227 seeded with `seed`. 3228 If `seed` is already a ``Generator`` or ``RandomState`` instance then 3229 that instance is used. 3230 """ 3231 def __init__(self, n, p, seed=None): 3232 self._dist = multinomial_gen(seed) 3233 self.n, self.p, self.npcond = self._dist._process_parameters(n, p) 3234 3235 # monkey patch self._dist 3236 def _process_parameters(n, p): 3237 return self.n, self.p, self.npcond 3238 3239 self._dist._process_parameters = _process_parameters 3240 3241 def logpmf(self, x): 3242 return self._dist.logpmf(x, self.n, self.p) 3243 3244 def pmf(self, x): 3245 return self._dist.pmf(x, self.n, self.p) 3246 3247 def mean(self): 3248 return self._dist.mean(self.n, self.p) 3249 3250 def cov(self): 3251 return self._dist.cov(self.n, self.p) 3252 3253 def entropy(self): 3254 return self._dist.entropy(self.n, self.p) 3255 3256 def rvs(self, size=1, random_state=None): 3257 return self._dist.rvs(self.n, self.p, size, random_state) 3258 3259 3260# Set frozen generator docstrings from corresponding docstrings in 3261# multinomial and fill in default strings in class docstrings 3262for name in ['logpmf', 'pmf', 'mean', 'cov', 'rvs']: 3263 method = multinomial_gen.__dict__[name] 3264 method_frozen = multinomial_frozen.__dict__[name] 3265 method_frozen.__doc__ = doccer.docformat( 3266 method.__doc__, multinomial_docdict_noparams) 3267 method.__doc__ = doccer.docformat(method.__doc__, 3268 multinomial_docdict_params) 3269 3270 3271class special_ortho_group_gen(multi_rv_generic): 3272 r"""A matrix-valued SO(N) random variable. 3273 3274 Return a random rotation matrix, drawn from the Haar distribution 3275 (the only uniform distribution on SO(n)). 3276 3277 The `dim` keyword specifies the dimension N. 3278 3279 Methods 3280 ------- 3281 ``rvs(dim=None, size=1, random_state=None)`` 3282 Draw random samples from SO(N). 3283 3284 Parameters 3285 ---------- 3286 dim : scalar 3287 Dimension of matrices 3288 3289 Notes 3290 ----- 3291 This class is wrapping the random_rot code from the MDP Toolkit, 3292 https://github.com/mdp-toolkit/mdp-toolkit 3293 3294 Return a random rotation matrix, drawn from the Haar distribution 3295 (the only uniform distribution on SO(n)). 3296 The algorithm is described in the paper 3297 Stewart, G.W., "The efficient generation of random orthogonal 3298 matrices with an application to condition estimators", SIAM Journal 3299 on Numerical Analysis, 17(3), pp. 403-409, 1980. 3300 For more information see 3301 https://en.wikipedia.org/wiki/Orthogonal_matrix#Randomization 3302 3303 See also the similar `ortho_group`. For a random rotation in three 3304 dimensions, see `scipy.spatial.transform.Rotation.random`. 3305 3306 Examples 3307 -------- 3308 >>> from scipy.stats import special_ortho_group 3309 >>> x = special_ortho_group.rvs(3) 3310 3311 >>> np.dot(x, x.T) 3312 array([[ 1.00000000e+00, 1.13231364e-17, -2.86852790e-16], 3313 [ 1.13231364e-17, 1.00000000e+00, -1.46845020e-16], 3314 [ -2.86852790e-16, -1.46845020e-16, 1.00000000e+00]]) 3315 3316 >>> import scipy.linalg 3317 >>> scipy.linalg.det(x) 3318 1.0 3319 3320 This generates one random matrix from SO(3). It is orthogonal and 3321 has a determinant of 1. 3322 3323 See Also 3324 -------- 3325 ortho_group, scipy.spatial.transform.Rotation.random 3326 3327 """ 3328 3329 def __init__(self, seed=None): 3330 super().__init__(seed) 3331 self.__doc__ = doccer.docformat(self.__doc__) 3332 3333 def __call__(self, dim=None, seed=None): 3334 """Create a frozen SO(N) distribution. 3335 3336 See `special_ortho_group_frozen` for more information. 3337 """ 3338 return special_ortho_group_frozen(dim, seed=seed) 3339 3340 def _process_parameters(self, dim): 3341 """Dimension N must be specified; it cannot be inferred.""" 3342 if dim is None or not np.isscalar(dim) or dim <= 1 or dim != int(dim): 3343 raise ValueError("""Dimension of rotation must be specified, 3344 and must be a scalar greater than 1.""") 3345 3346 return dim 3347 3348 def rvs(self, dim, size=1, random_state=None): 3349 """Draw random samples from SO(N). 3350 3351 Parameters 3352 ---------- 3353 dim : integer 3354 Dimension of rotation space (N). 3355 size : integer, optional 3356 Number of samples to draw (default 1). 3357 3358 Returns 3359 ------- 3360 rvs : ndarray or scalar 3361 Random size N-dimensional matrices, dimension (size, dim, dim) 3362 3363 """ 3364 random_state = self._get_random_state(random_state) 3365 3366 size = int(size) 3367 if size > 1: 3368 return np.array([self.rvs(dim, size=1, random_state=random_state) 3369 for i in range(size)]) 3370 3371 dim = self._process_parameters(dim) 3372 3373 H = np.eye(dim) 3374 D = np.empty((dim,)) 3375 for n in range(dim-1): 3376 x = random_state.normal(size=(dim-n,)) 3377 norm2 = np.dot(x, x) 3378 x0 = x[0].item() 3379 D[n] = np.sign(x[0]) if x[0] != 0 else 1 3380 x[0] += D[n]*np.sqrt(norm2) 3381 x /= np.sqrt((norm2 - x0**2 + x[0]**2) / 2.) 3382 # Householder transformation 3383 H[:, n:] -= np.outer(np.dot(H[:, n:], x), x) 3384 D[-1] = (-1)**(dim-1)*D[:-1].prod() 3385 # Equivalent to np.dot(np.diag(D), H) but faster, apparently 3386 H = (D*H.T).T 3387 return H 3388 3389 3390special_ortho_group = special_ortho_group_gen() 3391 3392 3393class special_ortho_group_frozen(multi_rv_frozen): 3394 def __init__(self, dim=None, seed=None): 3395 """Create a frozen SO(N) distribution. 3396 3397 Parameters 3398 ---------- 3399 dim : scalar 3400 Dimension of matrices 3401 seed : {None, int, `numpy.random.Generator`, 3402 `numpy.random.RandomState`}, optional 3403 3404 If `seed` is None (or `np.random`), the `numpy.random.RandomState` 3405 singleton is used. 3406 If `seed` is an int, a new ``RandomState`` instance is used, 3407 seeded with `seed`. 3408 If `seed` is already a ``Generator`` or ``RandomState`` instance 3409 then that instance is used. 3410 3411 Examples 3412 -------- 3413 >>> from scipy.stats import special_ortho_group 3414 >>> g = special_ortho_group(5) 3415 >>> x = g.rvs() 3416 3417 """ 3418 self._dist = special_ortho_group_gen(seed) 3419 self.dim = self._dist._process_parameters(dim) 3420 3421 def rvs(self, size=1, random_state=None): 3422 return self._dist.rvs(self.dim, size, random_state) 3423 3424 3425class ortho_group_gen(multi_rv_generic): 3426 r"""A matrix-valued O(N) random variable. 3427 3428 Return a random orthogonal matrix, drawn from the O(N) Haar 3429 distribution (the only uniform distribution on O(N)). 3430 3431 The `dim` keyword specifies the dimension N. 3432 3433 Methods 3434 ------- 3435 ``rvs(dim=None, size=1, random_state=None)`` 3436 Draw random samples from O(N). 3437 3438 Parameters 3439 ---------- 3440 dim : scalar 3441 Dimension of matrices 3442 3443 Notes 3444 ----- 3445 This class is closely related to `special_ortho_group`. 3446 3447 Some care is taken to avoid numerical error, as per the paper by Mezzadri. 3448 3449 References 3450 ---------- 3451 .. [1] F. Mezzadri, "How to generate random matrices from the classical 3452 compact groups", :arXiv:`math-ph/0609050v2`. 3453 3454 Examples 3455 -------- 3456 >>> from scipy.stats import ortho_group 3457 >>> x = ortho_group.rvs(3) 3458 3459 >>> np.dot(x, x.T) 3460 array([[ 1.00000000e+00, 1.13231364e-17, -2.86852790e-16], 3461 [ 1.13231364e-17, 1.00000000e+00, -1.46845020e-16], 3462 [ -2.86852790e-16, -1.46845020e-16, 1.00000000e+00]]) 3463 3464 >>> import scipy.linalg 3465 >>> np.fabs(scipy.linalg.det(x)) 3466 1.0 3467 3468 This generates one random matrix from O(3). It is orthogonal and 3469 has a determinant of +1 or -1. 3470 3471 """ 3472 3473 def __init__(self, seed=None): 3474 super().__init__(seed) 3475 self.__doc__ = doccer.docformat(self.__doc__) 3476 3477 def _process_parameters(self, dim): 3478 """Dimension N must be specified; it cannot be inferred.""" 3479 if dim is None or not np.isscalar(dim) or dim <= 1 or dim != int(dim): 3480 raise ValueError("Dimension of rotation must be specified," 3481 "and must be a scalar greater than 1.") 3482 3483 return dim 3484 3485 def rvs(self, dim, size=1, random_state=None): 3486 """Draw random samples from O(N). 3487 3488 Parameters 3489 ---------- 3490 dim : integer 3491 Dimension of rotation space (N). 3492 size : integer, optional 3493 Number of samples to draw (default 1). 3494 3495 Returns 3496 ------- 3497 rvs : ndarray or scalar 3498 Random size N-dimensional matrices, dimension (size, dim, dim) 3499 3500 """ 3501 random_state = self._get_random_state(random_state) 3502 3503 size = int(size) 3504 if size > 1: 3505 return np.array([self.rvs(dim, size=1, random_state=random_state) 3506 for i in range(size)]) 3507 3508 dim = self._process_parameters(dim) 3509 3510 H = np.eye(dim) 3511 for n in range(dim): 3512 x = random_state.normal(size=(dim-n,)) 3513 norm2 = np.dot(x, x) 3514 x0 = x[0].item() 3515 # random sign, 50/50, but chosen carefully to avoid roundoff error 3516 D = np.sign(x[0]) if x[0] != 0 else 1 3517 x[0] += D * np.sqrt(norm2) 3518 x /= np.sqrt((norm2 - x0**2 + x[0]**2) / 2.) 3519 # Householder transformation 3520 H[:, n:] = -D * (H[:, n:] - np.outer(np.dot(H[:, n:], x), x)) 3521 return H 3522 3523 3524ortho_group = ortho_group_gen() 3525 3526 3527class random_correlation_gen(multi_rv_generic): 3528 r"""A random correlation matrix. 3529 3530 Return a random correlation matrix, given a vector of eigenvalues. 3531 3532 The `eigs` keyword specifies the eigenvalues of the correlation matrix, 3533 and implies the dimension. 3534 3535 Methods 3536 ------- 3537 ``rvs(eigs=None, random_state=None)`` 3538 Draw random correlation matrices, all with eigenvalues eigs. 3539 3540 Parameters 3541 ---------- 3542 eigs : 1d ndarray 3543 Eigenvalues of correlation matrix. 3544 3545 Notes 3546 ----- 3547 3548 Generates a random correlation matrix following a numerically stable 3549 algorithm spelled out by Davies & Higham. This algorithm uses a single O(N) 3550 similarity transformation to construct a symmetric positive semi-definite 3551 matrix, and applies a series of Givens rotations to scale it to have ones 3552 on the diagonal. 3553 3554 References 3555 ---------- 3556 3557 .. [1] Davies, Philip I; Higham, Nicholas J; "Numerically stable generation 3558 of correlation matrices and their factors", BIT 2000, Vol. 40, 3559 No. 4, pp. 640 651 3560 3561 Examples 3562 -------- 3563 >>> from scipy.stats import random_correlation 3564 >>> rng = np.random.default_rng() 3565 >>> x = random_correlation.rvs((.5, .8, 1.2, 1.5), random_state=rng) 3566 >>> x 3567 array([[ 1. , -0.07198934, -0.20411041, -0.24385796], 3568 [-0.07198934, 1. , 0.12968613, -0.29471382], 3569 [-0.20411041, 0.12968613, 1. , 0.2828693 ], 3570 [-0.24385796, -0.29471382, 0.2828693 , 1. ]]) 3571 >>> import scipy.linalg 3572 >>> e, v = scipy.linalg.eigh(x) 3573 >>> e 3574 array([ 0.5, 0.8, 1.2, 1.5]) 3575 3576 """ 3577 3578 def __init__(self, seed=None): 3579 super().__init__(seed) 3580 self.__doc__ = doccer.docformat(self.__doc__) 3581 3582 def _process_parameters(self, eigs, tol): 3583 eigs = np.asarray(eigs, dtype=float) 3584 dim = eigs.size 3585 3586 if eigs.ndim != 1 or eigs.shape[0] != dim or dim <= 1: 3587 raise ValueError("Array 'eigs' must be a vector of length " 3588 "greater than 1.") 3589 3590 if np.fabs(np.sum(eigs) - dim) > tol: 3591 raise ValueError("Sum of eigenvalues must equal dimensionality.") 3592 3593 for x in eigs: 3594 if x < -tol: 3595 raise ValueError("All eigenvalues must be non-negative.") 3596 3597 return dim, eigs 3598 3599 def _givens_to_1(self, aii, ajj, aij): 3600 """Computes a 2x2 Givens matrix to put 1's on the diagonal. 3601 3602 The input matrix is a 2x2 symmetric matrix M = [ aii aij ; aij ajj ]. 3603 3604 The output matrix g is a 2x2 anti-symmetric matrix of the form 3605 [ c s ; -s c ]; the elements c and s are returned. 3606 3607 Applying the output matrix to the input matrix (as b=g.T M g) 3608 results in a matrix with bii=1, provided tr(M) - det(M) >= 1 3609 and floating point issues do not occur. Otherwise, some other 3610 valid rotation is returned. When tr(M)==2, also bjj=1. 3611 3612 """ 3613 aiid = aii - 1. 3614 ajjd = ajj - 1. 3615 3616 if ajjd == 0: 3617 # ajj==1, so swap aii and ajj to avoid division by zero 3618 return 0., 1. 3619 3620 dd = math.sqrt(max(aij**2 - aiid*ajjd, 0)) 3621 3622 # The choice of t should be chosen to avoid cancellation [1] 3623 t = (aij + math.copysign(dd, aij)) / ajjd 3624 c = 1. / math.sqrt(1. + t*t) 3625 if c == 0: 3626 # Underflow 3627 s = 1.0 3628 else: 3629 s = c*t 3630 return c, s 3631 3632 def _to_corr(self, m): 3633 """ 3634 Given a psd matrix m, rotate to put one's on the diagonal, turning it 3635 into a correlation matrix. This also requires the trace equal the 3636 dimensionality. Note: modifies input matrix 3637 """ 3638 # Check requirements for in-place Givens 3639 if not (m.flags.c_contiguous and m.dtype == np.float64 and 3640 m.shape[0] == m.shape[1]): 3641 raise ValueError() 3642 3643 d = m.shape[0] 3644 for i in range(d-1): 3645 if m[i, i] == 1: 3646 continue 3647 elif m[i, i] > 1: 3648 for j in range(i+1, d): 3649 if m[j, j] < 1: 3650 break 3651 else: 3652 for j in range(i+1, d): 3653 if m[j, j] > 1: 3654 break 3655 3656 c, s = self._givens_to_1(m[i, i], m[j, j], m[i, j]) 3657 3658 # Use BLAS to apply Givens rotations in-place. Equivalent to: 3659 # g = np.eye(d) 3660 # g[i, i] = g[j,j] = c 3661 # g[j, i] = -s; g[i, j] = s 3662 # m = np.dot(g.T, np.dot(m, g)) 3663 mv = m.ravel() 3664 drot(mv, mv, c, -s, n=d, 3665 offx=i*d, incx=1, offy=j*d, incy=1, 3666 overwrite_x=True, overwrite_y=True) 3667 drot(mv, mv, c, -s, n=d, 3668 offx=i, incx=d, offy=j, incy=d, 3669 overwrite_x=True, overwrite_y=True) 3670 3671 return m 3672 3673 def rvs(self, eigs, random_state=None, tol=1e-13, diag_tol=1e-7): 3674 """Draw random correlation matrices. 3675 3676 Parameters 3677 ---------- 3678 eigs : 1d ndarray 3679 Eigenvalues of correlation matrix 3680 tol : float, optional 3681 Tolerance for input parameter checks 3682 diag_tol : float, optional 3683 Tolerance for deviation of the diagonal of the resulting 3684 matrix. Default: 1e-7 3685 3686 Raises 3687 ------ 3688 RuntimeError 3689 Floating point error prevented generating a valid correlation 3690 matrix. 3691 3692 Returns 3693 ------- 3694 rvs : ndarray or scalar 3695 Random size N-dimensional matrices, dimension (size, dim, dim), 3696 each having eigenvalues eigs. 3697 3698 """ 3699 dim, eigs = self._process_parameters(eigs, tol=tol) 3700 3701 random_state = self._get_random_state(random_state) 3702 3703 m = ortho_group.rvs(dim, random_state=random_state) 3704 m = np.dot(np.dot(m, np.diag(eigs)), m.T) # Set the trace of m 3705 m = self._to_corr(m) # Carefully rotate to unit diagonal 3706 3707 # Check diagonal 3708 if abs(m.diagonal() - 1).max() > diag_tol: 3709 raise RuntimeError("Failed to generate a valid correlation matrix") 3710 3711 return m 3712 3713 3714random_correlation = random_correlation_gen() 3715 3716 3717class unitary_group_gen(multi_rv_generic): 3718 r"""A matrix-valued U(N) random variable. 3719 3720 Return a random unitary matrix. 3721 3722 The `dim` keyword specifies the dimension N. 3723 3724 Methods 3725 ------- 3726 ``rvs(dim=None, size=1, random_state=None)`` 3727 Draw random samples from U(N). 3728 3729 Parameters 3730 ---------- 3731 dim : scalar 3732 Dimension of matrices 3733 3734 Notes 3735 ----- 3736 This class is similar to `ortho_group`. 3737 3738 References 3739 ---------- 3740 .. [1] F. Mezzadri, "How to generate random matrices from the classical 3741 compact groups", :arXiv:`math-ph/0609050v2`. 3742 3743 Examples 3744 -------- 3745 >>> from scipy.stats import unitary_group 3746 >>> x = unitary_group.rvs(3) 3747 3748 >>> np.dot(x, x.conj().T) 3749 array([[ 1.00000000e+00, 1.13231364e-17, -2.86852790e-16], 3750 [ 1.13231364e-17, 1.00000000e+00, -1.46845020e-16], 3751 [ -2.86852790e-16, -1.46845020e-16, 1.00000000e+00]]) 3752 3753 This generates one random matrix from U(3). The dot product confirms that 3754 it is unitary up to machine precision. 3755 3756 """ 3757 3758 def __init__(self, seed=None): 3759 super().__init__(seed) 3760 self.__doc__ = doccer.docformat(self.__doc__) 3761 3762 def _process_parameters(self, dim): 3763 """Dimension N must be specified; it cannot be inferred.""" 3764 if dim is None or not np.isscalar(dim) or dim <= 1 or dim != int(dim): 3765 raise ValueError("Dimension of rotation must be specified," 3766 "and must be a scalar greater than 1.") 3767 3768 return dim 3769 3770 def rvs(self, dim, size=1, random_state=None): 3771 """Draw random samples from U(N). 3772 3773 Parameters 3774 ---------- 3775 dim : integer 3776 Dimension of space (N). 3777 size : integer, optional 3778 Number of samples to draw (default 1). 3779 3780 Returns 3781 ------- 3782 rvs : ndarray or scalar 3783 Random size N-dimensional matrices, dimension (size, dim, dim) 3784 3785 """ 3786 random_state = self._get_random_state(random_state) 3787 3788 size = int(size) 3789 if size > 1: 3790 return np.array([self.rvs(dim, size=1, random_state=random_state) 3791 for i in range(size)]) 3792 3793 dim = self._process_parameters(dim) 3794 3795 z = 1/math.sqrt(2)*(random_state.normal(size=(dim, dim)) + 3796 1j*random_state.normal(size=(dim, dim))) 3797 q, r = scipy.linalg.qr(z) 3798 d = r.diagonal() 3799 q *= d/abs(d) 3800 return q 3801 3802 3803unitary_group = unitary_group_gen() 3804 3805 3806_mvt_doc_default_callparams = \ 3807""" 3808loc : array_like, optional 3809 Location of the distribution. (default ``0``) 3810shape : array_like, optional 3811 Positive semidefinite matrix of the distribution. (default ``1``) 3812df : float, optional 3813 Degrees of freedom of the distribution; must be greater than zero. 3814 If ``np.inf`` then results are multivariate normal. The default is ``1``. 3815allow_singular : bool, optional 3816 Whether to allow a singular matrix. (default ``False``) 3817""" 3818 3819_mvt_doc_callparams_note = \ 3820"""Setting the parameter `loc` to ``None`` is equivalent to having `loc` 3821be the zero-vector. The parameter `shape` can be a scalar, in which case 3822the shape matrix is the identity times that value, a vector of 3823diagonal entries for the shape matrix, or a two-dimensional array_like. 3824""" 3825 3826_mvt_doc_frozen_callparams_note = \ 3827"""See class definition for a detailed description of parameters.""" 3828 3829mvt_docdict_params = { 3830 '_mvt_doc_default_callparams': _mvt_doc_default_callparams, 3831 '_mvt_doc_callparams_note': _mvt_doc_callparams_note, 3832 '_doc_random_state': _doc_random_state 3833} 3834 3835mvt_docdict_noparams = { 3836 '_mvt_doc_default_callparams': "", 3837 '_mvt_doc_callparams_note': _mvt_doc_frozen_callparams_note, 3838 '_doc_random_state': _doc_random_state 3839} 3840 3841 3842class multivariate_t_gen(multi_rv_generic): 3843 r"""A multivariate t-distributed random variable. 3844 3845 The `loc` parameter specifies the location. The `shape` parameter specifies 3846 the positive semidefinite shape matrix. The `df` parameter specifies the 3847 degrees of freedom. 3848 3849 In addition to calling the methods below, the object itself may be called 3850 as a function to fix the location, shape matrix, and degrees of freedom 3851 parameters, returning a "frozen" multivariate t-distribution random. 3852 3853 Methods 3854 ------- 3855 ``pdf(x, loc=None, shape=1, df=1, allow_singular=False)`` 3856 Probability density function. 3857 ``logpdf(x, loc=None, shape=1, df=1, allow_singular=False)`` 3858 Log of the probability density function. 3859 ``rvs(loc=None, shape=1, df=1, size=1, random_state=None)`` 3860 Draw random samples from a multivariate t-distribution. 3861 3862 Parameters 3863 ---------- 3864 x : array_like 3865 Quantiles, with the last axis of `x` denoting the components. 3866 %(_mvt_doc_default_callparams)s 3867 %(_doc_random_state)s 3868 3869 Notes 3870 ----- 3871 %(_mvt_doc_callparams_note)s 3872 The matrix `shape` must be a (symmetric) positive semidefinite matrix. The 3873 determinant and inverse of `shape` are computed as the pseudo-determinant 3874 and pseudo-inverse, respectively, so that `shape` does not need to have 3875 full rank. 3876 3877 The probability density function for `multivariate_t` is 3878 3879 .. math:: 3880 3881 f(x) = \frac{\Gamma(\nu + p)/2}{\Gamma(\nu/2)\nu^{p/2}\pi^{p/2}|\Sigma|^{1/2}} 3882 \exp\left[1 + \frac{1}{\nu} (\mathbf{x} - \boldsymbol{\mu})^{\top} 3883 \boldsymbol{\Sigma}^{-1} 3884 (\mathbf{x} - \boldsymbol{\mu}) \right]^{-(\nu + p)/2}, 3885 3886 where :math:`p` is the dimension of :math:`\mathbf{x}`, 3887 :math:`\boldsymbol{\mu}` is the :math:`p`-dimensional location, 3888 :math:`\boldsymbol{\Sigma}` the :math:`p \times p`-dimensional shape 3889 matrix, and :math:`\nu` is the degrees of freedom. 3890 3891 .. versionadded:: 1.6.0 3892 3893 Examples 3894 -------- 3895 >>> import matplotlib.pyplot as plt 3896 >>> from scipy.stats import multivariate_t 3897 >>> x, y = np.mgrid[-1:3:.01, -2:1.5:.01] 3898 >>> pos = np.dstack((x, y)) 3899 >>> rv = multivariate_t([1.0, -0.5], [[2.1, 0.3], [0.3, 1.5]], df=2) 3900 >>> fig, ax = plt.subplots(1, 1) 3901 >>> ax.set_aspect('equal') 3902 >>> plt.contourf(x, y, rv.pdf(pos)) 3903 3904 """ 3905 3906 def __init__(self, seed=None): 3907 """Initialize a multivariate t-distributed random variable. 3908 3909 Parameters 3910 ---------- 3911 seed : Random state. 3912 3913 """ 3914 super().__init__(seed) 3915 self.__doc__ = doccer.docformat(self.__doc__, mvt_docdict_params) 3916 self._random_state = check_random_state(seed) 3917 3918 def __call__(self, loc=None, shape=1, df=1, allow_singular=False, 3919 seed=None): 3920 """Create a frozen multivariate t-distribution. 3921 3922 See `multivariate_t_frozen` for parameters. 3923 """ 3924 if df == np.inf: 3925 return multivariate_normal_frozen(mean=loc, cov=shape, 3926 allow_singular=allow_singular, 3927 seed=seed) 3928 return multivariate_t_frozen(loc=loc, shape=shape, df=df, 3929 allow_singular=allow_singular, seed=seed) 3930 3931 def pdf(self, x, loc=None, shape=1, df=1, allow_singular=False): 3932 """Multivariate t-distribution probability density function. 3933 3934 Parameters 3935 ---------- 3936 x : array_like 3937 Points at which to evaluate the probability density function. 3938 %(_mvt_doc_default_callparams)s 3939 3940 Returns 3941 ------- 3942 pdf : Probability density function evaluated at `x`. 3943 3944 Examples 3945 -------- 3946 >>> from scipy.stats import multivariate_t 3947 >>> x = [0.4, 5] 3948 >>> loc = [0, 1] 3949 >>> shape = [[1, 0.1], [0.1, 1]] 3950 >>> df = 7 3951 >>> multivariate_t.pdf(x, loc, shape, df) 3952 array([0.00075713]) 3953 3954 """ 3955 dim, loc, shape, df = self._process_parameters(loc, shape, df) 3956 x = self._process_quantiles(x, dim) 3957 shape_info = _PSD(shape, allow_singular=allow_singular) 3958 logpdf = self._logpdf(x, loc, shape_info.U, shape_info.log_pdet, df, 3959 dim, shape_info.rank) 3960 return np.exp(logpdf) 3961 3962 def logpdf(self, x, loc=None, shape=1, df=1): 3963 """Log of the multivariate t-distribution probability density function. 3964 3965 Parameters 3966 ---------- 3967 x : array_like 3968 Points at which to evaluate the log of the probability density 3969 function. 3970 %(_mvt_doc_default_callparams)s 3971 3972 Returns 3973 ------- 3974 logpdf : Log of the probability density function evaluated at `x`. 3975 3976 Examples 3977 -------- 3978 >>> from scipy.stats import multivariate_t 3979 >>> x = [0.4, 5] 3980 >>> loc = [0, 1] 3981 >>> shape = [[1, 0.1], [0.1, 1]] 3982 >>> df = 7 3983 >>> multivariate_t.logpdf(x, loc, shape, df) 3984 array([-7.1859802]) 3985 3986 See Also 3987 -------- 3988 pdf : Probability density function. 3989 3990 """ 3991 dim, loc, shape, df = self._process_parameters(loc, shape, df) 3992 x = self._process_quantiles(x, dim) 3993 shape_info = _PSD(shape) 3994 return self._logpdf(x, loc, shape_info.U, shape_info.log_pdet, df, dim, 3995 shape_info.rank) 3996 3997 def _logpdf(self, x, loc, prec_U, log_pdet, df, dim, rank): 3998 """Utility method `pdf`, `logpdf` for parameters. 3999 4000 Parameters 4001 ---------- 4002 x : ndarray 4003 Points at which to evaluate the log of the probability density 4004 function. 4005 loc : ndarray 4006 Location of the distribution. 4007 prec_U : ndarray 4008 A decomposition such that `np.dot(prec_U, prec_U.T)` is the inverse 4009 of the shape matrix. 4010 log_pdet : float 4011 Logarithm of the determinant of the shape matrix. 4012 df : float 4013 Degrees of freedom of the distribution. 4014 dim : int 4015 Dimension of the quantiles x. 4016 rank : int 4017 Rank of the shape matrix. 4018 4019 Notes 4020 ----- 4021 As this function does no argument checking, it should not be called 4022 directly; use 'logpdf' instead. 4023 4024 """ 4025 if df == np.inf: 4026 return multivariate_normal._logpdf(x, loc, prec_U, log_pdet, rank) 4027 4028 dev = x - loc 4029 maha = np.square(np.dot(dev, prec_U)).sum(axis=-1) 4030 4031 t = 0.5 * (df + dim) 4032 A = gammaln(t) 4033 B = gammaln(0.5 * df) 4034 C = dim/2. * np.log(df * np.pi) 4035 D = 0.5 * log_pdet 4036 E = -t * np.log(1 + (1./df) * maha) 4037 4038 return _squeeze_output(A - B - C - D + E) 4039 4040 def rvs(self, loc=None, shape=1, df=1, size=1, random_state=None): 4041 """Draw random samples from a multivariate t-distribution. 4042 4043 Parameters 4044 ---------- 4045 %(_mvt_doc_default_callparams)s 4046 size : integer, optional 4047 Number of samples to draw (default 1). 4048 %(_doc_random_state)s 4049 4050 Returns 4051 ------- 4052 rvs : ndarray or scalar 4053 Random variates of size (`size`, `P`), where `P` is the 4054 dimension of the random variable. 4055 4056 Examples 4057 -------- 4058 >>> from scipy.stats import multivariate_t 4059 >>> x = [0.4, 5] 4060 >>> loc = [0, 1] 4061 >>> shape = [[1, 0.1], [0.1, 1]] 4062 >>> df = 7 4063 >>> multivariate_t.rvs(loc, shape, df) 4064 array([[0.93477495, 3.00408716]]) 4065 4066 """ 4067 # For implementation details, see equation (3): 4068 # 4069 # Hofert, "On Sampling from the Multivariatet Distribution", 2013 4070 # http://rjournal.github.io/archive/2013-2/hofert.pdf 4071 # 4072 dim, loc, shape, df = self._process_parameters(loc, shape, df) 4073 if random_state is not None: 4074 rng = check_random_state(random_state) 4075 else: 4076 rng = self._random_state 4077 4078 if np.isinf(df): 4079 x = np.ones(size) 4080 else: 4081 x = rng.chisquare(df, size=size) / df 4082 4083 z = rng.multivariate_normal(np.zeros(dim), shape, size=size) 4084 samples = loc + z / np.sqrt(x)[:, None] 4085 return _squeeze_output(samples) 4086 4087 def _process_quantiles(self, x, dim): 4088 """ 4089 Adjust quantiles array so that last axis labels the components of 4090 each data point. 4091 """ 4092 x = np.asarray(x, dtype=float) 4093 if x.ndim == 0: 4094 x = x[np.newaxis] 4095 elif x.ndim == 1: 4096 if dim == 1: 4097 x = x[:, np.newaxis] 4098 else: 4099 x = x[np.newaxis, :] 4100 return x 4101 4102 def _process_parameters(self, loc, shape, df): 4103 """ 4104 Infer dimensionality from location array and shape matrix, handle 4105 defaults, and ensure compatible dimensions. 4106 """ 4107 if loc is None and shape is None: 4108 loc = np.asarray(0, dtype=float) 4109 shape = np.asarray(1, dtype=float) 4110 dim = 1 4111 elif loc is None: 4112 shape = np.asarray(shape, dtype=float) 4113 if shape.ndim < 2: 4114 dim = 1 4115 else: 4116 dim = shape.shape[0] 4117 loc = np.zeros(dim) 4118 elif shape is None: 4119 loc = np.asarray(loc, dtype=float) 4120 dim = loc.size 4121 shape = np.eye(dim) 4122 else: 4123 shape = np.asarray(shape, dtype=float) 4124 loc = np.asarray(loc, dtype=float) 4125 dim = loc.size 4126 4127 if dim == 1: 4128 loc.shape = (1,) 4129 shape.shape = (1, 1) 4130 4131 if loc.ndim != 1 or loc.shape[0] != dim: 4132 raise ValueError("Array 'loc' must be a vector of length %d." % 4133 dim) 4134 if shape.ndim == 0: 4135 shape = shape * np.eye(dim) 4136 elif shape.ndim == 1: 4137 shape = np.diag(shape) 4138 elif shape.ndim == 2 and shape.shape != (dim, dim): 4139 rows, cols = shape.shape 4140 if rows != cols: 4141 msg = ("Array 'cov' must be square if it is two dimensional," 4142 " but cov.shape = %s." % str(shape.shape)) 4143 else: 4144 msg = ("Dimension mismatch: array 'cov' is of shape %s," 4145 " but 'loc' is a vector of length %d.") 4146 msg = msg % (str(shape.shape), len(loc)) 4147 raise ValueError(msg) 4148 elif shape.ndim > 2: 4149 raise ValueError("Array 'cov' must be at most two-dimensional," 4150 " but cov.ndim = %d" % shape.ndim) 4151 4152 # Process degrees of freedom. 4153 if df is None: 4154 df = 1 4155 elif df <= 0: 4156 raise ValueError("'df' must be greater than zero.") 4157 elif np.isnan(df): 4158 raise ValueError("'df' is 'nan' but must be greater than zero or 'np.inf'.") 4159 4160 return dim, loc, shape, df 4161 4162 4163class multivariate_t_frozen(multi_rv_frozen): 4164 4165 def __init__(self, loc=None, shape=1, df=1, allow_singular=False, 4166 seed=None): 4167 """Create a frozen multivariate t distribution. 4168 4169 Parameters 4170 ---------- 4171 %(_mvt_doc_default_callparams)s 4172 4173 Examples 4174 -------- 4175 >>> loc = np.zeros(3) 4176 >>> shape = np.eye(3) 4177 >>> df = 10 4178 >>> dist = multivariate_t(loc, shape, df) 4179 >>> dist.rvs() 4180 array([[ 0.81412036, -1.53612361, 0.42199647]]) 4181 >>> dist.pdf([1, 1, 1]) 4182 array([0.01237803]) 4183 4184 """ 4185 self._dist = multivariate_t_gen(seed) 4186 dim, loc, shape, df = self._dist._process_parameters(loc, shape, df) 4187 self.dim, self.loc, self.shape, self.df = dim, loc, shape, df 4188 self.shape_info = _PSD(shape, allow_singular=allow_singular) 4189 4190 def logpdf(self, x): 4191 x = self._dist._process_quantiles(x, self.dim) 4192 U = self.shape_info.U 4193 log_pdet = self.shape_info.log_pdet 4194 return self._dist._logpdf(x, self.loc, U, log_pdet, self.df, self.dim, 4195 self.shape_info.rank) 4196 4197 def pdf(self, x): 4198 return np.exp(self.logpdf(x)) 4199 4200 def rvs(self, size=1, random_state=None): 4201 return self._dist.rvs(loc=self.loc, 4202 shape=self.shape, 4203 df=self.df, 4204 size=size, 4205 random_state=random_state) 4206 4207 4208multivariate_t = multivariate_t_gen() 4209 4210 4211# Set frozen generator docstrings from corresponding docstrings in 4212# matrix_normal_gen and fill in default strings in class docstrings 4213for name in ['logpdf', 'pdf', 'rvs']: 4214 method = multivariate_t_gen.__dict__[name] 4215 method_frozen = multivariate_t_frozen.__dict__[name] 4216 method_frozen.__doc__ = doccer.docformat(method.__doc__, 4217 mvt_docdict_noparams) 4218 method.__doc__ = doccer.docformat(method.__doc__, mvt_docdict_params) 4219 4220 4221_mhg_doc_default_callparams = """\ 4222m : array_like 4223 The number of each type of object in the population. 4224 That is, :math:`m[i]` is the number of objects of 4225 type :math:`i`. 4226n : array_like 4227 The number of samples taken from the population. 4228""" 4229 4230_mhg_doc_callparams_note = """\ 4231`m` must be an array of positive integers. If the quantile 4232:math:`i` contains values out of the range :math:`[0, m_i]` 4233where :math:`m_i` is the number of objects of type :math:`i` 4234in the population or if the parameters are inconsistent with one 4235another (e.g. ``x.sum() != n``), methods return the appropriate 4236value (e.g. ``0`` for ``pmf``). If `m` or `n` contain negative 4237values, the result will contain ``nan`` there. 4238""" 4239 4240_mhg_doc_frozen_callparams = "" 4241 4242_mhg_doc_frozen_callparams_note = \ 4243 """See class definition for a detailed description of parameters.""" 4244 4245mhg_docdict_params = { 4246 '_doc_default_callparams': _mhg_doc_default_callparams, 4247 '_doc_callparams_note': _mhg_doc_callparams_note, 4248 '_doc_random_state': _doc_random_state 4249} 4250 4251mhg_docdict_noparams = { 4252 '_doc_default_callparams': _mhg_doc_frozen_callparams, 4253 '_doc_callparams_note': _mhg_doc_frozen_callparams_note, 4254 '_doc_random_state': _doc_random_state 4255} 4256 4257 4258class multivariate_hypergeom_gen(multi_rv_generic): 4259 r"""A multivariate hypergeometric random variable. 4260 4261 Methods 4262 ------- 4263 ``pmf(x, m, n)`` 4264 Probability mass function. 4265 ``logpmf(x, m, n)`` 4266 Log of the probability mass function. 4267 ``rvs(m, n, size=1, random_state=None)`` 4268 Draw random samples from a multivariate hypergeometric 4269 distribution. 4270 ``mean(m, n)`` 4271 Mean of the multivariate hypergeometric distribution. 4272 ``var(m, n)`` 4273 Variance of the multivariate hypergeometric distribution. 4274 ``cov(m, n)`` 4275 Compute the covariance matrix of the multivariate 4276 hypergeometric distribution. 4277 4278 Parameters 4279 ---------- 4280 %(_doc_default_callparams)s 4281 %(_doc_random_state)s 4282 4283 Notes 4284 ----- 4285 %(_doc_callparams_note)s 4286 4287 The probability mass function for `multivariate_hypergeom` is 4288 4289 .. math:: 4290 4291 P(X_1 = x_1, X_2 = x_2, \ldots, X_k = x_k) = \frac{\binom{m_1}{x_1} 4292 \binom{m_2}{x_2} \cdots \binom{m_k}{x_k}}{\binom{M}{n}}, \\ \quad 4293 (x_1, x_2, \ldots, x_k) \in \mathbb{N}^k \text{ with } 4294 \sum_{i=1}^k x_i = n 4295 4296 where :math:`m_i` are the number of objects of type :math:`i`, :math:`M` 4297 is the total number of objects in the population (sum of all the 4298 :math:`m_i`), and :math:`n` is the size of the sample to be taken 4299 from the population. 4300 4301 .. versionadded:: 1.6.0 4302 4303 Examples 4304 -------- 4305 To evaluate the probability mass function of the multivariate 4306 hypergeometric distribution, with a dichotomous population of size 4307 :math:`10` and :math:`20`, at a sample of size :math:`12` with 4308 :math:`8` objects of the first type and :math:`4` objects of the 4309 second type, use: 4310 4311 >>> from scipy.stats import multivariate_hypergeom 4312 >>> multivariate_hypergeom.pmf(x=[8, 4], m=[10, 20], n=12) 4313 0.0025207176631464523 4314 4315 The `multivariate_hypergeom` distribution is identical to the 4316 corresponding `hypergeom` distribution (tiny numerical differences 4317 notwithstanding) when only two types (good and bad) of objects 4318 are present in the population as in the example above. Consider 4319 another example for a comparison with the hypergeometric distribution: 4320 4321 >>> from scipy.stats import hypergeom 4322 >>> multivariate_hypergeom.pmf(x=[3, 1], m=[10, 5], n=4) 4323 0.4395604395604395 4324 >>> hypergeom.pmf(k=3, M=15, n=4, N=10) 4325 0.43956043956044005 4326 4327 The functions ``pmf``, ``logpmf``, ``mean``, ``var``, ``cov``, and ``rvs`` 4328 support broadcasting, under the convention that the vector parameters 4329 (``x``, ``m``, and ``n``) are interpreted as if each row along the last 4330 axis is a single object. For instance, we can combine the previous two 4331 calls to `multivariate_hypergeom` as 4332 4333 >>> multivariate_hypergeom.pmf(x=[[8, 4], [3, 1]], m=[[10, 20], [10, 5]], 4334 ... n=[12, 4]) 4335 array([0.00252072, 0.43956044]) 4336 4337 This broadcasting also works for ``cov``, where the output objects are 4338 square matrices of size ``m.shape[-1]``. For example: 4339 4340 >>> multivariate_hypergeom.cov(m=[[7, 9], [10, 15]], n=[8, 12]) 4341 array([[[ 1.05, -1.05], 4342 [-1.05, 1.05]], 4343 [[ 1.56, -1.56], 4344 [-1.56, 1.56]]]) 4345 4346 That is, ``result[0]`` is equal to 4347 ``multivariate_hypergeom.cov(m=[7, 9], n=8)`` and ``result[1]`` is equal 4348 to ``multivariate_hypergeom.cov(m=[10, 15], n=12)``. 4349 4350 Alternatively, the object may be called (as a function) to fix the `m` 4351 and `n` parameters, returning a "frozen" multivariate hypergeometric 4352 random variable. 4353 4354 >>> rv = multivariate_hypergeom(m=[10, 20], n=12) 4355 >>> rv.pmf(x=[8, 4]) 4356 0.0025207176631464523 4357 4358 See Also 4359 -------- 4360 scipy.stats.hypergeom : The hypergeometric distribution. 4361 scipy.stats.multinomial : The multinomial distribution. 4362 4363 References 4364 ---------- 4365 .. [1] The Multivariate Hypergeometric Distribution, 4366 http://www.randomservices.org/random/urn/MultiHypergeometric.html 4367 .. [2] Thomas J. Sargent and John Stachurski, 2020, 4368 Multivariate Hypergeometric Distribution 4369 https://python.quantecon.org/_downloads/pdf/multi_hyper.pdf 4370 """ 4371 def __init__(self, seed=None): 4372 super().__init__(seed) 4373 self.__doc__ = doccer.docformat(self.__doc__, mhg_docdict_params) 4374 4375 def __call__(self, m, n, seed=None): 4376 """Create a frozen multivariate_hypergeom distribution. 4377 4378 See `multivariate_hypergeom_frozen` for more information. 4379 """ 4380 return multivariate_hypergeom_frozen(m, n, seed=seed) 4381 4382 def _process_parameters(self, m, n): 4383 m = np.asarray(m) 4384 n = np.asarray(n) 4385 if m.size == 0: 4386 m = m.astype(int) 4387 if n.size == 0: 4388 n = n.astype(int) 4389 if not np.issubdtype(m.dtype, np.integer): 4390 raise TypeError("'m' must an array of integers.") 4391 if not np.issubdtype(n.dtype, np.integer): 4392 raise TypeError("'n' must an array of integers.") 4393 if m.ndim == 0: 4394 raise ValueError("'m' must be an array with" 4395 " at least one dimension.") 4396 4397 # check for empty arrays 4398 if m.size != 0: 4399 n = n[..., np.newaxis] 4400 4401 m, n = np.broadcast_arrays(m, n) 4402 4403 # check for empty arrays 4404 if m.size != 0: 4405 n = n[..., 0] 4406 4407 mcond = m < 0 4408 4409 M = m.sum(axis=-1) 4410 4411 ncond = (n < 0) | (n > M) 4412 return M, m, n, mcond, ncond, np.any(mcond, axis=-1) | ncond 4413 4414 def _process_quantiles(self, x, M, m, n): 4415 x = np.asarray(x) 4416 if not np.issubdtype(x.dtype, np.integer): 4417 raise TypeError("'x' must an array of integers.") 4418 if x.ndim == 0: 4419 raise ValueError("'x' must be an array with" 4420 " at least one dimension.") 4421 if not x.shape[-1] == m.shape[-1]: 4422 raise ValueError(f"Size of each quantile must be size of 'm': " 4423 f"received {x.shape[-1]}, " 4424 f"but expected {m.shape[-1]}.") 4425 4426 # check for empty arrays 4427 if m.size != 0: 4428 n = n[..., np.newaxis] 4429 M = M[..., np.newaxis] 4430 4431 x, m, n, M = np.broadcast_arrays(x, m, n, M) 4432 4433 # check for empty arrays 4434 if m.size != 0: 4435 n, M = n[..., 0], M[..., 0] 4436 4437 xcond = (x < 0) | (x > m) 4438 return (x, M, m, n, xcond, 4439 np.any(xcond, axis=-1) | (x.sum(axis=-1) != n)) 4440 4441 def _checkresult(self, result, cond, bad_value): 4442 result = np.asarray(result) 4443 if cond.ndim != 0: 4444 result[cond] = bad_value 4445 elif cond: 4446 return bad_value 4447 if result.ndim == 0: 4448 return result[()] 4449 return result 4450 4451 def _logpmf(self, x, M, m, n, mxcond, ncond): 4452 # This equation of the pmf comes from the relation, 4453 # n combine r = beta(n+1, 1) / beta(r+1, n-r+1) 4454 num = np.zeros_like(m, dtype=np.float_) 4455 den = np.zeros_like(n, dtype=np.float_) 4456 m, x = m[~mxcond], x[~mxcond] 4457 M, n = M[~ncond], n[~ncond] 4458 num[~mxcond] = (betaln(m+1, 1) - betaln(x+1, m-x+1)) 4459 den[~ncond] = (betaln(M+1, 1) - betaln(n+1, M-n+1)) 4460 num[mxcond] = np.nan 4461 den[ncond] = np.nan 4462 num = num.sum(axis=-1) 4463 return num - den 4464 4465 def logpmf(self, x, m, n): 4466 """Log of the multivariate hypergeometric probability mass function. 4467 4468 Parameters 4469 ---------- 4470 x : array_like 4471 Quantiles, with the last axis of `x` denoting the components. 4472 %(_doc_default_callparams)s 4473 4474 Returns 4475 ------- 4476 logpmf : ndarray or scalar 4477 Log of the probability mass function evaluated at `x` 4478 4479 Notes 4480 ----- 4481 %(_doc_callparams_note)s 4482 """ 4483 M, m, n, mcond, ncond, mncond = self._process_parameters(m, n) 4484 (x, M, m, n, xcond, 4485 xcond_reduced) = self._process_quantiles(x, M, m, n) 4486 mxcond = mcond | xcond 4487 ncond = ncond | np.zeros(n.shape, dtype=np.bool_) 4488 4489 result = self._logpmf(x, M, m, n, mxcond, ncond) 4490 4491 # replace values for which x was out of the domain; broadcast 4492 # xcond to the right shape 4493 xcond_ = xcond_reduced | np.zeros(mncond.shape, dtype=np.bool_) 4494 result = self._checkresult(result, xcond_, np.NINF) 4495 4496 # replace values bad for n or m; broadcast 4497 # mncond to the right shape 4498 mncond_ = mncond | np.zeros(xcond_reduced.shape, dtype=np.bool_) 4499 return self._checkresult(result, mncond_, np.nan) 4500 4501 def pmf(self, x, m, n): 4502 """Multivariate hypergeometric probability mass function. 4503 4504 Parameters 4505 ---------- 4506 x : array_like 4507 Quantiles, with the last axis of `x` denoting the components. 4508 %(_doc_default_callparams)s 4509 4510 Returns 4511 ------- 4512 pmf : ndarray or scalar 4513 Probability density function evaluated at `x` 4514 4515 Notes 4516 ----- 4517 %(_doc_callparams_note)s 4518 """ 4519 out = np.exp(self.logpmf(x, m, n)) 4520 return out 4521 4522 def mean(self, m, n): 4523 """Mean of the multivariate hypergeometric distribution. 4524 4525 Parameters 4526 ---------- 4527 %(_doc_default_callparams)s 4528 4529 Returns 4530 ------- 4531 mean : array_like or scalar 4532 The mean of the distribution 4533 """ 4534 M, m, n, _, _, mncond = self._process_parameters(m, n) 4535 # check for empty arrays 4536 if m.size != 0: 4537 M, n = M[..., np.newaxis], n[..., np.newaxis] 4538 cond = (M == 0) 4539 M = np.ma.masked_array(M, mask=cond) 4540 mu = n*(m/M) 4541 if m.size != 0: 4542 mncond = (mncond[..., np.newaxis] | 4543 np.zeros(mu.shape, dtype=np.bool_)) 4544 return self._checkresult(mu, mncond, np.nan) 4545 4546 def var(self, m, n): 4547 """Variance of the multivariate hypergeometric distribution. 4548 4549 Parameters 4550 ---------- 4551 %(_doc_default_callparams)s 4552 4553 Returns 4554 ------- 4555 array_like 4556 The variances of the components of the distribution. This is 4557 the diagonal of the covariance matrix of the distribution 4558 """ 4559 M, m, n, _, _, mncond = self._process_parameters(m, n) 4560 # check for empty arrays 4561 if m.size != 0: 4562 M, n = M[..., np.newaxis], n[..., np.newaxis] 4563 cond = (M == 0) & (M-1 == 0) 4564 M = np.ma.masked_array(M, mask=cond) 4565 output = n * m/M * (M-m)/M * (M-n)/(M-1) 4566 if m.size != 0: 4567 mncond = (mncond[..., np.newaxis] | 4568 np.zeros(output.shape, dtype=np.bool_)) 4569 return self._checkresult(output, mncond, np.nan) 4570 4571 def cov(self, m, n): 4572 """Covariance matrix of the multivariate hypergeometric distribution. 4573 4574 Parameters 4575 ---------- 4576 %(_doc_default_callparams)s 4577 4578 Returns 4579 ------- 4580 cov : array_like 4581 The covariance matrix of the distribution 4582 """ 4583 # see [1]_ for the formula and [2]_ for implementation 4584 # cov( x_i,x_j ) = -n * (M-n)/(M-1) * (K_i*K_j) / (M**2) 4585 M, m, n, _, _, mncond = self._process_parameters(m, n) 4586 # check for empty arrays 4587 if m.size != 0: 4588 M = M[..., np.newaxis, np.newaxis] 4589 n = n[..., np.newaxis, np.newaxis] 4590 cond = (M == 0) & (M-1 == 0) 4591 M = np.ma.masked_array(M, mask=cond) 4592 output = (-n * (M-n)/(M-1) * 4593 np.einsum("...i,...j->...ij", m, m) / (M**2)) 4594 # check for empty arrays 4595 if m.size != 0: 4596 M, n = M[..., 0, 0], n[..., 0, 0] 4597 cond = cond[..., 0, 0] 4598 dim = m.shape[-1] 4599 # diagonal entries need to be computed differently 4600 for i in range(dim): 4601 output[..., i, i] = (n * (M-n) * m[..., i]*(M-m[..., i])) 4602 output[..., i, i] = output[..., i, i] / (M-1) 4603 output[..., i, i] = output[..., i, i] / (M**2) 4604 if m.size != 0: 4605 mncond = (mncond[..., np.newaxis, np.newaxis] | 4606 np.zeros(output.shape, dtype=np.bool_)) 4607 return self._checkresult(output, mncond, np.nan) 4608 4609 def rvs(self, m, n, size=None, random_state=None): 4610 """Draw random samples from a multivariate hypergeometric distribution. 4611 4612 Parameters 4613 ---------- 4614 %(_doc_default_callparams)s 4615 size : integer or iterable of integers, optional 4616 Number of samples to draw. Default is ``None``, in which case a 4617 single variate is returned as an array with shape ``m.shape``. 4618 %(_doc_random_state)s 4619 4620 Returns 4621 ------- 4622 rvs : array_like 4623 Random variates of shape ``size`` or ``m.shape`` 4624 (if ``size=None``). 4625 4626 Notes 4627 ----- 4628 %(_doc_callparams_note)s 4629 4630 Also note that NumPy's `multivariate_hypergeometric` sampler is not 4631 used as it doesn't support broadcasting. 4632 """ 4633 M, m, n, _, _, _ = self._process_parameters(m, n) 4634 4635 random_state = self._get_random_state(random_state) 4636 4637 if size is not None and isinstance(size, int): 4638 size = (size, ) 4639 4640 if size is None: 4641 rvs = np.empty(m.shape, dtype=m.dtype) 4642 else: 4643 rvs = np.empty(size + (m.shape[-1], ), dtype=m.dtype) 4644 rem = M 4645 4646 # This sampler has been taken from numpy gh-13794 4647 # https://github.com/numpy/numpy/pull/13794 4648 for c in range(m.shape[-1] - 1): 4649 rem = rem - m[..., c] 4650 rvs[..., c] = ((n != 0) * 4651 random_state.hypergeometric(m[..., c], rem, 4652 n + (n == 0), 4653 size=size)) 4654 n = n - rvs[..., c] 4655 rvs[..., m.shape[-1] - 1] = n 4656 4657 return rvs 4658 4659 4660multivariate_hypergeom = multivariate_hypergeom_gen() 4661 4662 4663class multivariate_hypergeom_frozen(multi_rv_frozen): 4664 def __init__(self, m, n, seed=None): 4665 self._dist = multivariate_hypergeom_gen(seed) 4666 (self.M, self.m, self.n, 4667 self.mcond, self.ncond, 4668 self.mncond) = self._dist._process_parameters(m, n) 4669 4670 # monkey patch self._dist 4671 def _process_parameters(m, n): 4672 return (self.M, self.m, self.n, 4673 self.mcond, self.ncond, 4674 self.mncond) 4675 self._dist._process_parameters = _process_parameters 4676 4677 def logpmf(self, x): 4678 return self._dist.logpmf(x, self.m, self.n) 4679 4680 def pmf(self, x): 4681 return self._dist.pmf(x, self.m, self.n) 4682 4683 def mean(self): 4684 return self._dist.mean(self.m, self.n) 4685 4686 def var(self): 4687 return self._dist.var(self.m, self.n) 4688 4689 def cov(self): 4690 return self._dist.cov(self.m, self.n) 4691 4692 def rvs(self, size=1, random_state=None): 4693 return self._dist.rvs(self.m, self.n, 4694 size=size, 4695 random_state=random_state) 4696 4697 4698# Set frozen generator docstrings from corresponding docstrings in 4699# multivariate_hypergeom and fill in default strings in class docstrings 4700for name in ['logpmf', 'pmf', 'mean', 'var', 'cov', 'rvs']: 4701 method = multivariate_hypergeom_gen.__dict__[name] 4702 method_frozen = multivariate_hypergeom_frozen.__dict__[name] 4703 method_frozen.__doc__ = doccer.docformat( 4704 method.__doc__, mhg_docdict_noparams) 4705 method.__doc__ = doccer.docformat(method.__doc__, 4706 mhg_docdict_params) 4707