1""" 2Test functions for multivariate normal distributions. 3 4""" 5import pickle 6 7from numpy.testing import (assert_allclose, assert_almost_equal, 8 assert_array_almost_equal, assert_equal, 9 assert_array_less, assert_) 10import pytest 11from pytest import raises as assert_raises 12 13from .test_continuous_basic import check_distribution_rvs 14 15import numpy 16import numpy as np 17 18import scipy.linalg 19from scipy.stats._multivariate import (_PSD, 20 _lnB, 21 _cho_inv_batch, 22 multivariate_normal_frozen) 23from scipy.stats import (multivariate_normal, multivariate_hypergeom, 24 matrix_normal, special_ortho_group, ortho_group, 25 random_correlation, unitary_group, dirichlet, 26 beta, wishart, multinomial, invwishart, chi2, 27 invgamma, norm, uniform, ks_2samp, kstest, binom, 28 hypergeom, multivariate_t, cauchy, normaltest) 29 30from scipy.integrate import romb 31from scipy.special import multigammaln 32 33from .common_tests import check_random_state_property 34 35from unittest.mock import patch 36 37 38class TestMultivariateNormal: 39 def test_input_shape(self): 40 mu = np.arange(3) 41 cov = np.identity(2) 42 assert_raises(ValueError, multivariate_normal.pdf, (0, 1), mu, cov) 43 assert_raises(ValueError, multivariate_normal.pdf, (0, 1, 2), mu, cov) 44 assert_raises(ValueError, multivariate_normal.cdf, (0, 1), mu, cov) 45 assert_raises(ValueError, multivariate_normal.cdf, (0, 1, 2), mu, cov) 46 47 def test_scalar_values(self): 48 np.random.seed(1234) 49 50 # When evaluated on scalar data, the pdf should return a scalar 51 x, mean, cov = 1.5, 1.7, 2.5 52 pdf = multivariate_normal.pdf(x, mean, cov) 53 assert_equal(pdf.ndim, 0) 54 55 # When evaluated on a single vector, the pdf should return a scalar 56 x = np.random.randn(5) 57 mean = np.random.randn(5) 58 cov = np.abs(np.random.randn(5)) # Diagonal values for cov. matrix 59 pdf = multivariate_normal.pdf(x, mean, cov) 60 assert_equal(pdf.ndim, 0) 61 62 # When evaluated on scalar data, the cdf should return a scalar 63 x, mean, cov = 1.5, 1.7, 2.5 64 cdf = multivariate_normal.cdf(x, mean, cov) 65 assert_equal(cdf.ndim, 0) 66 67 # When evaluated on a single vector, the cdf should return a scalar 68 x = np.random.randn(5) 69 mean = np.random.randn(5) 70 cov = np.abs(np.random.randn(5)) # Diagonal values for cov. matrix 71 cdf = multivariate_normal.cdf(x, mean, cov) 72 assert_equal(cdf.ndim, 0) 73 74 def test_logpdf(self): 75 # Check that the log of the pdf is in fact the logpdf 76 np.random.seed(1234) 77 x = np.random.randn(5) 78 mean = np.random.randn(5) 79 cov = np.abs(np.random.randn(5)) 80 d1 = multivariate_normal.logpdf(x, mean, cov) 81 d2 = multivariate_normal.pdf(x, mean, cov) 82 assert_allclose(d1, np.log(d2)) 83 84 def test_logpdf_default_values(self): 85 # Check that the log of the pdf is in fact the logpdf 86 # with default parameters Mean=None and cov = 1 87 np.random.seed(1234) 88 x = np.random.randn(5) 89 d1 = multivariate_normal.logpdf(x) 90 d2 = multivariate_normal.pdf(x) 91 # check whether default values are being used 92 d3 = multivariate_normal.logpdf(x, None, 1) 93 d4 = multivariate_normal.pdf(x, None, 1) 94 assert_allclose(d1, np.log(d2)) 95 assert_allclose(d3, np.log(d4)) 96 97 def test_logcdf(self): 98 # Check that the log of the cdf is in fact the logcdf 99 np.random.seed(1234) 100 x = np.random.randn(5) 101 mean = np.random.randn(5) 102 cov = np.abs(np.random.randn(5)) 103 d1 = multivariate_normal.logcdf(x, mean, cov) 104 d2 = multivariate_normal.cdf(x, mean, cov) 105 assert_allclose(d1, np.log(d2)) 106 107 def test_logcdf_default_values(self): 108 # Check that the log of the cdf is in fact the logcdf 109 # with default parameters Mean=None and cov = 1 110 np.random.seed(1234) 111 x = np.random.randn(5) 112 d1 = multivariate_normal.logcdf(x) 113 d2 = multivariate_normal.cdf(x) 114 # check whether default values are being used 115 d3 = multivariate_normal.logcdf(x, None, 1) 116 d4 = multivariate_normal.cdf(x, None, 1) 117 assert_allclose(d1, np.log(d2)) 118 assert_allclose(d3, np.log(d4)) 119 120 def test_rank(self): 121 # Check that the rank is detected correctly. 122 np.random.seed(1234) 123 n = 4 124 mean = np.random.randn(n) 125 for expected_rank in range(1, n + 1): 126 s = np.random.randn(n, expected_rank) 127 cov = np.dot(s, s.T) 128 distn = multivariate_normal(mean, cov, allow_singular=True) 129 assert_equal(distn.cov_info.rank, expected_rank) 130 131 def test_degenerate_distributions(self): 132 133 def _sample_orthonormal_matrix(n): 134 M = np.random.randn(n, n) 135 u, s, v = scipy.linalg.svd(M) 136 return u 137 138 for n in range(1, 5): 139 x = np.random.randn(n) 140 for k in range(1, n + 1): 141 # Sample a small covariance matrix. 142 s = np.random.randn(k, k) 143 cov_kk = np.dot(s, s.T) 144 145 # Embed the small covariance matrix into a larger low rank matrix. 146 cov_nn = np.zeros((n, n)) 147 cov_nn[:k, :k] = cov_kk 148 149 # Define a rotation of the larger low rank matrix. 150 u = _sample_orthonormal_matrix(n) 151 cov_rr = np.dot(u, np.dot(cov_nn, u.T)) 152 y = np.dot(u, x) 153 154 # Check some identities. 155 distn_kk = multivariate_normal(np.zeros(k), cov_kk, 156 allow_singular=True) 157 distn_nn = multivariate_normal(np.zeros(n), cov_nn, 158 allow_singular=True) 159 distn_rr = multivariate_normal(np.zeros(n), cov_rr, 160 allow_singular=True) 161 assert_equal(distn_kk.cov_info.rank, k) 162 assert_equal(distn_nn.cov_info.rank, k) 163 assert_equal(distn_rr.cov_info.rank, k) 164 pdf_kk = distn_kk.pdf(x[:k]) 165 pdf_nn = distn_nn.pdf(x) 166 pdf_rr = distn_rr.pdf(y) 167 assert_allclose(pdf_kk, pdf_nn) 168 assert_allclose(pdf_kk, pdf_rr) 169 logpdf_kk = distn_kk.logpdf(x[:k]) 170 logpdf_nn = distn_nn.logpdf(x) 171 logpdf_rr = distn_rr.logpdf(y) 172 assert_allclose(logpdf_kk, logpdf_nn) 173 assert_allclose(logpdf_kk, logpdf_rr) 174 175 def test_large_pseudo_determinant(self): 176 # Check that large pseudo-determinants are handled appropriately. 177 178 # Construct a singular diagonal covariance matrix 179 # whose pseudo determinant overflows double precision. 180 large_total_log = 1000.0 181 npos = 100 182 nzero = 2 183 large_entry = np.exp(large_total_log / npos) 184 n = npos + nzero 185 cov = np.zeros((n, n), dtype=float) 186 np.fill_diagonal(cov, large_entry) 187 cov[-nzero:, -nzero:] = 0 188 189 # Check some determinants. 190 assert_equal(scipy.linalg.det(cov), 0) 191 assert_equal(scipy.linalg.det(cov[:npos, :npos]), np.inf) 192 assert_allclose(np.linalg.slogdet(cov[:npos, :npos]), 193 (1, large_total_log)) 194 195 # Check the pseudo-determinant. 196 psd = _PSD(cov) 197 assert_allclose(psd.log_pdet, large_total_log) 198 199 def test_broadcasting(self): 200 np.random.seed(1234) 201 n = 4 202 203 # Construct a random covariance matrix. 204 data = np.random.randn(n, n) 205 cov = np.dot(data, data.T) 206 mean = np.random.randn(n) 207 208 # Construct an ndarray which can be interpreted as 209 # a 2x3 array whose elements are random data vectors. 210 X = np.random.randn(2, 3, n) 211 212 # Check that multiple data points can be evaluated at once. 213 desired_pdf = multivariate_normal.pdf(X, mean, cov) 214 desired_cdf = multivariate_normal.cdf(X, mean, cov) 215 for i in range(2): 216 for j in range(3): 217 actual = multivariate_normal.pdf(X[i, j], mean, cov) 218 assert_allclose(actual, desired_pdf[i,j]) 219 # Repeat for cdf 220 actual = multivariate_normal.cdf(X[i, j], mean, cov) 221 assert_allclose(actual, desired_cdf[i,j], rtol=1e-3) 222 223 def test_normal_1D(self): 224 # The probability density function for a 1D normal variable should 225 # agree with the standard normal distribution in scipy.stats.distributions 226 x = np.linspace(0, 2, 10) 227 mean, cov = 1.2, 0.9 228 scale = cov**0.5 229 d1 = norm.pdf(x, mean, scale) 230 d2 = multivariate_normal.pdf(x, mean, cov) 231 assert_allclose(d1, d2) 232 # The same should hold for the cumulative distribution function 233 d1 = norm.cdf(x, mean, scale) 234 d2 = multivariate_normal.cdf(x, mean, cov) 235 assert_allclose(d1, d2) 236 237 def test_marginalization(self): 238 # Integrating out one of the variables of a 2D Gaussian should 239 # yield a 1D Gaussian 240 mean = np.array([2.5, 3.5]) 241 cov = np.array([[.5, 0.2], [0.2, .6]]) 242 n = 2 ** 8 + 1 # Number of samples 243 delta = 6 / (n - 1) # Grid spacing 244 245 v = np.linspace(0, 6, n) 246 xv, yv = np.meshgrid(v, v) 247 pos = np.empty((n, n, 2)) 248 pos[:, :, 0] = xv 249 pos[:, :, 1] = yv 250 pdf = multivariate_normal.pdf(pos, mean, cov) 251 252 # Marginalize over x and y axis 253 margin_x = romb(pdf, delta, axis=0) 254 margin_y = romb(pdf, delta, axis=1) 255 256 # Compare with standard normal distribution 257 gauss_x = norm.pdf(v, loc=mean[0], scale=cov[0, 0] ** 0.5) 258 gauss_y = norm.pdf(v, loc=mean[1], scale=cov[1, 1] ** 0.5) 259 assert_allclose(margin_x, gauss_x, rtol=1e-2, atol=1e-2) 260 assert_allclose(margin_y, gauss_y, rtol=1e-2, atol=1e-2) 261 262 def test_frozen(self): 263 # The frozen distribution should agree with the regular one 264 np.random.seed(1234) 265 x = np.random.randn(5) 266 mean = np.random.randn(5) 267 cov = np.abs(np.random.randn(5)) 268 norm_frozen = multivariate_normal(mean, cov) 269 assert_allclose(norm_frozen.pdf(x), multivariate_normal.pdf(x, mean, cov)) 270 assert_allclose(norm_frozen.logpdf(x), 271 multivariate_normal.logpdf(x, mean, cov)) 272 assert_allclose(norm_frozen.cdf(x), multivariate_normal.cdf(x, mean, cov)) 273 assert_allclose(norm_frozen.logcdf(x), 274 multivariate_normal.logcdf(x, mean, cov)) 275 276 def test_pseudodet_pinv(self): 277 # Make sure that pseudo-inverse and pseudo-det agree on cutoff 278 279 # Assemble random covariance matrix with large and small eigenvalues 280 np.random.seed(1234) 281 n = 7 282 x = np.random.randn(n, n) 283 cov = np.dot(x, x.T) 284 s, u = scipy.linalg.eigh(cov) 285 s = np.full(n, 0.5) 286 s[0] = 1.0 287 s[-1] = 1e-7 288 cov = np.dot(u, np.dot(np.diag(s), u.T)) 289 290 # Set cond so that the lowest eigenvalue is below the cutoff 291 cond = 1e-5 292 psd = _PSD(cov, cond=cond) 293 psd_pinv = _PSD(psd.pinv, cond=cond) 294 295 # Check that the log pseudo-determinant agrees with the sum 296 # of the logs of all but the smallest eigenvalue 297 assert_allclose(psd.log_pdet, np.sum(np.log(s[:-1]))) 298 # Check that the pseudo-determinant of the pseudo-inverse 299 # agrees with 1 / pseudo-determinant 300 assert_allclose(-psd.log_pdet, psd_pinv.log_pdet) 301 302 def test_exception_nonsquare_cov(self): 303 cov = [[1, 2, 3], [4, 5, 6]] 304 assert_raises(ValueError, _PSD, cov) 305 306 def test_exception_nonfinite_cov(self): 307 cov_nan = [[1, 0], [0, np.nan]] 308 assert_raises(ValueError, _PSD, cov_nan) 309 cov_inf = [[1, 0], [0, np.inf]] 310 assert_raises(ValueError, _PSD, cov_inf) 311 312 def test_exception_non_psd_cov(self): 313 cov = [[1, 0], [0, -1]] 314 assert_raises(ValueError, _PSD, cov) 315 316 def test_exception_singular_cov(self): 317 np.random.seed(1234) 318 x = np.random.randn(5) 319 mean = np.random.randn(5) 320 cov = np.ones((5, 5)) 321 e = np.linalg.LinAlgError 322 assert_raises(e, multivariate_normal, mean, cov) 323 assert_raises(e, multivariate_normal.pdf, x, mean, cov) 324 assert_raises(e, multivariate_normal.logpdf, x, mean, cov) 325 assert_raises(e, multivariate_normal.cdf, x, mean, cov) 326 assert_raises(e, multivariate_normal.logcdf, x, mean, cov) 327 328 def test_R_values(self): 329 # Compare the multivariate pdf with some values precomputed 330 # in R version 3.0.1 (2013-05-16) on Mac OS X 10.6. 331 332 # The values below were generated by the following R-script: 333 # > library(mnormt) 334 # > x <- seq(0, 2, length=5) 335 # > y <- 3*x - 2 336 # > z <- x + cos(y) 337 # > mu <- c(1, 3, 2) 338 # > Sigma <- matrix(c(1,2,0,2,5,0.5,0,0.5,3), 3, 3) 339 # > r_pdf <- dmnorm(cbind(x,y,z), mu, Sigma) 340 r_pdf = np.array([0.0002214706, 0.0013819953, 0.0049138692, 341 0.0103803050, 0.0140250800]) 342 343 x = np.linspace(0, 2, 5) 344 y = 3 * x - 2 345 z = x + np.cos(y) 346 r = np.array([x, y, z]).T 347 348 mean = np.array([1, 3, 2], 'd') 349 cov = np.array([[1, 2, 0], [2, 5, .5], [0, .5, 3]], 'd') 350 351 pdf = multivariate_normal.pdf(r, mean, cov) 352 assert_allclose(pdf, r_pdf, atol=1e-10) 353 354 # Compare the multivariate cdf with some values precomputed 355 # in R version 3.3.2 (2016-10-31) on Debian GNU/Linux. 356 357 # The values below were generated by the following R-script: 358 # > library(mnormt) 359 # > x <- seq(0, 2, length=5) 360 # > y <- 3*x - 2 361 # > z <- x + cos(y) 362 # > mu <- c(1, 3, 2) 363 # > Sigma <- matrix(c(1,2,0,2,5,0.5,0,0.5,3), 3, 3) 364 # > r_cdf <- pmnorm(cbind(x,y,z), mu, Sigma) 365 r_cdf = np.array([0.0017866215, 0.0267142892, 0.0857098761, 366 0.1063242573, 0.2501068509]) 367 368 cdf = multivariate_normal.cdf(r, mean, cov) 369 assert_allclose(cdf, r_cdf, atol=1e-5) 370 371 # Also test bivariate cdf with some values precomputed 372 # in R version 3.3.2 (2016-10-31) on Debian GNU/Linux. 373 374 # The values below were generated by the following R-script: 375 # > library(mnormt) 376 # > x <- seq(0, 2, length=5) 377 # > y <- 3*x - 2 378 # > mu <- c(1, 3) 379 # > Sigma <- matrix(c(1,2,2,5), 2, 2) 380 # > r_cdf2 <- pmnorm(cbind(x,y), mu, Sigma) 381 r_cdf2 = np.array([0.01262147, 0.05838989, 0.18389571, 382 0.40696599, 0.66470577]) 383 384 r2 = np.array([x, y]).T 385 386 mean2 = np.array([1, 3], 'd') 387 cov2 = np.array([[1, 2], [2, 5]], 'd') 388 389 cdf2 = multivariate_normal.cdf(r2, mean2, cov2) 390 assert_allclose(cdf2, r_cdf2, atol=1e-5) 391 392 def test_multivariate_normal_rvs_zero_covariance(self): 393 mean = np.zeros(2) 394 covariance = np.zeros((2, 2)) 395 model = multivariate_normal(mean, covariance, allow_singular=True) 396 sample = model.rvs() 397 assert_equal(sample, [0, 0]) 398 399 def test_rvs_shape(self): 400 # Check that rvs parses the mean and covariance correctly, and returns 401 # an array of the right shape 402 N = 300 403 d = 4 404 sample = multivariate_normal.rvs(mean=np.zeros(d), cov=1, size=N) 405 assert_equal(sample.shape, (N, d)) 406 407 sample = multivariate_normal.rvs(mean=None, 408 cov=np.array([[2, .1], [.1, 1]]), 409 size=N) 410 assert_equal(sample.shape, (N, 2)) 411 412 u = multivariate_normal(mean=0, cov=1) 413 sample = u.rvs(N) 414 assert_equal(sample.shape, (N, )) 415 416 def test_large_sample(self): 417 # Generate large sample and compare sample mean and sample covariance 418 # with mean and covariance matrix. 419 420 np.random.seed(2846) 421 422 n = 3 423 mean = np.random.randn(n) 424 M = np.random.randn(n, n) 425 cov = np.dot(M, M.T) 426 size = 5000 427 428 sample = multivariate_normal.rvs(mean, cov, size) 429 430 assert_allclose(numpy.cov(sample.T), cov, rtol=1e-1) 431 assert_allclose(sample.mean(0), mean, rtol=1e-1) 432 433 def test_entropy(self): 434 np.random.seed(2846) 435 436 n = 3 437 mean = np.random.randn(n) 438 M = np.random.randn(n, n) 439 cov = np.dot(M, M.T) 440 441 rv = multivariate_normal(mean, cov) 442 443 # Check that frozen distribution agrees with entropy function 444 assert_almost_equal(rv.entropy(), multivariate_normal.entropy(mean, cov)) 445 # Compare entropy with manually computed expression involving 446 # the sum of the logs of the eigenvalues of the covariance matrix 447 eigs = np.linalg.eig(cov)[0] 448 desired = 1 / 2 * (n * (np.log(2 * np.pi) + 1) + np.sum(np.log(eigs))) 449 assert_almost_equal(desired, rv.entropy()) 450 451 def test_lnB(self): 452 alpha = np.array([1, 1, 1]) 453 desired = .5 # e^lnB = 1/2 for [1, 1, 1] 454 455 assert_almost_equal(np.exp(_lnB(alpha)), desired) 456 457class TestMatrixNormal: 458 459 def test_bad_input(self): 460 # Check that bad inputs raise errors 461 num_rows = 4 462 num_cols = 3 463 M = np.full((num_rows,num_cols), 0.3) 464 U = 0.5 * np.identity(num_rows) + np.full((num_rows, num_rows), 0.5) 465 V = 0.7 * np.identity(num_cols) + np.full((num_cols, num_cols), 0.3) 466 467 # Incorrect dimensions 468 assert_raises(ValueError, matrix_normal, np.zeros((5,4,3))) 469 assert_raises(ValueError, matrix_normal, M, np.zeros(10), V) 470 assert_raises(ValueError, matrix_normal, M, U, np.zeros(10)) 471 assert_raises(ValueError, matrix_normal, M, U, U) 472 assert_raises(ValueError, matrix_normal, M, V, V) 473 assert_raises(ValueError, matrix_normal, M.T, U, V) 474 475 # Singular covariance 476 e = np.linalg.LinAlgError 477 assert_raises(e, matrix_normal, M, U, np.ones((num_cols, num_cols))) 478 assert_raises(e, matrix_normal, M, np.ones((num_rows, num_rows)), V) 479 480 def test_default_inputs(self): 481 # Check that default argument handling works 482 num_rows = 4 483 num_cols = 3 484 M = np.full((num_rows,num_cols), 0.3) 485 U = 0.5 * np.identity(num_rows) + np.full((num_rows, num_rows), 0.5) 486 V = 0.7 * np.identity(num_cols) + np.full((num_cols, num_cols), 0.3) 487 Z = np.zeros((num_rows, num_cols)) 488 Zr = np.zeros((num_rows, 1)) 489 Zc = np.zeros((1, num_cols)) 490 Ir = np.identity(num_rows) 491 Ic = np.identity(num_cols) 492 I1 = np.identity(1) 493 494 assert_equal(matrix_normal.rvs(mean=M, rowcov=U, colcov=V).shape, 495 (num_rows, num_cols)) 496 assert_equal(matrix_normal.rvs(mean=M).shape, 497 (num_rows, num_cols)) 498 assert_equal(matrix_normal.rvs(rowcov=U).shape, 499 (num_rows, 1)) 500 assert_equal(matrix_normal.rvs(colcov=V).shape, 501 (1, num_cols)) 502 assert_equal(matrix_normal.rvs(mean=M, colcov=V).shape, 503 (num_rows, num_cols)) 504 assert_equal(matrix_normal.rvs(mean=M, rowcov=U).shape, 505 (num_rows, num_cols)) 506 assert_equal(matrix_normal.rvs(rowcov=U, colcov=V).shape, 507 (num_rows, num_cols)) 508 509 assert_equal(matrix_normal(mean=M).rowcov, Ir) 510 assert_equal(matrix_normal(mean=M).colcov, Ic) 511 assert_equal(matrix_normal(rowcov=U).mean, Zr) 512 assert_equal(matrix_normal(rowcov=U).colcov, I1) 513 assert_equal(matrix_normal(colcov=V).mean, Zc) 514 assert_equal(matrix_normal(colcov=V).rowcov, I1) 515 assert_equal(matrix_normal(mean=M, rowcov=U).colcov, Ic) 516 assert_equal(matrix_normal(mean=M, colcov=V).rowcov, Ir) 517 assert_equal(matrix_normal(rowcov=U, colcov=V).mean, Z) 518 519 def test_covariance_expansion(self): 520 # Check that covariance can be specified with scalar or vector 521 num_rows = 4 522 num_cols = 3 523 M = np.full((num_rows, num_cols), 0.3) 524 Uv = np.full(num_rows, 0.2) 525 Us = 0.2 526 Vv = np.full(num_cols, 0.1) 527 Vs = 0.1 528 529 Ir = np.identity(num_rows) 530 Ic = np.identity(num_cols) 531 532 assert_equal(matrix_normal(mean=M, rowcov=Uv, colcov=Vv).rowcov, 533 0.2*Ir) 534 assert_equal(matrix_normal(mean=M, rowcov=Uv, colcov=Vv).colcov, 535 0.1*Ic) 536 assert_equal(matrix_normal(mean=M, rowcov=Us, colcov=Vs).rowcov, 537 0.2*Ir) 538 assert_equal(matrix_normal(mean=M, rowcov=Us, colcov=Vs).colcov, 539 0.1*Ic) 540 541 def test_frozen_matrix_normal(self): 542 for i in range(1,5): 543 for j in range(1,5): 544 M = np.full((i,j), 0.3) 545 U = 0.5 * np.identity(i) + np.full((i,i), 0.5) 546 V = 0.7 * np.identity(j) + np.full((j,j), 0.3) 547 548 frozen = matrix_normal(mean=M, rowcov=U, colcov=V) 549 550 rvs1 = frozen.rvs(random_state=1234) 551 rvs2 = matrix_normal.rvs(mean=M, rowcov=U, colcov=V, 552 random_state=1234) 553 assert_equal(rvs1, rvs2) 554 555 X = frozen.rvs(random_state=1234) 556 557 pdf1 = frozen.pdf(X) 558 pdf2 = matrix_normal.pdf(X, mean=M, rowcov=U, colcov=V) 559 assert_equal(pdf1, pdf2) 560 561 logpdf1 = frozen.logpdf(X) 562 logpdf2 = matrix_normal.logpdf(X, mean=M, rowcov=U, colcov=V) 563 assert_equal(logpdf1, logpdf2) 564 565 def test_matches_multivariate(self): 566 # Check that the pdfs match those obtained by vectorising and 567 # treating as a multivariate normal. 568 for i in range(1,5): 569 for j in range(1,5): 570 M = np.full((i,j), 0.3) 571 U = 0.5 * np.identity(i) + np.full((i,i), 0.5) 572 V = 0.7 * np.identity(j) + np.full((j,j), 0.3) 573 574 frozen = matrix_normal(mean=M, rowcov=U, colcov=V) 575 X = frozen.rvs(random_state=1234) 576 pdf1 = frozen.pdf(X) 577 logpdf1 = frozen.logpdf(X) 578 579 vecX = X.T.flatten() 580 vecM = M.T.flatten() 581 cov = np.kron(V,U) 582 pdf2 = multivariate_normal.pdf(vecX, mean=vecM, cov=cov) 583 logpdf2 = multivariate_normal.logpdf(vecX, mean=vecM, cov=cov) 584 585 assert_allclose(pdf1, pdf2, rtol=1E-10) 586 assert_allclose(logpdf1, logpdf2, rtol=1E-10) 587 588 def test_array_input(self): 589 # Check array of inputs has the same output as the separate entries. 590 num_rows = 4 591 num_cols = 3 592 M = np.full((num_rows,num_cols), 0.3) 593 U = 0.5 * np.identity(num_rows) + np.full((num_rows, num_rows), 0.5) 594 V = 0.7 * np.identity(num_cols) + np.full((num_cols, num_cols), 0.3) 595 N = 10 596 597 frozen = matrix_normal(mean=M, rowcov=U, colcov=V) 598 X1 = frozen.rvs(size=N, random_state=1234) 599 X2 = frozen.rvs(size=N, random_state=4321) 600 X = np.concatenate((X1[np.newaxis,:,:,:],X2[np.newaxis,:,:,:]), axis=0) 601 assert_equal(X.shape, (2, N, num_rows, num_cols)) 602 603 array_logpdf = frozen.logpdf(X) 604 assert_equal(array_logpdf.shape, (2, N)) 605 for i in range(2): 606 for j in range(N): 607 separate_logpdf = matrix_normal.logpdf(X[i,j], mean=M, 608 rowcov=U, colcov=V) 609 assert_allclose(separate_logpdf, array_logpdf[i,j], 1E-10) 610 611 def test_moments(self): 612 # Check that the sample moments match the parameters 613 num_rows = 4 614 num_cols = 3 615 M = np.full((num_rows,num_cols), 0.3) 616 U = 0.5 * np.identity(num_rows) + np.full((num_rows, num_rows), 0.5) 617 V = 0.7 * np.identity(num_cols) + np.full((num_cols, num_cols), 0.3) 618 N = 1000 619 620 frozen = matrix_normal(mean=M, rowcov=U, colcov=V) 621 X = frozen.rvs(size=N, random_state=1234) 622 623 sample_mean = np.mean(X,axis=0) 624 assert_allclose(sample_mean, M, atol=0.1) 625 626 sample_colcov = np.cov(X.reshape(N*num_rows,num_cols).T) 627 assert_allclose(sample_colcov, V, atol=0.1) 628 629 sample_rowcov = np.cov(np.swapaxes(X,1,2).reshape( 630 N*num_cols,num_rows).T) 631 assert_allclose(sample_rowcov, U, atol=0.1) 632 633class TestDirichlet: 634 635 def test_frozen_dirichlet(self): 636 np.random.seed(2846) 637 638 n = np.random.randint(1, 32) 639 alpha = np.random.uniform(10e-10, 100, n) 640 641 d = dirichlet(alpha) 642 643 assert_equal(d.var(), dirichlet.var(alpha)) 644 assert_equal(d.mean(), dirichlet.mean(alpha)) 645 assert_equal(d.entropy(), dirichlet.entropy(alpha)) 646 num_tests = 10 647 for i in range(num_tests): 648 x = np.random.uniform(10e-10, 100, n) 649 x /= np.sum(x) 650 assert_equal(d.pdf(x[:-1]), dirichlet.pdf(x[:-1], alpha)) 651 assert_equal(d.logpdf(x[:-1]), dirichlet.logpdf(x[:-1], alpha)) 652 653 def test_numpy_rvs_shape_compatibility(self): 654 np.random.seed(2846) 655 alpha = np.array([1.0, 2.0, 3.0]) 656 x = np.random.dirichlet(alpha, size=7) 657 assert_equal(x.shape, (7, 3)) 658 assert_raises(ValueError, dirichlet.pdf, x, alpha) 659 assert_raises(ValueError, dirichlet.logpdf, x, alpha) 660 dirichlet.pdf(x.T, alpha) 661 dirichlet.pdf(x.T[:-1], alpha) 662 dirichlet.logpdf(x.T, alpha) 663 dirichlet.logpdf(x.T[:-1], alpha) 664 665 def test_alpha_with_zeros(self): 666 np.random.seed(2846) 667 alpha = [1.0, 0.0, 3.0] 668 # don't pass invalid alpha to np.random.dirichlet 669 x = np.random.dirichlet(np.maximum(1e-9, alpha), size=7).T 670 assert_raises(ValueError, dirichlet.pdf, x, alpha) 671 assert_raises(ValueError, dirichlet.logpdf, x, alpha) 672 673 def test_alpha_with_negative_entries(self): 674 np.random.seed(2846) 675 alpha = [1.0, -2.0, 3.0] 676 # don't pass invalid alpha to np.random.dirichlet 677 x = np.random.dirichlet(np.maximum(1e-9, alpha), size=7).T 678 assert_raises(ValueError, dirichlet.pdf, x, alpha) 679 assert_raises(ValueError, dirichlet.logpdf, x, alpha) 680 681 def test_data_with_zeros(self): 682 alpha = np.array([1.0, 2.0, 3.0, 4.0]) 683 x = np.array([0.1, 0.0, 0.2, 0.7]) 684 dirichlet.pdf(x, alpha) 685 dirichlet.logpdf(x, alpha) 686 alpha = np.array([1.0, 1.0, 1.0, 1.0]) 687 assert_almost_equal(dirichlet.pdf(x, alpha), 6) 688 assert_almost_equal(dirichlet.logpdf(x, alpha), np.log(6)) 689 690 def test_data_with_zeros_and_small_alpha(self): 691 alpha = np.array([1.0, 0.5, 3.0, 4.0]) 692 x = np.array([0.1, 0.0, 0.2, 0.7]) 693 assert_raises(ValueError, dirichlet.pdf, x, alpha) 694 assert_raises(ValueError, dirichlet.logpdf, x, alpha) 695 696 def test_data_with_negative_entries(self): 697 alpha = np.array([1.0, 2.0, 3.0, 4.0]) 698 x = np.array([0.1, -0.1, 0.3, 0.7]) 699 assert_raises(ValueError, dirichlet.pdf, x, alpha) 700 assert_raises(ValueError, dirichlet.logpdf, x, alpha) 701 702 def test_data_with_too_large_entries(self): 703 alpha = np.array([1.0, 2.0, 3.0, 4.0]) 704 x = np.array([0.1, 1.1, 0.3, 0.7]) 705 assert_raises(ValueError, dirichlet.pdf, x, alpha) 706 assert_raises(ValueError, dirichlet.logpdf, x, alpha) 707 708 def test_data_too_deep_c(self): 709 alpha = np.array([1.0, 2.0, 3.0]) 710 x = np.full((2, 7, 7), 1 / 14) 711 assert_raises(ValueError, dirichlet.pdf, x, alpha) 712 assert_raises(ValueError, dirichlet.logpdf, x, alpha) 713 714 def test_alpha_too_deep(self): 715 alpha = np.array([[1.0, 2.0], [3.0, 4.0]]) 716 x = np.full((2, 2, 7), 1 / 4) 717 assert_raises(ValueError, dirichlet.pdf, x, alpha) 718 assert_raises(ValueError, dirichlet.logpdf, x, alpha) 719 720 def test_alpha_correct_depth(self): 721 alpha = np.array([1.0, 2.0, 3.0]) 722 x = np.full((3, 7), 1 / 3) 723 dirichlet.pdf(x, alpha) 724 dirichlet.logpdf(x, alpha) 725 726 def test_non_simplex_data(self): 727 alpha = np.array([1.0, 2.0, 3.0]) 728 x = np.full((3, 7), 1 / 2) 729 assert_raises(ValueError, dirichlet.pdf, x, alpha) 730 assert_raises(ValueError, dirichlet.logpdf, x, alpha) 731 732 def test_data_vector_too_short(self): 733 alpha = np.array([1.0, 2.0, 3.0, 4.0]) 734 x = np.full((2, 7), 1 / 2) 735 assert_raises(ValueError, dirichlet.pdf, x, alpha) 736 assert_raises(ValueError, dirichlet.logpdf, x, alpha) 737 738 def test_data_vector_too_long(self): 739 alpha = np.array([1.0, 2.0, 3.0, 4.0]) 740 x = np.full((5, 7), 1 / 5) 741 assert_raises(ValueError, dirichlet.pdf, x, alpha) 742 assert_raises(ValueError, dirichlet.logpdf, x, alpha) 743 744 def test_mean_and_var(self): 745 alpha = np.array([1., 0.8, 0.2]) 746 d = dirichlet(alpha) 747 748 expected_var = [1. / 12., 0.08, 0.03] 749 expected_mean = [0.5, 0.4, 0.1] 750 751 assert_array_almost_equal(d.var(), expected_var) 752 assert_array_almost_equal(d.mean(), expected_mean) 753 754 def test_scalar_values(self): 755 alpha = np.array([0.2]) 756 d = dirichlet(alpha) 757 758 # For alpha of length 1, mean and var should be scalar instead of array 759 assert_equal(d.mean().ndim, 0) 760 assert_equal(d.var().ndim, 0) 761 762 assert_equal(d.pdf([1.]).ndim, 0) 763 assert_equal(d.logpdf([1.]).ndim, 0) 764 765 def test_K_and_K_minus_1_calls_equal(self): 766 # Test that calls with K and K-1 entries yield the same results. 767 768 np.random.seed(2846) 769 770 n = np.random.randint(1, 32) 771 alpha = np.random.uniform(10e-10, 100, n) 772 773 d = dirichlet(alpha) 774 num_tests = 10 775 for i in range(num_tests): 776 x = np.random.uniform(10e-10, 100, n) 777 x /= np.sum(x) 778 assert_almost_equal(d.pdf(x[:-1]), d.pdf(x)) 779 780 def test_multiple_entry_calls(self): 781 # Test that calls with multiple x vectors as matrix work 782 np.random.seed(2846) 783 784 n = np.random.randint(1, 32) 785 alpha = np.random.uniform(10e-10, 100, n) 786 d = dirichlet(alpha) 787 788 num_tests = 10 789 num_multiple = 5 790 xm = None 791 for i in range(num_tests): 792 for m in range(num_multiple): 793 x = np.random.uniform(10e-10, 100, n) 794 x /= np.sum(x) 795 if xm is not None: 796 xm = np.vstack((xm, x)) 797 else: 798 xm = x 799 rm = d.pdf(xm.T) 800 rs = None 801 for xs in xm: 802 r = d.pdf(xs) 803 if rs is not None: 804 rs = np.append(rs, r) 805 else: 806 rs = r 807 assert_array_almost_equal(rm, rs) 808 809 def test_2D_dirichlet_is_beta(self): 810 np.random.seed(2846) 811 812 alpha = np.random.uniform(10e-10, 100, 2) 813 d = dirichlet(alpha) 814 b = beta(alpha[0], alpha[1]) 815 816 num_tests = 10 817 for i in range(num_tests): 818 x = np.random.uniform(10e-10, 100, 2) 819 x /= np.sum(x) 820 assert_almost_equal(b.pdf(x), d.pdf([x])) 821 822 assert_almost_equal(b.mean(), d.mean()[0]) 823 assert_almost_equal(b.var(), d.var()[0]) 824 825 826def test_multivariate_normal_dimensions_mismatch(): 827 # Regression test for GH #3493. Check that setting up a PDF with a mean of 828 # length M and a covariance matrix of size (N, N), where M != N, raises a 829 # ValueError with an informative error message. 830 mu = np.array([0.0, 0.0]) 831 sigma = np.array([[1.0]]) 832 833 assert_raises(ValueError, multivariate_normal, mu, sigma) 834 835 # A simple check that the right error message was passed along. Checking 836 # that the entire message is there, word for word, would be somewhat 837 # fragile, so we just check for the leading part. 838 try: 839 multivariate_normal(mu, sigma) 840 except ValueError as e: 841 msg = "Dimension mismatch" 842 assert_equal(str(e)[:len(msg)], msg) 843 844 845class TestWishart: 846 def test_scale_dimensions(self): 847 # Test that we can call the Wishart with various scale dimensions 848 849 # Test case: dim=1, scale=1 850 true_scale = np.array(1, ndmin=2) 851 scales = [ 852 1, # scalar 853 [1], # iterable 854 np.array(1), # 0-dim 855 np.r_[1], # 1-dim 856 np.array(1, ndmin=2) # 2-dim 857 ] 858 for scale in scales: 859 w = wishart(1, scale) 860 assert_equal(w.scale, true_scale) 861 assert_equal(w.scale.shape, true_scale.shape) 862 863 # Test case: dim=2, scale=[[1,0] 864 # [0,2] 865 true_scale = np.array([[1,0], 866 [0,2]]) 867 scales = [ 868 [1,2], # iterable 869 np.r_[1,2], # 1-dim 870 np.array([[1,0], # 2-dim 871 [0,2]]) 872 ] 873 for scale in scales: 874 w = wishart(2, scale) 875 assert_equal(w.scale, true_scale) 876 assert_equal(w.scale.shape, true_scale.shape) 877 878 # We cannot call with a df < dim - 1 879 assert_raises(ValueError, wishart, 1, np.eye(2)) 880 881 # But we can call with dim - 1 < df < dim 882 wishart(1.1, np.eye(2)) # no error 883 # see gh-5562 884 885 # We cannot call with a 3-dimension array 886 scale = np.array(1, ndmin=3) 887 assert_raises(ValueError, wishart, 1, scale) 888 889 def test_quantile_dimensions(self): 890 # Test that we can call the Wishart rvs with various quantile dimensions 891 892 # If dim == 1, consider x.shape = [1,1,1] 893 X = [ 894 1, # scalar 895 [1], # iterable 896 np.array(1), # 0-dim 897 np.r_[1], # 1-dim 898 np.array(1, ndmin=2), # 2-dim 899 np.array([1], ndmin=3) # 3-dim 900 ] 901 902 w = wishart(1,1) 903 density = w.pdf(np.array(1, ndmin=3)) 904 for x in X: 905 assert_equal(w.pdf(x), density) 906 907 # If dim == 1, consider x.shape = [1,1,*] 908 X = [ 909 [1,2,3], # iterable 910 np.r_[1,2,3], # 1-dim 911 np.array([1,2,3], ndmin=3) # 3-dim 912 ] 913 914 w = wishart(1,1) 915 density = w.pdf(np.array([1,2,3], ndmin=3)) 916 for x in X: 917 assert_equal(w.pdf(x), density) 918 919 # If dim == 2, consider x.shape = [2,2,1] 920 # where x[:,:,*] = np.eye(1)*2 921 X = [ 922 2, # scalar 923 [2,2], # iterable 924 np.array(2), # 0-dim 925 np.r_[2,2], # 1-dim 926 np.array([[2,0], 927 [0,2]]), # 2-dim 928 np.array([[2,0], 929 [0,2]])[:,:,np.newaxis] # 3-dim 930 ] 931 932 w = wishart(2,np.eye(2)) 933 density = w.pdf(np.array([[2,0], 934 [0,2]])[:,:,np.newaxis]) 935 for x in X: 936 assert_equal(w.pdf(x), density) 937 938 def test_frozen(self): 939 # Test that the frozen and non-frozen Wishart gives the same answers 940 941 # Construct an arbitrary positive definite scale matrix 942 dim = 4 943 scale = np.diag(np.arange(dim)+1) 944 scale[np.tril_indices(dim, k=-1)] = np.arange(dim * (dim-1) // 2) 945 scale = np.dot(scale.T, scale) 946 947 # Construct a collection of positive definite matrices to test the PDF 948 X = [] 949 for i in range(5): 950 x = np.diag(np.arange(dim)+(i+1)**2) 951 x[np.tril_indices(dim, k=-1)] = np.arange(dim * (dim-1) // 2) 952 x = np.dot(x.T, x) 953 X.append(x) 954 X = np.array(X).T 955 956 # Construct a 1D and 2D set of parameters 957 parameters = [ 958 (10, 1, np.linspace(0.1, 10, 5)), # 1D case 959 (10, scale, X) 960 ] 961 962 for (df, scale, x) in parameters: 963 w = wishart(df, scale) 964 assert_equal(w.var(), wishart.var(df, scale)) 965 assert_equal(w.mean(), wishart.mean(df, scale)) 966 assert_equal(w.mode(), wishart.mode(df, scale)) 967 assert_equal(w.entropy(), wishart.entropy(df, scale)) 968 assert_equal(w.pdf(x), wishart.pdf(x, df, scale)) 969 970 def test_1D_is_chisquared(self): 971 # The 1-dimensional Wishart with an identity scale matrix is just a 972 # chi-squared distribution. 973 # Test variance, mean, entropy, pdf 974 # Kolgomorov-Smirnov test for rvs 975 np.random.seed(482974) 976 977 sn = 500 978 dim = 1 979 scale = np.eye(dim) 980 981 df_range = np.arange(1, 10, 2, dtype=float) 982 X = np.linspace(0.1,10,num=10) 983 for df in df_range: 984 w = wishart(df, scale) 985 c = chi2(df) 986 987 # Statistics 988 assert_allclose(w.var(), c.var()) 989 assert_allclose(w.mean(), c.mean()) 990 assert_allclose(w.entropy(), c.entropy()) 991 992 # PDF 993 assert_allclose(w.pdf(X), c.pdf(X)) 994 995 # rvs 996 rvs = w.rvs(size=sn) 997 args = (df,) 998 alpha = 0.01 999 check_distribution_rvs('chi2', args, alpha, rvs) 1000 1001 def test_is_scaled_chisquared(self): 1002 # The 2-dimensional Wishart with an arbitrary scale matrix can be 1003 # transformed to a scaled chi-squared distribution. 1004 # For :math:`S \sim W_p(V,n)` and :math:`\lambda \in \mathbb{R}^p` we have 1005 # :math:`\lambda' S \lambda \sim \lambda' V \lambda \times \chi^2(n)` 1006 np.random.seed(482974) 1007 1008 sn = 500 1009 df = 10 1010 dim = 4 1011 # Construct an arbitrary positive definite matrix 1012 scale = np.diag(np.arange(4)+1) 1013 scale[np.tril_indices(4, k=-1)] = np.arange(6) 1014 scale = np.dot(scale.T, scale) 1015 # Use :math:`\lambda = [1, \dots, 1]'` 1016 lamda = np.ones((dim,1)) 1017 sigma_lamda = lamda.T.dot(scale).dot(lamda).squeeze() 1018 w = wishart(df, sigma_lamda) 1019 c = chi2(df, scale=sigma_lamda) 1020 1021 # Statistics 1022 assert_allclose(w.var(), c.var()) 1023 assert_allclose(w.mean(), c.mean()) 1024 assert_allclose(w.entropy(), c.entropy()) 1025 1026 # PDF 1027 X = np.linspace(0.1,10,num=10) 1028 assert_allclose(w.pdf(X), c.pdf(X)) 1029 1030 # rvs 1031 rvs = w.rvs(size=sn) 1032 args = (df,0,sigma_lamda) 1033 alpha = 0.01 1034 check_distribution_rvs('chi2', args, alpha, rvs) 1035 1036class TestMultinomial: 1037 def test_logpmf(self): 1038 vals1 = multinomial.logpmf((3,4), 7, (0.3, 0.7)) 1039 assert_allclose(vals1, -1.483270127243324, rtol=1e-8) 1040 1041 vals2 = multinomial.logpmf([3, 4], 0, [.3, .7]) 1042 assert_allclose(vals2, np.NAN, rtol=1e-8) 1043 1044 vals3 = multinomial.logpmf([3, 4], 0, [-2, 3]) 1045 assert_allclose(vals3, np.NAN, rtol=1e-8) 1046 1047 def test_reduces_binomial(self): 1048 # test that the multinomial pmf reduces to the binomial pmf in the 2d 1049 # case 1050 val1 = multinomial.logpmf((3, 4), 7, (0.3, 0.7)) 1051 val2 = binom.logpmf(3, 7, 0.3) 1052 assert_allclose(val1, val2, rtol=1e-8) 1053 1054 val1 = multinomial.pmf((6, 8), 14, (0.1, 0.9)) 1055 val2 = binom.pmf(6, 14, 0.1) 1056 assert_allclose(val1, val2, rtol=1e-8) 1057 1058 def test_R(self): 1059 # test against the values produced by this R code 1060 # (https://stat.ethz.ch/R-manual/R-devel/library/stats/html/Multinom.html) 1061 # X <- t(as.matrix(expand.grid(0:3, 0:3))); X <- X[, colSums(X) <= 3] 1062 # X <- rbind(X, 3:3 - colSums(X)); dimnames(X) <- list(letters[1:3], NULL) 1063 # X 1064 # apply(X, 2, function(x) dmultinom(x, prob = c(1,2,5))) 1065 1066 n, p = 3, [1./8, 2./8, 5./8] 1067 r_vals = {(0, 0, 3): 0.244140625, (1, 0, 2): 0.146484375, 1068 (2, 0, 1): 0.029296875, (3, 0, 0): 0.001953125, 1069 (0, 1, 2): 0.292968750, (1, 1, 1): 0.117187500, 1070 (2, 1, 0): 0.011718750, (0, 2, 1): 0.117187500, 1071 (1, 2, 0): 0.023437500, (0, 3, 0): 0.015625000} 1072 for x in r_vals: 1073 assert_allclose(multinomial.pmf(x, n, p), r_vals[x], atol=1e-14) 1074 1075 def test_rvs_np(self): 1076 # test that .rvs agrees w/numpy 1077 sc_rvs = multinomial.rvs(3, [1/4.]*3, size=7, random_state=123) 1078 rndm = np.random.RandomState(123) 1079 np_rvs = rndm.multinomial(3, [1/4.]*3, size=7) 1080 assert_equal(sc_rvs, np_rvs) 1081 1082 def test_pmf(self): 1083 vals0 = multinomial.pmf((5,), 5, (1,)) 1084 assert_allclose(vals0, 1, rtol=1e-8) 1085 1086 vals1 = multinomial.pmf((3,4), 7, (.3, .7)) 1087 assert_allclose(vals1, .22689449999999994, rtol=1e-8) 1088 1089 vals2 = multinomial.pmf([[[3,5],[0,8]], [[-1, 9], [1, 1]]], 8, 1090 (.1, .9)) 1091 assert_allclose(vals2, [[.03306744, .43046721], [0, 0]], rtol=1e-8) 1092 1093 x = np.empty((0,2), dtype=np.float64) 1094 vals3 = multinomial.pmf(x, 4, (.3, .7)) 1095 assert_equal(vals3, np.empty([], dtype=np.float64)) 1096 1097 vals4 = multinomial.pmf([1,2], 4, (.3, .7)) 1098 assert_allclose(vals4, 0, rtol=1e-8) 1099 1100 vals5 = multinomial.pmf([3, 3, 0], 6, [2/3.0, 1/3.0, 0]) 1101 assert_allclose(vals5, 0.219478737997, rtol=1e-8) 1102 1103 def test_pmf_broadcasting(self): 1104 vals0 = multinomial.pmf([1, 2], 3, [[.1, .9], [.2, .8]]) 1105 assert_allclose(vals0, [.243, .384], rtol=1e-8) 1106 1107 vals1 = multinomial.pmf([1, 2], [3, 4], [.1, .9]) 1108 assert_allclose(vals1, [.243, 0], rtol=1e-8) 1109 1110 vals2 = multinomial.pmf([[[1, 2], [1, 1]]], 3, [.1, .9]) 1111 assert_allclose(vals2, [[.243, 0]], rtol=1e-8) 1112 1113 vals3 = multinomial.pmf([1, 2], [[[3], [4]]], [.1, .9]) 1114 assert_allclose(vals3, [[[.243], [0]]], rtol=1e-8) 1115 1116 vals4 = multinomial.pmf([[1, 2], [1,1]], [[[[3]]]], [.1, .9]) 1117 assert_allclose(vals4, [[[[.243, 0]]]], rtol=1e-8) 1118 1119 def test_cov(self): 1120 cov1 = multinomial.cov(5, (.2, .3, .5)) 1121 cov2 = [[5*.2*.8, -5*.2*.3, -5*.2*.5], 1122 [-5*.3*.2, 5*.3*.7, -5*.3*.5], 1123 [-5*.5*.2, -5*.5*.3, 5*.5*.5]] 1124 assert_allclose(cov1, cov2, rtol=1e-8) 1125 1126 def test_cov_broadcasting(self): 1127 cov1 = multinomial.cov(5, [[.1, .9], [.2, .8]]) 1128 cov2 = [[[.45, -.45],[-.45, .45]], [[.8, -.8], [-.8, .8]]] 1129 assert_allclose(cov1, cov2, rtol=1e-8) 1130 1131 cov3 = multinomial.cov([4, 5], [.1, .9]) 1132 cov4 = [[[.36, -.36], [-.36, .36]], [[.45, -.45], [-.45, .45]]] 1133 assert_allclose(cov3, cov4, rtol=1e-8) 1134 1135 cov5 = multinomial.cov([4, 5], [[.3, .7], [.4, .6]]) 1136 cov6 = [[[4*.3*.7, -4*.3*.7], [-4*.3*.7, 4*.3*.7]], 1137 [[5*.4*.6, -5*.4*.6], [-5*.4*.6, 5*.4*.6]]] 1138 assert_allclose(cov5, cov6, rtol=1e-8) 1139 1140 def test_entropy(self): 1141 # this is equivalent to a binomial distribution with n=2, so the 1142 # entropy .77899774929 is easily computed "by hand" 1143 ent0 = multinomial.entropy(2, [.2, .8]) 1144 assert_allclose(ent0, binom.entropy(2, .2), rtol=1e-8) 1145 1146 def test_entropy_broadcasting(self): 1147 ent0 = multinomial.entropy([2, 3], [.2, .3]) 1148 assert_allclose(ent0, [binom.entropy(2, .2), binom.entropy(3, .2)], 1149 rtol=1e-8) 1150 1151 ent1 = multinomial.entropy([7, 8], [[.3, .7], [.4, .6]]) 1152 assert_allclose(ent1, [binom.entropy(7, .3), binom.entropy(8, .4)], 1153 rtol=1e-8) 1154 1155 ent2 = multinomial.entropy([[7], [8]], [[.3, .7], [.4, .6]]) 1156 assert_allclose(ent2, 1157 [[binom.entropy(7, .3), binom.entropy(7, .4)], 1158 [binom.entropy(8, .3), binom.entropy(8, .4)]], 1159 rtol=1e-8) 1160 1161 def test_mean(self): 1162 mean1 = multinomial.mean(5, [.2, .8]) 1163 assert_allclose(mean1, [5*.2, 5*.8], rtol=1e-8) 1164 1165 def test_mean_broadcasting(self): 1166 mean1 = multinomial.mean([5, 6], [.2, .8]) 1167 assert_allclose(mean1, [[5*.2, 5*.8], [6*.2, 6*.8]], rtol=1e-8) 1168 1169 def test_frozen(self): 1170 # The frozen distribution should agree with the regular one 1171 np.random.seed(1234) 1172 n = 12 1173 pvals = (.1, .2, .3, .4) 1174 x = [[0,0,0,12],[0,0,1,11],[0,1,1,10],[1,1,1,9],[1,1,2,8]] 1175 x = np.asarray(x, dtype=np.float64) 1176 mn_frozen = multinomial(n, pvals) 1177 assert_allclose(mn_frozen.pmf(x), multinomial.pmf(x, n, pvals)) 1178 assert_allclose(mn_frozen.logpmf(x), multinomial.logpmf(x, n, pvals)) 1179 assert_allclose(mn_frozen.entropy(), multinomial.entropy(n, pvals)) 1180 1181class TestInvwishart: 1182 def test_frozen(self): 1183 # Test that the frozen and non-frozen inverse Wishart gives the same 1184 # answers 1185 1186 # Construct an arbitrary positive definite scale matrix 1187 dim = 4 1188 scale = np.diag(np.arange(dim)+1) 1189 scale[np.tril_indices(dim, k=-1)] = np.arange(dim*(dim-1)/2) 1190 scale = np.dot(scale.T, scale) 1191 1192 # Construct a collection of positive definite matrices to test the PDF 1193 X = [] 1194 for i in range(5): 1195 x = np.diag(np.arange(dim)+(i+1)**2) 1196 x[np.tril_indices(dim, k=-1)] = np.arange(dim*(dim-1)/2) 1197 x = np.dot(x.T, x) 1198 X.append(x) 1199 X = np.array(X).T 1200 1201 # Construct a 1D and 2D set of parameters 1202 parameters = [ 1203 (10, 1, np.linspace(0.1, 10, 5)), # 1D case 1204 (10, scale, X) 1205 ] 1206 1207 for (df, scale, x) in parameters: 1208 iw = invwishart(df, scale) 1209 assert_equal(iw.var(), invwishart.var(df, scale)) 1210 assert_equal(iw.mean(), invwishart.mean(df, scale)) 1211 assert_equal(iw.mode(), invwishart.mode(df, scale)) 1212 assert_allclose(iw.pdf(x), invwishart.pdf(x, df, scale)) 1213 1214 def test_1D_is_invgamma(self): 1215 # The 1-dimensional inverse Wishart with an identity scale matrix is 1216 # just an inverse gamma distribution. 1217 # Test variance, mean, pdf 1218 # Kolgomorov-Smirnov test for rvs 1219 np.random.seed(482974) 1220 1221 sn = 500 1222 dim = 1 1223 scale = np.eye(dim) 1224 1225 df_range = np.arange(5, 20, 2, dtype=float) 1226 X = np.linspace(0.1,10,num=10) 1227 for df in df_range: 1228 iw = invwishart(df, scale) 1229 ig = invgamma(df/2, scale=1./2) 1230 1231 # Statistics 1232 assert_allclose(iw.var(), ig.var()) 1233 assert_allclose(iw.mean(), ig.mean()) 1234 1235 # PDF 1236 assert_allclose(iw.pdf(X), ig.pdf(X)) 1237 1238 # rvs 1239 rvs = iw.rvs(size=sn) 1240 args = (df/2, 0, 1./2) 1241 alpha = 0.01 1242 check_distribution_rvs('invgamma', args, alpha, rvs) 1243 1244 def test_wishart_invwishart_2D_rvs(self): 1245 dim = 3 1246 df = 10 1247 1248 # Construct a simple non-diagonal positive definite matrix 1249 scale = np.eye(dim) 1250 scale[0,1] = 0.5 1251 scale[1,0] = 0.5 1252 1253 # Construct frozen Wishart and inverse Wishart random variables 1254 w = wishart(df, scale) 1255 iw = invwishart(df, scale) 1256 1257 # Get the generated random variables from a known seed 1258 np.random.seed(248042) 1259 w_rvs = wishart.rvs(df, scale) 1260 np.random.seed(248042) 1261 frozen_w_rvs = w.rvs() 1262 np.random.seed(248042) 1263 iw_rvs = invwishart.rvs(df, scale) 1264 np.random.seed(248042) 1265 frozen_iw_rvs = iw.rvs() 1266 1267 # Manually calculate what it should be, based on the Bartlett (1933) 1268 # decomposition of a Wishart into D A A' D', where D is the Cholesky 1269 # factorization of the scale matrix and A is the lower triangular matrix 1270 # with the square root of chi^2 variates on the diagonal and N(0,1) 1271 # variates in the lower triangle. 1272 np.random.seed(248042) 1273 covariances = np.random.normal(size=3) 1274 variances = np.r_[ 1275 np.random.chisquare(df), 1276 np.random.chisquare(df-1), 1277 np.random.chisquare(df-2), 1278 ]**0.5 1279 1280 # Construct the lower-triangular A matrix 1281 A = np.diag(variances) 1282 A[np.tril_indices(dim, k=-1)] = covariances 1283 1284 # Wishart random variate 1285 D = np.linalg.cholesky(scale) 1286 DA = D.dot(A) 1287 manual_w_rvs = np.dot(DA, DA.T) 1288 1289 # inverse Wishart random variate 1290 # Supposing that the inverse wishart has scale matrix `scale`, then the 1291 # random variate is the inverse of a random variate drawn from a Wishart 1292 # distribution with scale matrix `inv_scale = np.linalg.inv(scale)` 1293 iD = np.linalg.cholesky(np.linalg.inv(scale)) 1294 iDA = iD.dot(A) 1295 manual_iw_rvs = np.linalg.inv(np.dot(iDA, iDA.T)) 1296 1297 # Test for equality 1298 assert_allclose(w_rvs, manual_w_rvs) 1299 assert_allclose(frozen_w_rvs, manual_w_rvs) 1300 assert_allclose(iw_rvs, manual_iw_rvs) 1301 assert_allclose(frozen_iw_rvs, manual_iw_rvs) 1302 1303 def test_cho_inv_batch(self): 1304 """Regression test for gh-8844.""" 1305 a0 = np.array([[2, 1, 0, 0.5], 1306 [1, 2, 0.5, 0.5], 1307 [0, 0.5, 3, 1], 1308 [0.5, 0.5, 1, 2]]) 1309 a1 = np.array([[2, -1, 0, 0.5], 1310 [-1, 2, 0.5, 0.5], 1311 [0, 0.5, 3, 1], 1312 [0.5, 0.5, 1, 4]]) 1313 a = np.array([a0, a1]) 1314 ainv = a.copy() 1315 _cho_inv_batch(ainv) 1316 ident = np.eye(4) 1317 assert_allclose(a[0].dot(ainv[0]), ident, atol=1e-15) 1318 assert_allclose(a[1].dot(ainv[1]), ident, atol=1e-15) 1319 1320 def test_logpdf_4x4(self): 1321 """Regression test for gh-8844.""" 1322 X = np.array([[2, 1, 0, 0.5], 1323 [1, 2, 0.5, 0.5], 1324 [0, 0.5, 3, 1], 1325 [0.5, 0.5, 1, 2]]) 1326 Psi = np.array([[9, 7, 3, 1], 1327 [7, 9, 5, 1], 1328 [3, 5, 8, 2], 1329 [1, 1, 2, 9]]) 1330 nu = 6 1331 prob = invwishart.logpdf(X, nu, Psi) 1332 # Explicit calculation from the formula on wikipedia. 1333 p = X.shape[0] 1334 sig, logdetX = np.linalg.slogdet(X) 1335 sig, logdetPsi = np.linalg.slogdet(Psi) 1336 M = np.linalg.solve(X, Psi) 1337 expected = ((nu/2)*logdetPsi 1338 - (nu*p/2)*np.log(2) 1339 - multigammaln(nu/2, p) 1340 - (nu + p + 1)/2*logdetX 1341 - 0.5*M.trace()) 1342 assert_allclose(prob, expected) 1343 1344 1345class TestSpecialOrthoGroup: 1346 def test_reproducibility(self): 1347 np.random.seed(514) 1348 x = special_ortho_group.rvs(3) 1349 expected = np.array([[-0.99394515, -0.04527879, 0.10011432], 1350 [0.04821555, -0.99846897, 0.02711042], 1351 [0.09873351, 0.03177334, 0.99460653]]) 1352 assert_array_almost_equal(x, expected) 1353 1354 random_state = np.random.RandomState(seed=514) 1355 x = special_ortho_group.rvs(3, random_state=random_state) 1356 assert_array_almost_equal(x, expected) 1357 1358 def test_invalid_dim(self): 1359 assert_raises(ValueError, special_ortho_group.rvs, None) 1360 assert_raises(ValueError, special_ortho_group.rvs, (2, 2)) 1361 assert_raises(ValueError, special_ortho_group.rvs, 1) 1362 assert_raises(ValueError, special_ortho_group.rvs, 2.5) 1363 1364 def test_frozen_matrix(self): 1365 dim = 7 1366 frozen = special_ortho_group(dim) 1367 1368 rvs1 = frozen.rvs(random_state=1234) 1369 rvs2 = special_ortho_group.rvs(dim, random_state=1234) 1370 1371 assert_equal(rvs1, rvs2) 1372 1373 def test_det_and_ortho(self): 1374 xs = [special_ortho_group.rvs(dim) 1375 for dim in range(2,12) 1376 for i in range(3)] 1377 1378 # Test that determinants are always +1 1379 dets = [np.linalg.det(x) for x in xs] 1380 assert_allclose(dets, [1.]*30, rtol=1e-13) 1381 1382 # Test that these are orthogonal matrices 1383 for x in xs: 1384 assert_array_almost_equal(np.dot(x, x.T), 1385 np.eye(x.shape[0])) 1386 1387 def test_haar(self): 1388 # Test that the distribution is constant under rotation 1389 # Every column should have the same distribution 1390 # Additionally, the distribution should be invariant under another rotation 1391 1392 # Generate samples 1393 dim = 5 1394 samples = 1000 # Not too many, or the test takes too long 1395 ks_prob = .05 1396 np.random.seed(514) 1397 xs = special_ortho_group.rvs(dim, size=samples) 1398 1399 # Dot a few rows (0, 1, 2) with unit vectors (0, 2, 4, 3), 1400 # effectively picking off entries in the matrices of xs. 1401 # These projections should all have the same disribution, 1402 # establishing rotational invariance. We use the two-sided 1403 # KS test to confirm this. 1404 # We could instead test that angles between random vectors 1405 # are uniformly distributed, but the below is sufficient. 1406 # It is not feasible to consider all pairs, so pick a few. 1407 els = ((0,0), (0,2), (1,4), (2,3)) 1408 #proj = {(er, ec): [x[er][ec] for x in xs] for er, ec in els} 1409 proj = dict(((er, ec), sorted([x[er][ec] for x in xs])) for er, ec in els) 1410 pairs = [(e0, e1) for e0 in els for e1 in els if e0 > e1] 1411 ks_tests = [ks_2samp(proj[p0], proj[p1])[1] for (p0, p1) in pairs] 1412 assert_array_less([ks_prob]*len(pairs), ks_tests) 1413 1414class TestOrthoGroup: 1415 def test_reproducibility(self): 1416 np.random.seed(515) 1417 x = ortho_group.rvs(3) 1418 x2 = ortho_group.rvs(3, random_state=515) 1419 # Note this matrix has det -1, distinguishing O(N) from SO(N) 1420 assert_almost_equal(np.linalg.det(x), -1) 1421 expected = np.array([[0.94449759, -0.21678569, -0.24683651], 1422 [-0.13147569, -0.93800245, 0.3207266], 1423 [0.30106219, 0.27047251, 0.9144431]]) 1424 assert_array_almost_equal(x, expected) 1425 assert_array_almost_equal(x2, expected) 1426 1427 def test_invalid_dim(self): 1428 assert_raises(ValueError, ortho_group.rvs, None) 1429 assert_raises(ValueError, ortho_group.rvs, (2, 2)) 1430 assert_raises(ValueError, ortho_group.rvs, 1) 1431 assert_raises(ValueError, ortho_group.rvs, 2.5) 1432 1433 def test_det_and_ortho(self): 1434 xs = [[ortho_group.rvs(dim) 1435 for i in range(10)] 1436 for dim in range(2,12)] 1437 1438 # Test that abs determinants are always +1 1439 dets = np.array([[np.linalg.det(x) for x in xx] for xx in xs]) 1440 assert_allclose(np.fabs(dets), np.ones(dets.shape), rtol=1e-13) 1441 1442 # Test that we get both positive and negative determinants 1443 # Check that we have at least one and less than 10 negative dets in a sample of 10. The rest are positive by the previous test. 1444 # Test each dimension separately 1445 assert_array_less([0]*10, [np.nonzero(d < 0)[0].shape[0] for d in dets]) 1446 assert_array_less([np.nonzero(d < 0)[0].shape[0] for d in dets], [10]*10) 1447 1448 # Test that these are orthogonal matrices 1449 for xx in xs: 1450 for x in xx: 1451 assert_array_almost_equal(np.dot(x, x.T), 1452 np.eye(x.shape[0])) 1453 1454 def test_haar(self): 1455 # Test that the distribution is constant under rotation 1456 # Every column should have the same distribution 1457 # Additionally, the distribution should be invariant under another rotation 1458 1459 # Generate samples 1460 dim = 5 1461 samples = 1000 # Not too many, or the test takes too long 1462 ks_prob = .05 1463 np.random.seed(518) # Note that the test is sensitive to seed too 1464 xs = ortho_group.rvs(dim, size=samples) 1465 1466 # Dot a few rows (0, 1, 2) with unit vectors (0, 2, 4, 3), 1467 # effectively picking off entries in the matrices of xs. 1468 # These projections should all have the same disribution, 1469 # establishing rotational invariance. We use the two-sided 1470 # KS test to confirm this. 1471 # We could instead test that angles between random vectors 1472 # are uniformly distributed, but the below is sufficient. 1473 # It is not feasible to consider all pairs, so pick a few. 1474 els = ((0,0), (0,2), (1,4), (2,3)) 1475 #proj = {(er, ec): [x[er][ec] for x in xs] for er, ec in els} 1476 proj = dict(((er, ec), sorted([x[er][ec] for x in xs])) for er, ec in els) 1477 pairs = [(e0, e1) for e0 in els for e1 in els if e0 > e1] 1478 ks_tests = [ks_2samp(proj[p0], proj[p1])[1] for (p0, p1) in pairs] 1479 assert_array_less([ks_prob]*len(pairs), ks_tests) 1480 1481 @pytest.mark.slow 1482 def test_pairwise_distances(self): 1483 # Test that the distribution of pairwise distances is close to correct. 1484 np.random.seed(514) 1485 1486 def random_ortho(dim): 1487 u, _s, v = np.linalg.svd(np.random.normal(size=(dim, dim))) 1488 return np.dot(u, v) 1489 1490 for dim in range(2, 6): 1491 def generate_test_statistics(rvs, N=1000, eps=1e-10): 1492 stats = np.array([ 1493 np.sum((rvs(dim=dim) - rvs(dim=dim))**2) 1494 for _ in range(N) 1495 ]) 1496 # Add a bit of noise to account for numeric accuracy. 1497 stats += np.random.uniform(-eps, eps, size=stats.shape) 1498 return stats 1499 1500 expected = generate_test_statistics(random_ortho) 1501 actual = generate_test_statistics(scipy.stats.ortho_group.rvs) 1502 1503 _D, p = scipy.stats.ks_2samp(expected, actual) 1504 1505 assert_array_less(.05, p) 1506 1507class TestRandomCorrelation: 1508 def test_reproducibility(self): 1509 np.random.seed(514) 1510 eigs = (.5, .8, 1.2, 1.5) 1511 x = random_correlation.rvs(eigs) 1512 x2 = random_correlation.rvs(eigs, random_state=514) 1513 expected = np.array([[1., -0.20387311, 0.18366501, -0.04953711], 1514 [-0.20387311, 1., -0.24351129, 0.06703474], 1515 [0.18366501, -0.24351129, 1., 0.38530195], 1516 [-0.04953711, 0.06703474, 0.38530195, 1.]]) 1517 assert_array_almost_equal(x, expected) 1518 assert_array_almost_equal(x2, expected) 1519 1520 def test_invalid_eigs(self): 1521 assert_raises(ValueError, random_correlation.rvs, None) 1522 assert_raises(ValueError, random_correlation.rvs, 'test') 1523 assert_raises(ValueError, random_correlation.rvs, 2.5) 1524 assert_raises(ValueError, random_correlation.rvs, [2.5]) 1525 assert_raises(ValueError, random_correlation.rvs, [[1,2],[3,4]]) 1526 assert_raises(ValueError, random_correlation.rvs, [2.5, -.5]) 1527 assert_raises(ValueError, random_correlation.rvs, [1, 2, .1]) 1528 1529 def test_definition(self): 1530 # Test the definition of a correlation matrix in several dimensions: 1531 # 1532 # 1. Det is product of eigenvalues (and positive by construction 1533 # in examples) 1534 # 2. 1's on diagonal 1535 # 3. Matrix is symmetric 1536 1537 def norm(i, e): 1538 return i*e/sum(e) 1539 1540 np.random.seed(123) 1541 1542 eigs = [norm(i, np.random.uniform(size=i)) for i in range(2, 6)] 1543 eigs.append([4,0,0,0]) 1544 1545 ones = [[1.]*len(e) for e in eigs] 1546 xs = [random_correlation.rvs(e) for e in eigs] 1547 1548 # Test that determinants are products of eigenvalues 1549 # These are positive by construction 1550 # Could also test that the eigenvalues themselves are correct, 1551 # but this seems sufficient. 1552 dets = [np.fabs(np.linalg.det(x)) for x in xs] 1553 dets_known = [np.prod(e) for e in eigs] 1554 assert_allclose(dets, dets_known, rtol=1e-13, atol=1e-13) 1555 1556 # Test for 1's on the diagonal 1557 diags = [np.diag(x) for x in xs] 1558 for a, b in zip(diags, ones): 1559 assert_allclose(a, b, rtol=1e-13) 1560 1561 # Correlation matrices are symmetric 1562 for x in xs: 1563 assert_allclose(x, x.T, rtol=1e-13) 1564 1565 def test_to_corr(self): 1566 # Check some corner cases in to_corr 1567 1568 # ajj == 1 1569 m = np.array([[0.1, 0], [0, 1]], dtype=float) 1570 m = random_correlation._to_corr(m) 1571 assert_allclose(m, np.array([[1, 0], [0, 0.1]])) 1572 1573 # Floating point overflow; fails to compute the correct 1574 # rotation, but should still produce some valid rotation 1575 # rather than infs/nans 1576 with np.errstate(over='ignore'): 1577 g = np.array([[0, 1], [-1, 0]]) 1578 1579 m0 = np.array([[1e300, 0], [0, np.nextafter(1, 0)]], dtype=float) 1580 m = random_correlation._to_corr(m0.copy()) 1581 assert_allclose(m, g.T.dot(m0).dot(g)) 1582 1583 m0 = np.array([[0.9, 1e300], [1e300, 1.1]], dtype=float) 1584 m = random_correlation._to_corr(m0.copy()) 1585 assert_allclose(m, g.T.dot(m0).dot(g)) 1586 1587 # Zero discriminant; should set the first diag entry to 1 1588 m0 = np.array([[2, 1], [1, 2]], dtype=float) 1589 m = random_correlation._to_corr(m0.copy()) 1590 assert_allclose(m[0,0], 1) 1591 1592 # Slightly negative discriminant; should be approx correct still 1593 m0 = np.array([[2 + 1e-7, 1], [1, 2]], dtype=float) 1594 m = random_correlation._to_corr(m0.copy()) 1595 assert_allclose(m[0,0], 1) 1596 1597 1598class TestUnitaryGroup: 1599 def test_reproducibility(self): 1600 np.random.seed(514) 1601 x = unitary_group.rvs(3) 1602 x2 = unitary_group.rvs(3, random_state=514) 1603 1604 expected = np.array([[0.308771+0.360312j, 0.044021+0.622082j, 0.160327+0.600173j], 1605 [0.732757+0.297107j, 0.076692-0.4614j, -0.394349+0.022613j], 1606 [-0.148844+0.357037j, -0.284602-0.557949j, 0.607051+0.299257j]]) 1607 1608 assert_array_almost_equal(x, expected) 1609 assert_array_almost_equal(x2, expected) 1610 1611 def test_invalid_dim(self): 1612 assert_raises(ValueError, unitary_group.rvs, None) 1613 assert_raises(ValueError, unitary_group.rvs, (2, 2)) 1614 assert_raises(ValueError, unitary_group.rvs, 1) 1615 assert_raises(ValueError, unitary_group.rvs, 2.5) 1616 1617 def test_unitarity(self): 1618 xs = [unitary_group.rvs(dim) 1619 for dim in range(2,12) 1620 for i in range(3)] 1621 1622 # Test that these are unitary matrices 1623 for x in xs: 1624 assert_allclose(np.dot(x, x.conj().T), np.eye(x.shape[0]), atol=1e-15) 1625 1626 def test_haar(self): 1627 # Test that the eigenvalues, which lie on the unit circle in 1628 # the complex plane, are uncorrelated. 1629 1630 # Generate samples 1631 dim = 5 1632 samples = 1000 # Not too many, or the test takes too long 1633 np.random.seed(514) # Note that the test is sensitive to seed too 1634 xs = unitary_group.rvs(dim, size=samples) 1635 1636 # The angles "x" of the eigenvalues should be uniformly distributed 1637 # Overall this seems to be a necessary but weak test of the distribution. 1638 eigs = np.vstack([scipy.linalg.eigvals(x) for x in xs]) 1639 x = np.arctan2(eigs.imag, eigs.real) 1640 res = kstest(x.ravel(), uniform(-np.pi, 2*np.pi).cdf) 1641 assert_(res.pvalue > 0.05) 1642 1643 1644class TestMultivariateT: 1645 1646 # These tests were created by running vpa(mvtpdf(...)) in MATLAB. The 1647 # function takes no `mu` parameter. The tests were run as 1648 # 1649 # >> ans = vpa(mvtpdf(x - mu, shape, df)); 1650 # 1651 PDF_TESTS = [( 1652 # x 1653 [ 1654 [1, 2], 1655 [4, 1], 1656 [2, 1], 1657 [2, 4], 1658 [1, 4], 1659 [4, 1], 1660 [3, 2], 1661 [3, 3], 1662 [4, 4], 1663 [5, 1], 1664 ], 1665 # loc 1666 [0, 0], 1667 # shape 1668 [ 1669 [1, 0], 1670 [0, 1] 1671 ], 1672 # df 1673 4, 1674 # ans 1675 [ 1676 0.013972450422333741737457302178882, 1677 0.0010998721906793330026219646100571, 1678 0.013972450422333741737457302178882, 1679 0.00073682844024025606101402363634634, 1680 0.0010998721906793330026219646100571, 1681 0.0010998721906793330026219646100571, 1682 0.0020732579600816823488240725481546, 1683 0.00095660371505271429414668515889275, 1684 0.00021831953784896498569831346792114, 1685 0.00037725616140301147447000396084604 1686 ] 1687 1688 ), ( 1689 # x 1690 [ 1691 [0.9718, 0.1298, 0.8134], 1692 [0.4922, 0.5522, 0.7185], 1693 [0.3010, 0.1491, 0.5008], 1694 [0.5971, 0.2585, 0.8940], 1695 [0.5434, 0.5287, 0.9507], 1696 ], 1697 # loc 1698 [-1, 1, 50], 1699 # shape 1700 [ 1701 [1.0000, 0.5000, 0.2500], 1702 [0.5000, 1.0000, -0.1000], 1703 [0.2500, -0.1000, 1.0000], 1704 ], 1705 # df 1706 8, 1707 # ans 1708 [ 1709 0.00000000000000069609279697467772867405511133763, 1710 0.00000000000000073700739052207366474839369535934, 1711 0.00000000000000069522909962669171512174435447027, 1712 0.00000000000000074212293557998314091880208889767, 1713 0.00000000000000077039675154022118593323030449058, 1714 ] 1715 )] 1716 1717 @pytest.mark.parametrize("x, loc, shape, df, ans", PDF_TESTS) 1718 def test_pdf_correctness(self, x, loc, shape, df, ans): 1719 dist = multivariate_t(loc, shape, df, seed=0) 1720 val = dist.pdf(x) 1721 assert_array_almost_equal(val, ans) 1722 1723 @pytest.mark.parametrize("x, loc, shape, df, ans", PDF_TESTS) 1724 def test_logpdf_correct(self, x, loc, shape, df, ans): 1725 dist = multivariate_t(loc, shape, df, seed=0) 1726 val1 = dist.pdf(x) 1727 val2 = dist.logpdf(x) 1728 assert_array_almost_equal(np.log(val1), val2) 1729 1730 # https://github.com/scipy/scipy/issues/10042#issuecomment-576795195 1731 def test_mvt_with_df_one_is_cauchy(self): 1732 x = [9, 7, 4, 1, -3, 9, 0, -3, -1, 3] 1733 val = multivariate_t.pdf(x, df=1) 1734 ans = cauchy.pdf(x) 1735 assert_array_almost_equal(val, ans) 1736 1737 def test_mvt_with_high_df_is_approx_normal(self): 1738 # `normaltest` returns the chi-squared statistic and the associated 1739 # p-value. The null hypothesis is that `x` came from a normal 1740 # distribution, so a low p-value represents rejecting the null, i.e. 1741 # that it is unlikely that `x` came a normal distribution. 1742 P_VAL_MIN = 0.1 1743 1744 dist = multivariate_t(0, 1, df=100000, seed=1) 1745 samples = dist.rvs(size=100000) 1746 _, p = normaltest(samples) 1747 assert (p > P_VAL_MIN) 1748 1749 dist = multivariate_t([-2, 3], [[10, -1], [-1, 10]], df=100000, 1750 seed=42) 1751 samples = dist.rvs(size=100000) 1752 _, p = normaltest(samples) 1753 assert ((p > P_VAL_MIN).all()) 1754 1755 @patch('scipy.stats.multivariate_normal._logpdf') 1756 def test_mvt_with_inf_df_calls_normal(self, mock): 1757 dist = multivariate_t(0, 1, df=np.inf, seed=7) 1758 assert isinstance(dist, multivariate_normal_frozen) 1759 multivariate_t.pdf(0, df=np.inf) 1760 assert mock.call_count == 1 1761 multivariate_t.logpdf(0, df=np.inf) 1762 assert mock.call_count == 2 1763 1764 def test_shape_correctness(self): 1765 # pdf and logpdf should return scalar when the 1766 # number of samples in x is one. 1767 dim = 4 1768 loc = np.zeros(dim) 1769 shape = np.eye(dim) 1770 df = 4.5 1771 x = np.zeros(dim) 1772 res = multivariate_t(loc, shape, df).pdf(x) 1773 assert np.isscalar(res) 1774 res = multivariate_t(loc, shape, df).logpdf(x) 1775 assert np.isscalar(res) 1776 1777 # pdf() and logpdf() should return probabilities of shape 1778 # (n_samples,) when x has n_samples. 1779 n_samples = 7 1780 x = np.random.random((n_samples, dim)) 1781 res = multivariate_t(loc, shape, df).pdf(x) 1782 assert (res.shape == (n_samples,)) 1783 res = multivariate_t(loc, shape, df).logpdf(x) 1784 assert (res.shape == (n_samples,)) 1785 1786 # rvs() should return scalar unless a size argument is applied. 1787 res = multivariate_t(np.zeros(1), np.eye(1), 1).rvs() 1788 assert np.isscalar(res) 1789 1790 # rvs() should return vector of shape (size,) if size argument 1791 # is applied. 1792 size = 7 1793 res = multivariate_t(np.zeros(1), np.eye(1), 1).rvs(size=size) 1794 assert (res.shape == (size,)) 1795 1796 def test_default_arguments(self): 1797 dist = multivariate_t() 1798 assert_equal(dist.loc, [0]) 1799 assert_equal(dist.shape, [[1]]) 1800 assert (dist.df == 1) 1801 1802 DEFAULT_ARGS_TESTS = [ 1803 (None, None, None, 0, 1, 1), 1804 (None, None, 7, 0, 1, 7), 1805 (None, [[7, 0], [0, 7]], None, [0, 0], [[7, 0], [0, 7]], 1), 1806 (None, [[7, 0], [0, 7]], 7, [0, 0], [[7, 0], [0, 7]], 7), 1807 ([7, 7], None, None, [7, 7], [[1, 0], [0, 1]], 1), 1808 ([7, 7], None, 7, [7, 7], [[1, 0], [0, 1]], 7), 1809 ([7, 7], [[7, 0], [0, 7]], None, [7, 7], [[7, 0], [0, 7]], 1), 1810 ([7, 7], [[7, 0], [0, 7]], 7, [7, 7], [[7, 0], [0, 7]], 7) 1811 ] 1812 1813 @pytest.mark.parametrize("loc, shape, df, loc_ans, shape_ans, df_ans", DEFAULT_ARGS_TESTS) 1814 def test_default_args(self, loc, shape, df, loc_ans, shape_ans, df_ans): 1815 dist = multivariate_t(loc=loc, shape=shape, df=df) 1816 assert_equal(dist.loc, loc_ans) 1817 assert_equal(dist.shape, shape_ans) 1818 assert (dist.df == df_ans) 1819 1820 ARGS_SHAPES_TESTS = [ 1821 (-1, 2, 3, [-1], [[2]], 3), 1822 ([-1], [2], 3, [-1], [[2]], 3), 1823 (np.array([-1]), np.array([2]), 3, [-1], [[2]], 3) 1824 ] 1825 1826 @pytest.mark.parametrize("loc, shape, df, loc_ans, shape_ans, df_ans", ARGS_SHAPES_TESTS) 1827 def test_scalar_list_and_ndarray_arguments(self, loc, shape, df, loc_ans, shape_ans, df_ans): 1828 dist = multivariate_t(loc, shape, df) 1829 assert_equal(dist.loc, loc_ans) 1830 assert_equal(dist.shape, shape_ans) 1831 assert_equal(dist.df, df_ans) 1832 1833 def test_argument_error_handling(self): 1834 # `loc` should be a one-dimensional vector. 1835 loc = [[1, 1]] 1836 assert_raises(ValueError, 1837 multivariate_t, 1838 **dict(loc=loc)) 1839 1840 # `shape` should be scalar or square matrix. 1841 shape = [[1, 1], [2, 2], [3, 3]] 1842 assert_raises(ValueError, 1843 multivariate_t, 1844 **dict(loc=loc, shape=shape)) 1845 1846 # `df` should be greater than zero. 1847 loc = np.zeros(2) 1848 shape = np.eye(2) 1849 df = -1 1850 assert_raises(ValueError, 1851 multivariate_t, 1852 **dict(loc=loc, shape=shape, df=df)) 1853 df = 0 1854 assert_raises(ValueError, 1855 multivariate_t, 1856 **dict(loc=loc, shape=shape, df=df)) 1857 1858 def test_reproducibility(self): 1859 rng = np.random.RandomState(4) 1860 loc = rng.uniform(size=3) 1861 shape = np.eye(3) 1862 dist1 = multivariate_t(loc, shape, df=3, seed=2) 1863 dist2 = multivariate_t(loc, shape, df=3, seed=2) 1864 samples1 = dist1.rvs(size=10) 1865 samples2 = dist2.rvs(size=10) 1866 assert_equal(samples1, samples2) 1867 1868 def test_allow_singular(self): 1869 # Make shape singular and verify error was raised. 1870 args = dict(loc=[0,0], shape=[[0,0],[0,1]], df=1, allow_singular=False) 1871 assert_raises(np.linalg.LinAlgError, multivariate_t, **args) 1872 1873 1874class TestMultivariateHypergeom: 1875 @pytest.mark.parametrize( 1876 "x, m, n, expected", 1877 [ 1878 # Ground truth value from R dmvhyper 1879 ([3, 4], [5, 10], 7, -1.119814), 1880 # test for `n=0` 1881 ([3, 4], [5, 10], 0, np.NINF), 1882 # test for `x < 0` 1883 ([-3, 4], [5, 10], 7, np.NINF), 1884 # test for `m < 0` (RuntimeWarning issue) 1885 ([3, 4], [-5, 10], 7, np.nan), 1886 # test for all `m < 0` and `x.sum() != n` 1887 ([[1, 2], [3, 4]], [[-4, -6], [-5, -10]], 1888 [3, 7], [np.nan, np.nan]), 1889 # test for `x < 0` and `m < 0` (RuntimeWarning issue) 1890 ([-3, 4], [-5, 10], 1, np.nan), 1891 # test for `x > m` 1892 ([1, 11], [10, 1], 12, np.nan), 1893 # test for `m < 0` (RuntimeWarning issue) 1894 ([1, 11], [10, -1], 12, np.nan), 1895 # test for `n < 0` 1896 ([3, 4], [5, 10], -7, np.nan), 1897 # test for `x.sum() != n` 1898 ([3, 3], [5, 10], 7, np.NINF) 1899 ] 1900 ) 1901 def test_logpmf(self, x, m, n, expected): 1902 vals = multivariate_hypergeom.logpmf(x, m, n) 1903 assert_allclose(vals, expected, rtol=1e-6) 1904 1905 def test_reduces_hypergeom(self): 1906 # test that the multivariate_hypergeom pmf reduces to the 1907 # hypergeom pmf in the 2d case. 1908 val1 = multivariate_hypergeom.pmf(x=[3, 1], m=[10, 5], n=4) 1909 val2 = hypergeom.pmf(k=3, M=15, n=4, N=10) 1910 assert_allclose(val1, val2, rtol=1e-8) 1911 1912 val1 = multivariate_hypergeom.pmf(x=[7, 3], m=[15, 10], n=10) 1913 val2 = hypergeom.pmf(k=7, M=25, n=10, N=15) 1914 assert_allclose(val1, val2, rtol=1e-8) 1915 1916 def test_rvs(self): 1917 # test if `rvs` is unbiased and large sample size converges 1918 # to the true mean. 1919 rv = multivariate_hypergeom(m=[3, 5], n=4) 1920 rvs = rv.rvs(size=1000, random_state=123) 1921 assert_allclose(rvs.mean(0), rv.mean(), rtol=1e-2) 1922 1923 def test_rvs_broadcasting(self): 1924 rv = multivariate_hypergeom(m=[[3, 5], [5, 10]], n=[4, 9]) 1925 rvs = rv.rvs(size=(1000, 2), random_state=123) 1926 assert_allclose(rvs.mean(0), rv.mean(), rtol=1e-2) 1927 1928 @pytest.mark.parametrize( 1929 "x, m, n, expected", 1930 [ 1931 ([5], [5], 5, 1), 1932 ([3, 4], [5, 10], 7, 0.3263403), 1933 # Ground truth value from R dmvhyper 1934 ([[[3, 5], [0, 8]], [[-1, 9], [1, 1]]], 1935 [5, 10], [[8, 8], [8, 2]], 1936 [[0.3916084, 0.006993007], [0, 0.4761905]]), 1937 # test with empty arrays. 1938 (np.array([], np.int_), np.array([], np.int_), 0, []), 1939 ([1, 2], [4, 5], 5, 0), 1940 # Ground truth value from R dmvhyper 1941 ([3, 3, 0], [5, 6, 7], 6, 0.01077354) 1942 ] 1943 ) 1944 def test_pmf(self, x, m, n, expected): 1945 vals = multivariate_hypergeom.pmf(x, m, n) 1946 assert_allclose(vals, expected, rtol=1e-7) 1947 1948 @pytest.mark.parametrize( 1949 "x, m, n, expected", 1950 [ 1951 ([3, 4], [[5, 10], [10, 15]], 7, [0.3263403, 0.3407531]), 1952 ([[1], [2]], [[3], [4]], [1, 3], [1., 0.]), 1953 ([[[1], [2]]], [[3], [4]], [1, 3], [[1., 0.]]), 1954 ([[1], [2]], [[[[3]]]], [1, 3], [[[1., 0.]]]) 1955 ] 1956 ) 1957 def test_pmf_broadcasting(self, x, m, n, expected): 1958 vals = multivariate_hypergeom.pmf(x, m, n) 1959 assert_allclose(vals, expected, rtol=1e-7) 1960 1961 def test_cov(self): 1962 cov1 = multivariate_hypergeom.cov(m=[3, 7, 10], n=12) 1963 cov2 = [[0.64421053, -0.26526316, -0.37894737], 1964 [-0.26526316, 1.14947368, -0.88421053], 1965 [-0.37894737, -0.88421053, 1.26315789]] 1966 assert_allclose(cov1, cov2, rtol=1e-8) 1967 1968 def test_cov_broadcasting(self): 1969 cov1 = multivariate_hypergeom.cov(m=[[7, 9], [10, 15]], n=[8, 12]) 1970 cov2 = [[[1.05, -1.05], [-1.05, 1.05]], 1971 [[1.56, -1.56], [-1.56, 1.56]]] 1972 assert_allclose(cov1, cov2, rtol=1e-8) 1973 1974 cov3 = multivariate_hypergeom.cov(m=[[4], [5]], n=[4, 5]) 1975 cov4 = [[[0.]], [[0.]]] 1976 assert_allclose(cov3, cov4, rtol=1e-8) 1977 1978 cov5 = multivariate_hypergeom.cov(m=[7, 9], n=[8, 12]) 1979 cov6 = [[[1.05, -1.05], [-1.05, 1.05]], 1980 [[0.7875, -0.7875], [-0.7875, 0.7875]]] 1981 assert_allclose(cov5, cov6, rtol=1e-8) 1982 1983 def test_var(self): 1984 # test with hypergeom 1985 var0 = multivariate_hypergeom.var(m=[10, 5], n=4) 1986 var1 = hypergeom.var(M=15, n=4, N=10) 1987 assert_allclose(var0, var1, rtol=1e-8) 1988 1989 def test_var_broadcasting(self): 1990 var0 = multivariate_hypergeom.var(m=[10, 5], n=[4, 8]) 1991 var1 = multivariate_hypergeom.var(m=[10, 5], n=4) 1992 var2 = multivariate_hypergeom.var(m=[10, 5], n=8) 1993 assert_allclose(var0[0], var1, rtol=1e-8) 1994 assert_allclose(var0[1], var2, rtol=1e-8) 1995 1996 var3 = multivariate_hypergeom.var(m=[[10, 5], [10, 14]], n=[4, 8]) 1997 var4 = [[0.6984127, 0.6984127], [1.352657, 1.352657]] 1998 assert_allclose(var3, var4, rtol=1e-8) 1999 2000 var5 = multivariate_hypergeom.var(m=[[5], [10]], n=[5, 10]) 2001 var6 = [[0.], [0.]] 2002 assert_allclose(var5, var6, rtol=1e-8) 2003 2004 def test_mean(self): 2005 # test with hypergeom 2006 mean0 = multivariate_hypergeom.mean(m=[10, 5], n=4) 2007 mean1 = hypergeom.mean(M=15, n=4, N=10) 2008 assert_allclose(mean0[0], mean1, rtol=1e-8) 2009 2010 mean2 = multivariate_hypergeom.mean(m=[12, 8], n=10) 2011 mean3 = [12.*10./20., 8.*10./20.] 2012 assert_allclose(mean2, mean3, rtol=1e-8) 2013 2014 def test_mean_broadcasting(self): 2015 mean0 = multivariate_hypergeom.mean(m=[[3, 5], [10, 5]], n=[4, 8]) 2016 mean1 = [[3.*4./8., 5.*4./8.], [10.*8./15., 5.*8./15.]] 2017 assert_allclose(mean0, mean1, rtol=1e-8) 2018 2019 def test_mean_edge_cases(self): 2020 mean0 = multivariate_hypergeom.mean(m=[0, 0, 0], n=0) 2021 assert_equal(mean0, [0., 0., 0.]) 2022 2023 mean1 = multivariate_hypergeom.mean(m=[1, 0, 0], n=2) 2024 assert_equal(mean1, [np.nan, np.nan, np.nan]) 2025 2026 mean2 = multivariate_hypergeom.mean(m=[[1, 0, 0], [1, 0, 1]], n=2) 2027 assert_allclose(mean2, [[np.nan, np.nan, np.nan], [1., 0., 1.]], 2028 rtol=1e-17) 2029 2030 mean3 = multivariate_hypergeom.mean(m=np.array([], np.int_), n=0) 2031 assert_equal(mean3, []) 2032 assert_(mean3.shape == (0, )) 2033 2034 def test_var_edge_cases(self): 2035 var0 = multivariate_hypergeom.var(m=[0, 0, 0], n=0) 2036 assert_allclose(var0, [0., 0., 0.], rtol=1e-16) 2037 2038 var1 = multivariate_hypergeom.var(m=[1, 0, 0], n=2) 2039 assert_equal(var1, [np.nan, np.nan, np.nan]) 2040 2041 var2 = multivariate_hypergeom.var(m=[[1, 0, 0], [1, 0, 1]], n=2) 2042 assert_allclose(var2, [[np.nan, np.nan, np.nan], [0., 0., 0.]], 2043 rtol=1e-17) 2044 2045 var3 = multivariate_hypergeom.var(m=np.array([], np.int_), n=0) 2046 assert_equal(var3, []) 2047 assert_(var3.shape == (0, )) 2048 2049 def test_cov_edge_cases(self): 2050 cov0 = multivariate_hypergeom.cov(m=[1, 0, 0], n=1) 2051 cov1 = [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]] 2052 assert_allclose(cov0, cov1, rtol=1e-17) 2053 2054 cov3 = multivariate_hypergeom.cov(m=[0, 0, 0], n=0) 2055 cov4 = [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]] 2056 assert_equal(cov3, cov4) 2057 2058 cov5 = multivariate_hypergeom.cov(m=np.array([], np.int_), n=0) 2059 cov6 = np.array([], dtype=np.float_).reshape(0, 0) 2060 assert_allclose(cov5, cov6, rtol=1e-17) 2061 assert_(cov5.shape == (0, 0)) 2062 2063 def test_frozen(self): 2064 # The frozen distribution should agree with the regular one 2065 np.random.seed(1234) 2066 n = 12 2067 m = [7, 9, 11, 13] 2068 x = [[0, 0, 0, 12], [0, 0, 1, 11], [0, 1, 1, 10], 2069 [1, 1, 1, 9], [1, 1, 2, 8]] 2070 x = np.asarray(x, dtype=np.int_) 2071 mhg_frozen = multivariate_hypergeom(m, n) 2072 assert_allclose(mhg_frozen.pmf(x), 2073 multivariate_hypergeom.pmf(x, m, n)) 2074 assert_allclose(mhg_frozen.logpmf(x), 2075 multivariate_hypergeom.logpmf(x, m, n)) 2076 assert_allclose(mhg_frozen.var(), multivariate_hypergeom.var(m, n)) 2077 assert_allclose(mhg_frozen.cov(), multivariate_hypergeom.cov(m, n)) 2078 2079 def test_invalid_params(self): 2080 assert_raises(ValueError, multivariate_hypergeom.pmf, 5, 10, 5) 2081 assert_raises(ValueError, multivariate_hypergeom.pmf, 5, [10], 5) 2082 assert_raises(ValueError, multivariate_hypergeom.pmf, [5, 4], [10], 5) 2083 assert_raises(TypeError, multivariate_hypergeom.pmf, [5.5, 4.5], 2084 [10, 15], 5) 2085 assert_raises(TypeError, multivariate_hypergeom.pmf, [5, 4], 2086 [10.5, 15.5], 5) 2087 assert_raises(TypeError, multivariate_hypergeom.pmf, [5, 4], 2088 [10, 15], 5.5) 2089 2090 2091def check_pickling(distfn, args): 2092 # check that a distribution instance pickles and unpickles 2093 # pay special attention to the random_state property 2094 2095 # save the random_state (restore later) 2096 rndm = distfn.random_state 2097 2098 distfn.random_state = 1234 2099 distfn.rvs(*args, size=8) 2100 s = pickle.dumps(distfn) 2101 r0 = distfn.rvs(*args, size=8) 2102 2103 unpickled = pickle.loads(s) 2104 r1 = unpickled.rvs(*args, size=8) 2105 assert_equal(r0, r1) 2106 2107 # restore the random_state 2108 distfn.random_state = rndm 2109 2110 2111def test_random_state_property(): 2112 scale = np.eye(3) 2113 scale[0, 1] = 0.5 2114 scale[1, 0] = 0.5 2115 dists = [ 2116 [multivariate_normal, ()], 2117 [dirichlet, (np.array([1.]), )], 2118 [wishart, (10, scale)], 2119 [invwishart, (10, scale)], 2120 [multinomial, (5, [0.5, 0.4, 0.1])], 2121 [ortho_group, (2,)], 2122 [special_ortho_group, (2,)] 2123 ] 2124 for distfn, args in dists: 2125 check_random_state_property(distfn, args) 2126 check_pickling(distfn, args) 2127