1import autograd.numpy as np 2from autograd.scipy.special import gammaln 3from autograd.scipy.misc import logsumexp 4from autograd.scipy.linalg import solve_triangular 5 6from ssm.util import one_hot 7 8 9def flatten_to_dim(X, d): 10 """ 11 Flatten an array of dimension k + d into an array of dimension 1 + d. 12 13 Example: 14 X = npr.rand(10, 5, 2, 2) 15 flatten_to_dim(X, 4).shape # (10, 5, 2, 2) 16 flatten_to_dim(X, 3).shape # (10, 5, 2, 2) 17 flatten_to_dim(X, 2).shape # (50, 2, 2) 18 flatten_to_dim(X, 1).shape # (100, 2) 19 20 Parameters 21 ---------- 22 X : array_like 23 The array to be flattened. Must be at least d dimensional 24 25 d : int (> 0) 26 The number of dimensions to retain. All leading dimensions are flattened. 27 28 Returns 29 ------- 30 flat_X : array_like 31 The input X flattened into an array dimension d (if X.ndim == d) 32 or d+1 (if X.ndim > d) 33 """ 34 assert X.ndim >= d 35 assert d > 0 36 return np.reshape(X[None, ...], (-1,) + X.shape[-d:]) 37 38 39def batch_mahalanobis(L, x): 40 """ 41 Compute the squared Mahalanobis distance. 42 :math:`x^T M^{-1} x` for a factored :math:`M = LL^T`. 43 44 Copied from PyTorch torch.distributions.multivariate_normal. 45 46 Parameters 47 ---------- 48 L : array_like (..., D, D) 49 Cholesky factorization(s) of covariance matrix 50 51 x : array_like (..., D) 52 Points at which to evaluate the quadratic term 53 54 Returns 55 ------- 56 y : array_like (...,) 57 squared Mahalanobis distance :math:`x^T (LL^T)^{-1} x` 58 59 x^T (LL^T)^{-1} x = x^T L^{-T} L^{-1} x 60 """ 61 # The most common shapes are x: (T, D) and L : (D, D) 62 # Special case that one 63 if x.ndim == 2 and L.ndim == 2: 64 xs = solve_triangular(L, x.T, lower=True) 65 return np.sum(xs**2, axis=0) 66 67 # Flatten the Cholesky into a (-1, D, D) array 68 flat_L = flatten_to_dim(L, 2) 69 # Invert each of the K arrays and reshape like L 70 L_inv = np.reshape(np.array([np.linalg.inv(Li.T) for Li in flat_L]), L.shape) 71 # dot with L_inv^T; square and sum. 72 xs = np.einsum('...i,...ij->...j', x, L_inv) 73 return np.sum(xs**2, axis=-1) 74 75def _multivariate_normal_logpdf(data, mus, Sigmas, Ls=None): 76 """ 77 Compute the log probability density of a multivariate Gaussian distribution. 78 This will broadcast as long as data, mus, Sigmas have the same (or at 79 least be broadcast compatible along the) leading dimensions. 80 81 Parameters 82 ---------- 83 data : array_like (..., D) 84 The points at which to evaluate the log density 85 86 mus : array_like (..., D) 87 The mean(s) of the Gaussian distribution(s) 88 89 Sigmas : array_like (..., D, D) 90 The covariances(s) of the Gaussian distribution(s) 91 92 Ls : array_like (..., D, D) 93 Optionally pass in the Cholesky decomposition of Sigmas 94 95 Returns 96 ------- 97 lps : array_like (...,) 98 Log probabilities under the multivariate Gaussian distribution(s). 99 """ 100 # Check inputs 101 D = data.shape[-1] 102 assert mus.shape[-1] == D 103 assert Sigmas.shape[-2] == Sigmas.shape[-1] == D 104 if Ls is not None: 105 assert Ls.shape[-2] == Ls.shape[-1] == D 106 else: 107 Ls = np.linalg.cholesky(Sigmas) # (..., D, D) 108 109 # Quadratic term 110 lp = -0.5 * batch_mahalanobis(Ls, data - mus) # (...,) 111 # Normalizer 112 L_diag = np.reshape(Ls, Ls.shape[:-2] + (-1,))[..., ::D + 1] # (..., D) 113 half_log_det = np.sum(np.log(abs(L_diag)), axis=-1) # (...,) 114 lp = lp - 0.5 * D * np.log(2 * np.pi) - half_log_det # (...,) 115 116 return lp 117 118 119def multivariate_normal_logpdf(data, mus, Sigmas, mask=None): 120 """ 121 Compute the log probability density of a multivariate Gaussian distribution. 122 This will broadcast as long as data, mus, Sigmas have the same (or at 123 least compatible) leading dimensions. 124 125 Parameters 126 ---------- 127 data : array_like (..., D) 128 The points at which to evaluate the log density 129 130 mus : array_like (..., D) 131 The mean(s) of the Gaussian distribution(s) 132 133 Sigmas : array_like (..., D, D) 134 The covariances(s) of the Gaussian distribution(s) 135 136 mask : array_like (..., D) bool 137 Optional mask indicating which entries in the data are observed 138 139 Returns 140 ------- 141 lps : array_like (...,) 142 Log probabilities under the multivariate Gaussian distribution(s). 143 """ 144 # Check inputs 145 D = data.shape[-1] 146 assert mus.shape[-1] == D 147 assert Sigmas.shape[-2] == Sigmas.shape[-1] == D 148 149 # If there's no mask, we can just use the standard log pdf code 150 if mask is None: 151 return _multivariate_normal_logpdf(data, mus, Sigmas) 152 153 # Otherwise we need to separate the data into sets with the same mask, 154 # since each one will entail a different covariance matrix. 155 # 156 # First, determine the output shape. Allow mus and Sigmas to 157 # have different shapes; e.g. many Gaussians with the same 158 # covariance but different means. 159 shp1 = np.broadcast(data, mus).shape[:-1] 160 shp2 = np.broadcast(data[..., None], Sigmas).shape[:-2] 161 assert len(shp1) == len(shp2) 162 shp = tuple(max(s1, s2) for s1, s2 in zip(shp1, shp2)) 163 164 # Broadcast the data into the full shape 165 full_data = np.broadcast_to(data, shp + (D,)) 166 167 # Get the full mask 168 assert mask.dtype == bool 169 assert mask.shape == data.shape 170 full_mask = np.broadcast_to(mask, shp + (D,)) 171 172 # Flatten the mask and get the unique values 173 flat_data = flatten_to_dim(full_data, 1) 174 flat_mask = flatten_to_dim(full_mask, 1) 175 unique_masks, mask_index = np.unique(flat_mask, return_inverse=True, axis=0) 176 177 # Initialize the output 178 lls = np.nan * np.ones(flat_data.shape[0]) 179 180 # Compute the log probability for each mask 181 for i, this_mask in enumerate(unique_masks): 182 this_inds = np.where(mask_index == i)[0] 183 this_D = np.sum(this_mask) 184 if this_D == 0: 185 lls[this_inds] = 0 186 continue 187 188 this_data = flat_data[np.ix_(this_inds, this_mask)] 189 this_mus = mus[..., this_mask] 190 this_Sigmas = Sigmas[np.ix_(*[np.ones(sz, dtype=bool) for sz in Sigmas.shape[:-2]], this_mask, this_mask)] 191 192 # Precompute the Cholesky decomposition 193 this_Ls = np.linalg.cholesky(this_Sigmas) 194 195 # Broadcast mus and Sigmas to full shape and extract the necessary indices 196 this_mus = flatten_to_dim(np.broadcast_to(this_mus, shp + (this_D,)), 1)[this_inds] 197 this_Ls = flatten_to_dim(np.broadcast_to(this_Ls, shp + (this_D, this_D)), 2)[this_inds] 198 199 # Evaluate the log likelihood 200 lls[this_inds] = _multivariate_normal_logpdf(this_data, this_mus, this_Sigmas, Ls=this_Ls) 201 202 # Reshape the output 203 assert np.all(np.isfinite(lls)) 204 return np.reshape(lls, shp) 205 206 207def expected_multivariate_normal_logpdf(E_xs, E_xxTs, E_mus, E_mumuTs, Sigmas, Ls=None): 208 """ 209 Compute the expected log probability density of a multivariate Gaussian distribution. 210 This will broadcast as long as data, mus, Sigmas have the same (or at 211 least be broadcast compatible along the) leading dimensions. 212 Parameters 213 ---------- 214 E_xs : array_like (..., D) 215 The expected value of the points at which to evaluate the log density 216 E_xxTs : array_like (..., D, D) 217 The second moment of the points at which to evaluate the log density 218 E_mus : array_like (..., D) 219 The expected mean(s) of the Gaussian distribution(s) 220 E_mumuTs : array_like (..., D, D) 221 The second moment of the mean 222 Sigmas : array_like (..., D, D) 223 The covariances(s) of the Gaussian distribution(s) 224 Ls : array_like (..., D, D) 225 Optionally pass in the Cholesky decomposition of Sigmas 226 Returns 227 ------- 228 lps : array_like (...,) 229 Expected log probabilities under the multivariate Gaussian distribution(s). 230 TODO 231 ---- 232 - Allow for uncertainty in the covariance as well. 233 """ 234 # Check inputs 235 D = E_xs.shape[-1] 236 assert E_xxTs.shape[-2] == E_xxTs.shape[-1] == D 237 assert E_mus.shape[-1] == D 238 assert E_mumuTs.shape[-2] == E_mumuTs.shape[-1] == D 239 assert Sigmas.shape[-2] == Sigmas.shape[-1] == D 240 if Ls is not None: 241 assert Ls.shape[-2] == Ls.shape[-1] == D 242 else: 243 Ls = np.linalg.cholesky(Sigmas) # (..., D, D) 244 245 # TODO: Figure out how to perform this computation without explicit inverse 246 Sigma_invs = np.linalg.inv(Sigmas) 247 248 # Compute E[(x-mu)^T Sigma^{-1}(x-mu)] 249 # = Tr(Sigma^{-1} E[(x-mu)(x-mu)^T]) 250 # = Tr(Sigma^{-1} E[xx^T - x mu^T - mu x^T + mu mu^T]) 251 # = Tr(Sigma^{-1} (E[xx^T - E[x]E[mu]^T - E[mu]E[x]^T + E[mu mu^T]])) 252 # = Tr(Sigma^{-1} A) 253 # = Tr((LL^T)^{-1} A) 254 # = Tr(L^{-1} A L^{-T} ) 255 # = sum_{ij} [Sigma^{-1}]_{ij} * A_{ij} 256 # where 257 # 258 # A = E[xx^T - E[x]E[mu]^T - E[mu]E[x]^T + E[mu mu^T]] 259 # 260 # However, since Sigma^{-1} is symmetric, we get the same 261 # answer with 262 # 263 # A = E[xx^T - 2 * E[x]E[mu]^T + E[mu mu^T]] 264 # 265 E_xmuT = E_xs[..., :, None] * E_mus[..., None, :] 266 # E_muxT = np.swapaxes(E_xmuT, -1, -2) 267 # As = E_xxTs - E_xmuT - E_muxT + E_mumuTs 268 As = E_xxTs - 2 * E_xmuT + E_mumuTs 269 lp = -0.5 * np.sum(Sigma_invs * As, axis=(-2, -1)) 270 271 # Normalizer 272 L_diag = np.reshape(Ls, Ls.shape[:-2] + (-1,))[..., ::D + 1] # (..., D) 273 half_log_det = np.sum(np.log(abs(L_diag)), axis=-1) # (...,) 274 lp = lp - 0.5 * D * np.log(2 * np.pi) - half_log_det # (...,) 275 276 return lp 277 278 279def diagonal_gaussian_logpdf(data, mus, sigmasqs, mask=None): 280 """ 281 Compute the log probability density of a Gaussian distribution with 282 a diagonal covariance. This will broadcast as long as data, mus, 283 sigmas have the same (or at least compatible) leading dimensions. 284 285 Parameters 286 ---------- 287 data : array_like (..., D) 288 The points at which to evaluate the log density 289 290 mus : array_like (..., D) 291 The mean(s) of the Gaussian distribution(s) 292 293 sigmasqs : array_like (..., D) 294 The diagonal variances(s) of the Gaussian distribution(s) 295 296 mask : array_like (..., D) bool 297 Optional mask indicating which entries in the data are observed 298 299 Returns 300 ------- 301 lps : array_like (...,) 302 Log probabilities under the diagonal Gaussian distribution(s). 303 """ 304 # Check inputs 305 D = data.shape[-1] 306 assert mus.shape[-1] == D 307 assert sigmasqs.shape[-1] == D 308 309 # Check mask 310 mask = mask if mask is not None else np.ones_like(data, dtype=bool) 311 assert mask.shape == data.shape 312 313 normalizer = -0.5 * np.log(2 * np.pi * sigmasqs) 314 return np.sum((normalizer - 0.5 * (data - mus)**2 / sigmasqs) * mask, axis=-1) 315 316 317def multivariate_studentst_logpdf(data, mus, Sigmas, nus, Ls=None): 318 """ 319 Compute the log probability density of a multivariate Student's t distribution. 320 This will broadcast as long as data, mus, Sigmas, nus have the same (or at 321 least be broadcast compatible along the) leading dimensions. 322 323 Parameters 324 ---------- 325 data : array_like (..., D) 326 The points at which to evaluate the log density 327 328 mus : array_like (..., D) 329 The mean(s) of the t distribution(s) 330 331 Sigmas : array_like (..., D, D) 332 The covariances(s) of the t distribution(s) 333 334 nus : array_like (...,) 335 The degrees of freedom of the t distribution(s) 336 337 Ls : array_like (..., D, D) 338 Optionally pass in the Cholesky decomposition of Sigmas 339 340 Returns 341 ------- 342 lps : array_like (...,) 343 Log probabilities under the multivariate Gaussian distribution(s). 344 """ 345 # Check inputs 346 D = data.shape[-1] 347 assert mus.shape[-1] == D 348 assert Sigmas.shape[-2] == Sigmas.shape[-1] == D 349 if Ls is not None: 350 assert Ls.shape[-2] == Ls.shape[-1] == D 351 else: 352 Ls = np.linalg.cholesky(Sigmas) # (..., D, D) 353 354 # Quadratic term 355 q = batch_mahalanobis(Ls, data - mus) / nus # (...,) 356 lp = - 0.5 * (nus + D) * np.log1p(q) # (...,) 357 358 # Normalizer 359 lp = lp + gammaln(0.5 * (nus + D)) - gammaln(0.5 * nus) # (...,) 360 lp = lp - 0.5 * D * np.log(np.pi) - 0.5 * D * np.log(nus) # (...,) 361 L_diag = np.reshape(Ls, Ls.shape[:-2] + (-1,))[..., ::D + 1] # (..., D) 362 half_log_det = np.sum(np.log(abs(L_diag)), axis=-1) # (...,) 363 lp = lp - half_log_det 364 365 return lp 366 367 368def expected_multivariate_studentst_logpdf(E_xs, E_xxTs, E_mus, E_mumuTs, Sigmas, nus, Ls=None): 369 """ 370 Compute the expected log probability density of a multivariate Gaussian distribution. 371 This will broadcast as long as data, mus, Sigmas have the same (or at 372 least be broadcast compatible along the) leading dimensions. 373 Parameters 374 ---------- 375 E_xs : array_like (..., D) 376 The expected value of the points at which to evaluate the log density 377 E_xxTs : array_like (..., D, D) 378 The second moment of the points at which to evaluate the log density 379 E_mus : array_like (..., D) 380 The expected mean(s) of the Gaussian distribution(s) 381 E_mumuTs : array_like (..., D, D) 382 The second moment of the mean 383 Sigmas : array_like (..., D, D) 384 The covariances(s) of the Gaussian distribution(s) 385 Ls : array_like (..., D, D) 386 Optionally pass in the Cholesky decomposition of Sigmas 387 Returns 388 ------- 389 lps : array_like (...,) 390 Expected log probabilities under the multivariate Gaussian distribution(s). 391 TODO 392 ---- 393 - Allow for uncertainty in the covariance Sigmas and dof nus as well. 394 """ 395 # Check inputs 396 D = E_xs.shape[-1] 397 assert E_xxTs.shape[-2] == E_xxTs.shape[-1] == D 398 assert E_mus.shape[-1] == D 399 assert E_mumuTs.shape[-2] == E_mumuTs.shape[-1] == D 400 assert Sigmas.shape[-2] == Sigmas.shape[-1] == D 401 if Ls is not None: 402 assert Ls.shape[-2] == Ls.shape[-1] == D 403 else: 404 Ls = np.linalg.cholesky(Sigmas) # (..., D, D) 405 406 # TODO: Figure out how to perform this computation without explicit inverse 407 Sigma_invs = np.linalg.inv(Sigmas) 408 409 # Compute E[(x-mu)^T Sigma^{-1}(x-mu)] 410 # = Tr(Sigma^{-1} E[(x-mu)(x-mu)^T]) 411 # = Tr(Sigma^{-1} E[xx^T - 2 x mu^T + mu mu^T]) 412 # = Tr(Sigma^{-1} (E[xx^T - 2 E[x]E[mu]^T + E[mu mu^T]])) 413 # = Tr(Sigma^{-1} A) 414 # = Tr((LL^T)^{-1} A) 415 # = Tr(L^{-1} A L^{-T} ) 416 # = sum_{ij} [Sigma^{-1}]_{ij} * A_{ij} 417 # where 418 # 419 # A = E[xx^T - 2 E[x]E[mu]^T + E[mu mu^T]] 420 # 421 As = E_xxTs - 2 * E_xs[..., :, None] * E_mus[..., None, :] + E_mumuTs # (..., D, D) 422 q = np.sum(Sigma_invs * As, axis=(-2, -1)) / nus # (...,) 423 lp = - 0.5 * (nus + D) * np.log1p(q) # (...,) 424 425 # Normalizer 426 L_diag = np.reshape(Ls, Ls.shape[:-2] + (-1,))[..., ::D + 1] # (..., D) 427 half_log_det = np.sum(np.log(abs(L_diag)), axis=-1) # (...,) 428 lp = lp - 0.5 * D * np.log(2 * np.pi) - half_log_det # (...,) 429 430 return lp 431 432 433def independent_studentst_logpdf(data, mus, sigmasqs, nus, mask=None): 434 """ 435 Compute the log probability density of a Gaussian distribution with 436 a diagonal covariance. This will broadcast as long as data, mus, 437 sigmas have the same (or at least compatible) leading dimensions. 438 439 Parameters 440 ---------- 441 data : array_like (..., D) 442 The points at which to evaluate the log density 443 444 mus : array_like (..., D) 445 The mean(s) of the Student's t distribution(s) 446 447 sigmasqs : array_like (..., D) 448 The diagonal variances(s) of the Student's t distribution(s) 449 450 nus : array_like (..., D) 451 The degrees of freedom of the Student's t distribution(s) 452 453 mask : array_like (..., D) bool 454 Optional mask indicating which entries in the data are observed 455 456 Returns 457 ------- 458 lps : array_like (...,) 459 Log probabilities under the Student's t distribution(s). 460 """ 461 D = data.shape[-1] 462 assert mus.shape[-1] == D 463 assert sigmasqs.shape[-1] == D 464 assert nus.shape[-1] == D 465 466 # Check mask 467 mask = mask if mask is not None else np.ones_like(data, dtype=bool) 468 assert mask.shape == data.shape 469 470 normalizer = gammaln(0.5 * (nus + 1)) - gammaln(0.5 * nus) 471 normalizer = normalizer - 0.5 * (np.log(np.pi) + np.log(nus) + np.log(sigmasqs)) 472 ll = normalizer - 0.5 * (nus + 1) * np.log(1.0 + (data - mus)**2 / (sigmasqs * nus)) 473 return np.sum(ll * mask, axis=-1) 474 475 476def bernoulli_logpdf(data, logit_ps, mask=None): 477 """ 478 Compute the log probability density of a Bernoulli distribution. 479 This will broadcast as long as data and logit_ps have the same 480 (or at least compatible) leading dimensions. 481 482 Parameters 483 ---------- 484 data : array_like (..., D) 485 The points at which to evaluate the log density 486 487 logit_ps : array_like (..., D) 488 The logit(s) log p / (1 - p) of the Bernoulli distribution(s) 489 490 mask : array_like (..., D) bool 491 Optional mask indicating which entries in the data are observed 492 493 Returns 494 ------- 495 lps : array_like (...,) 496 Log probabilities under the Bernoulli distribution(s). 497 """ 498 D = data.shape[-1] 499 assert (data.dtype == int or data.dtype == bool) 500 assert data.min() >= 0 and data.max() <= 1 501 assert logit_ps.shape[-1] == D 502 503 # Check mask 504 mask = mask if mask is not None else np.ones_like(data, dtype=bool) 505 assert mask.shape == data.shape 506 507 # Evaluate log probability 508 # log Pr(x | p) = x * log(p) + (1-x) * log(1-p) 509 # = x * log(p / (1-p)) + log(1-p) 510 # = x * log(p / (1-p)) - log(1/(1-p)) 511 # = x * log(p / (1-p)) - log(1 + p/(1-p)). 512 # 513 # Let u = log (p / (1-p)) = logit(p), then 514 # 515 # log Pr(x | p) = x * u - log(1 + e^u) 516 # = x * u - log(e^0 + e^u) 517 # = x * u - log(e^m * (e^-m + e^(u-m)) 518 # = x * u - m - log(exp(-m) + exp(u-m)). 519 # 520 # This holds for any m. we choose m = max(0, u) to avoid overflow. 521 m = np.maximum(0, logit_ps) 522 lls = data * logit_ps - m - np.log(np.exp(-m) + np.exp(logit_ps - m)) 523 return np.sum(lls * mask, axis=-1) 524 525 526def poisson_logpdf(data, lambdas, mask=None): 527 """ 528 Compute the log probability density of a Poisson distribution. 529 This will broadcast as long as data and lambdas have the same 530 (or at least compatible) leading dimensions. 531 532 Parameters 533 ---------- 534 data : array_like (..., D) 535 The points at which to evaluate the log density 536 537 lambdas : array_like (..., D) 538 The rates of the Poisson distribution(s) 539 540 mask : array_like (..., D) bool 541 Optional mask indicating which entries in the data are observed 542 543 Returns 544 ------- 545 lps : array_like (...,) 546 Log probabilities under the Poisson distribution(s). 547 """ 548 D = data.shape[-1] 549 assert data.dtype in (int, np.int8, np.int16, np.int32, np.int64) 550 assert lambdas.shape[-1] == D 551 552 # Check mask 553 mask = mask if mask is not None else np.ones_like(data, dtype=bool) 554 assert mask.shape == data.shape 555 556 # Compute log pdf 557 lls = -gammaln(data + 1) - lambdas + data * np.log(lambdas) 558 return np.sum(lls * mask, axis=-1) 559 560 561def categorical_logpdf(data, logits, mask=None): 562 """ 563 Compute the log probability density of a categorical distribution. 564 This will broadcast as long as data and logits have the same 565 (or at least compatible) leading dimensions. 566 567 Parameters 568 ---------- 569 data : array_like (..., D) int (0 <= data < C) 570 The points at which to evaluate the log density 571 572 lambdas : array_like (..., D, C) 573 The logits of the categorical distribution(s) with C classes 574 575 mask : array_like (..., D) bool 576 Optional mask indicating which entries in the data are observed 577 578 Returns 579 ------- 580 lps : array_like (...,) 581 Log probabilities under the categorical distribution(s). 582 """ 583 D = data.shape[-1] 584 C = logits.shape[-1] 585 assert data.dtype in (int, np.int8, np.int16, np.int32, np.int64) 586 assert np.all((data >= 0) & (data < C)) 587 assert logits.shape[-2] == D 588 589 # Check mask 590 mask = mask if mask is not None else np.ones_like(data, dtype=bool) 591 assert mask.shape == data.shape 592 593 logits = logits - logsumexp(logits, axis=-1, keepdims=True) # (..., D, C) 594 x = one_hot(data, C) # (..., D, C) 595 lls = np.sum(x * logits, axis=-1) # (..., D) 596 return np.sum(lls * mask, axis=-1) # (...,) 597 598 599def vonmises_logpdf(data, mus, kappas, mask=None): 600 """ 601 Compute the log probability density of a von Mises distribution. 602 This will broadcast as long as data, mus, and kappas have the same 603 (or at least compatible) leading dimensions. 604 605 Parameters 606 ---------- 607 data : array_like (..., D) 608 The points at which to evaluate the log density 609 610 mus : array_like (..., D) 611 The means of the von Mises distribution(s) 612 613 kappas : array_like (..., D) 614 The concentration of the von Mises distribution(s) 615 616 mask : array_like (..., D) bool 617 Optional mask indicating which entries in the data are observed 618 619 Returns 620 ------- 621 lps : array_like (...,) 622 Log probabilities under the von Mises distribution(s). 623 """ 624 try: 625 from autograd.scipy.special import i0 626 except: 627 raise Exception("von Mises relies on the function autograd.scipy.special.i0. " 628 "This is present in the latest Github code, but not on pypi. " 629 "Please use the Github version of autograd instead.") 630 631 D = data.shape[-1] 632 assert mus.shape[-1] == D 633 assert kappas.shape[-1] == D 634 635 # Check mask 636 mask = mask if mask is not None else np.ones_like(data, dtype=bool) 637 assert mask.shape == data.shape 638 639 ll = kappas * np.cos(data - mus) - np.log(2 * np.pi) - np.log(i0(kappas)) 640 return np.sum(ll * mask, axis=-1) 641