1# -*- coding: utf-8 -*- 2""" 3This module implements maximum likelihood-based estimation (MLE) of 4Gaussian regression models for finite-dimensional observations made on 5infinite-dimensional processes. 6 7The ProcessMLE class supports regression analyses on grouped data, 8where the observations within a group are dependent (they are made on 9the same underlying process). One use-case is repeated measures 10regression for temporal (longitudinal) data, in which the repeated 11measures occur at arbitrary real-valued time points. 12 13The mean structure is specified as a linear model. The covariance 14parameters depend on covariates via a link function. 15""" 16 17import numpy as np 18import pandas as pd 19import patsy 20import statsmodels.base.model as base 21from statsmodels.regression.linear_model import OLS 22import collections 23from scipy.optimize import minimize 24from statsmodels.iolib import summary2 25from statsmodels.tools.numdiff import approx_fprime 26import warnings 27 28 29class ProcessCovariance(object): 30 r""" 31 A covariance model for a process indexed by a real parameter. 32 33 An implementation of this class is based on a positive definite 34 correlation function h that maps real numbers to the interval [0, 35 1], such as the Gaussian (squared exponential) correlation 36 function :math:`\exp(-x^2)`. It also depends on a positive 37 scaling function `s` and a positive smoothness function `u`. 38 """ 39 40 def get_cov(self, time, sc, sm): 41 """ 42 Returns the covariance matrix for given time values. 43 44 Parameters 45 ---------- 46 time : array_like 47 The time points for the observations. If len(time) = p, 48 a pxp covariance matrix is returned. 49 sc : array_like 50 The scaling parameters for the observations. 51 sm : array_like 52 The smoothness parameters for the observation. See class 53 docstring for details. 54 """ 55 raise NotImplementedError 56 57 def jac(self, time, sc, sm): 58 """ 59 The Jacobian of the covariance with respect to the parameters. 60 61 See get_cov for parameters. 62 63 Returns 64 ------- 65 jsc : list-like 66 jsc[i] is the derivative of the covariance matrix 67 with respect to the i^th scaling parameter. 68 jsm : list-like 69 jsm[i] is the derivative of the covariance matrix 70 with respect to the i^th smoothness parameter. 71 """ 72 raise NotImplementedError 73 74 75class GaussianCovariance(ProcessCovariance): 76 r""" 77 An implementation of ProcessCovariance using the Gaussian kernel. 78 79 This class represents a parametric covariance model for a Gaussian 80 process as described in the work of Paciorek et al. cited below. 81 82 Following Paciorek et al [1]_, the covariance between observations with 83 index `i` and `j` is given by: 84 85 .. math:: 86 87 s[i] \cdot s[j] \cdot h(|time[i] - time[j]| / \sqrt{(u[i] + u[j]) / 88 2}) \cdot \frac{u[i]^{1/4}u[j]^{1/4}}{\sqrt{(u[i] + u[j])/2}} 89 90 The ProcessMLE class allows linear models with this covariance 91 structure to be fit using maximum likelihood (ML). The mean and 92 covariance parameters of the model are fit jointly. 93 94 The mean, scaling, and smoothing parameters can be linked to 95 covariates. The mean parameters are linked linearly, and the 96 scaling and smoothing parameters use an log link function to 97 preserve positivity. 98 99 The reference of Paciorek et al. below provides more details. 100 Note that here we only implement the 1-dimensional version of 101 their approach. 102 103 References 104 ---------- 105 .. [1] Paciorek, C. J. and Schervish, M. J. (2006). Spatial modeling using 106 a new class of nonstationary covariance functions. Environmetrics, 107 17:483–506. 108 https://papers.nips.cc/paper/2350-nonstationary-covariance-functions-for-gaussian-process-regression.pdf 109 """ 110 111 def get_cov(self, time, sc, sm): 112 113 da = np.subtract.outer(time, time) 114 ds = np.add.outer(sm, sm) / 2 115 116 qmat = da * da / ds 117 cm = np.exp(-qmat / 2) / np.sqrt(ds) 118 cm *= np.outer(sm, sm)**0.25 119 cm *= np.outer(sc, sc) 120 121 return cm 122 123 def jac(self, time, sc, sm): 124 125 da = np.subtract.outer(time, time) 126 ds = np.add.outer(sm, sm) / 2 127 sds = np.sqrt(ds) 128 daa = da * da 129 qmat = daa / ds 130 p = len(time) 131 eqm = np.exp(-qmat / 2) 132 sm4 = np.outer(sm, sm)**0.25 133 cmx = eqm * sm4 / sds 134 dq0 = -daa / ds**2 135 di = np.zeros((p, p)) 136 fi = np.zeros((p, p)) 137 scc = np.outer(sc, sc) 138 139 # Derivatives with respect to the smoothing parameters. 140 jsm = [] 141 for i, _ in enumerate(sm): 142 di *= 0 143 di[i, :] += 0.5 144 di[:, i] += 0.5 145 dbottom = 0.5 * di / sds 146 dtop = -0.5 * eqm * dq0 * di 147 b = dtop / sds - eqm * dbottom / ds 148 c = eqm / sds 149 v = 0.25 * sm**0.25 / sm[i]**0.75 150 fi *= 0 151 fi[i, :] = v 152 fi[:, i] = v 153 fi[i, i] = 0.5 / sm[i]**0.5 154 b = c * fi + b * sm4 155 b *= scc 156 jsm.append(b) 157 158 # Derivatives with respect to the scaling parameters. 159 jsc = [] 160 for i in range(0, len(sc)): 161 b = np.zeros((p, p)) 162 b[i, :] = cmx[i, :] * sc 163 b[:, i] += cmx[:, i] * sc 164 jsc.append(b) 165 166 return jsc, jsm 167 168 169def _check_args(endog, exog, exog_scale, exog_smooth, exog_noise, time, 170 groups): 171 172 v = [ 173 len(endog), 174 exog.shape[0], 175 exog_scale.shape[0], 176 exog_smooth.shape[0], 177 len(time), 178 len(groups) 179 ] 180 181 if exog_noise is not None: 182 v.append(exog_noise.shape[0]) 183 184 if min(v) != max(v): 185 msg = ("The leading dimensions of all array arguments " + 186 "must be equal.") 187 raise ValueError(msg) 188 189 190class ProcessMLE(base.LikelihoodModel): 191 """ 192 Fit a Gaussian mean/variance regression model. 193 194 This class fits a one-dimensional Gaussian process model with 195 parametrized mean and covariance structures to grouped data. For 196 each group, there is an independent realization of a latent 197 Gaussian process indexed by an observed real-valued time 198 variable.. The data consist of the Gaussian process observed at a 199 finite number of `time` values. 200 201 The process mean and variance can be lined to covariates. The 202 mean structure is linear in the covariates. The covariance 203 structure is non-stationary, and is defined parametrically through 204 'scaling', and 'smoothing' parameters. The covariance of the 205 process between two observations in the same group is a function 206 of the distance between the time values of the two observations. 207 The scaling and smoothing parameters can be linked to covariates. 208 209 The observed data are modeled as the sum of the Gaussian process 210 realization and (optionally) independent white noise. The standard 211 deviation of the white noise can be linked to covariates. 212 213 The data should be provided in 'long form', with a group label to 214 indicate which observations belong to the same group. 215 Observations in different groups are always independent. 216 217 Parameters 218 ---------- 219 endog : array_like 220 The dependent variable. 221 exog : array_like 222 The design matrix for the mean structure 223 exog_scale : array_like 224 The design matrix for the scaling structure 225 exog_smooth : array_like 226 The design matrix for the smoothness structure 227 exog_noise : array_like 228 The design matrix for the additive white noise. The 229 linear predictor is the log of the white noise standard 230 deviation. If None, there is no additive noise (the 231 process is observed directly). 232 time : array_like (1-dimensional) 233 The univariate index values, used to calculate distances 234 between observations in the same group, which determines 235 their correlations. 236 groups : array_like (1-dimensional) 237 The group values. 238 cov : a ProcessCovariance instance 239 Defaults to GaussianCovariance. 240 """ 241 242 def __init__(self, 243 endog, 244 exog, 245 exog_scale, 246 exog_smooth, 247 exog_noise, 248 time, 249 groups, 250 cov=None, 251 **kwargs): 252 253 super(ProcessMLE, self).__init__( 254 endog, 255 exog, 256 exog_scale=exog_scale, 257 exog_smooth=exog_smooth, 258 exog_noise=exog_noise, 259 time=time, 260 groups=groups, 261 **kwargs) 262 263 self._has_noise = exog_noise is not None 264 265 # Create parameter names 266 xnames = [] 267 if hasattr(exog, "columns"): 268 xnames = list(exog.columns) 269 else: 270 xnames = ["Mean%d" % j for j in range(exog.shape[1])] 271 272 if hasattr(exog_scale, "columns"): 273 xnames += list(exog_scale.columns) 274 else: 275 xnames += ["Scale%d" % j for j in range(exog_scale.shape[1])] 276 277 if hasattr(exog_smooth, "columns"): 278 xnames += list(exog_smooth.columns) 279 else: 280 xnames += ["Smooth%d" % j for j in range(exog_smooth.shape[1])] 281 282 if self._has_noise: 283 if hasattr(exog_noise, "columns"): 284 # If pandas-like, get the actual column names 285 xnames += list(exog_noise.columns) 286 else: 287 # If numpy-like, create default names 288 xnames += ["Noise%d" % j for j in range(exog_noise.shape[1])] 289 290 self.data.param_names = xnames 291 292 if cov is None: 293 cov = GaussianCovariance() 294 self.cov = cov 295 296 _check_args(endog, exog, exog_scale, exog_smooth, exog_noise, 297 time, groups) 298 299 groups_ix = collections.defaultdict(lambda: []) 300 for i, g in enumerate(groups): 301 groups_ix[g].append(i) 302 self._groups_ix = groups_ix 303 304 # Default, can be set in call to fit. 305 self.verbose = False 306 307 self.k_exog = self.exog.shape[1] 308 self.k_scale = self.exog_scale.shape[1] 309 self.k_smooth = self.exog_smooth.shape[1] 310 if self._has_noise: 311 self.k_noise = self.exog_noise.shape[1] 312 313 def _split_param_names(self): 314 xnames = self.data.param_names 315 q = 0 316 mean_names = xnames[q:q+self.k_exog] 317 q += self.k_exog 318 scale_names = xnames[q:q+self.k_scale] 319 q += self.k_scale 320 smooth_names = xnames[q:q+self.k_smooth] 321 322 if self._has_noise: 323 q += self.k_noise 324 noise_names = xnames[q:q+self.k_noise] 325 else: 326 noise_names = [] 327 328 return mean_names, scale_names, smooth_names, noise_names 329 330 @classmethod 331 def from_formula(cls, 332 formula, 333 data, 334 subset=None, 335 drop_cols=None, 336 *args, 337 **kwargs): 338 339 if "scale_formula" in kwargs: 340 scale_formula = kwargs["scale_formula"] 341 else: 342 raise ValueError("scale_formula is a required argument") 343 344 if "smooth_formula" in kwargs: 345 smooth_formula = kwargs["smooth_formula"] 346 else: 347 raise ValueError("smooth_formula is a required argument") 348 349 if "noise_formula" in kwargs: 350 noise_formula = kwargs["noise_formula"] 351 else: 352 noise_formula = None 353 354 if "time" in kwargs: 355 time = kwargs["time"] 356 else: 357 raise ValueError("time is a required argument") 358 359 if "groups" in kwargs: 360 groups = kwargs["groups"] 361 else: 362 raise ValueError("groups is a required argument") 363 364 if subset is not None: 365 warnings.warn("'subset' is ignored") 366 367 if drop_cols is not None: 368 warnings.warn("'drop_cols' is ignored") 369 370 if isinstance(time, str): 371 time = np.asarray(data[time]) 372 373 if isinstance(groups, str): 374 groups = np.asarray(data[groups]) 375 376 exog_scale = patsy.dmatrix(scale_formula, data) 377 scale_design_info = exog_scale.design_info 378 scale_names = scale_design_info.column_names 379 exog_scale = np.asarray(exog_scale) 380 381 exog_smooth = patsy.dmatrix(smooth_formula, data) 382 smooth_design_info = exog_smooth.design_info 383 smooth_names = smooth_design_info.column_names 384 exog_smooth = np.asarray(exog_smooth) 385 386 if noise_formula is not None: 387 exog_noise = patsy.dmatrix(noise_formula, data) 388 noise_design_info = exog_noise.design_info 389 noise_names = noise_design_info.column_names 390 exog_noise = np.asarray(exog_noise) 391 else: 392 exog_noise, noise_design_info, noise_names, exog_noise =\ 393 None, None, [], None 394 395 mod = super(ProcessMLE, cls).from_formula( 396 formula, 397 data=data, 398 subset=None, 399 exog_scale=exog_scale, 400 exog_smooth=exog_smooth, 401 exog_noise=exog_noise, 402 time=time, 403 groups=groups) 404 405 mod.data.scale_design_info = scale_design_info 406 mod.data.smooth_design_info = smooth_design_info 407 408 if mod._has_noise: 409 mod.data.noise_design_info = noise_design_info 410 411 mod.data.param_names = (mod.exog_names + scale_names + 412 smooth_names + noise_names) 413 414 return mod 415 416 def unpack(self, z): 417 """ 418 Split the packed parameter vector into blocks. 419 """ 420 421 # Mean parameters 422 pm = self.exog.shape[1] 423 mnpar = z[0:pm] 424 425 # Standard deviation parameters 426 pv = self.exog_scale.shape[1] 427 scpar = z[pm:pm + pv] 428 429 # Smoothness parameters 430 ps = self.exog_smooth.shape[1] 431 smpar = z[pm + pv:pm + pv + ps] 432 433 # Observation white noise standard deviation. 434 # Empty if has_noise = False. 435 nopar = z[pm + pv + ps:] 436 437 return mnpar, scpar, smpar, nopar 438 439 def _get_start(self): 440 441 # Use OLS to get starting values for mean structure parameters 442 model = OLS(self.endog, self.exog) 443 result = model.fit() 444 445 m = self.exog_scale.shape[1] + self.exog_smooth.shape[1] 446 447 if self._has_noise: 448 m += self.exog_noise.shape[1] 449 450 return np.concatenate((result.params, np.zeros(m))) 451 452 def loglike(self, params): 453 """ 454 Calculate the log-likelihood function for the model. 455 456 Parameters 457 ---------- 458 params : array_like 459 The packed parameters for the model. 460 461 Returns 462 ------- 463 The log-likelihood value at the given parameter point. 464 465 Notes 466 ----- 467 The mean, scaling, and smoothing parameters are packed into 468 a vector. Use `unpack` to access the component vectors. 469 """ 470 471 mnpar, scpar, smpar, nopar = self.unpack(params) 472 473 # Residuals 474 resid = self.endog - np.dot(self.exog, mnpar) 475 476 # Scaling parameters 477 sc = np.exp(np.dot(self.exog_scale, scpar)) 478 479 # Smoothness parameters 480 sm = np.exp(np.dot(self.exog_smooth, smpar)) 481 482 # White noise standard deviation 483 if self._has_noise: 484 no = np.exp(np.dot(self.exog_noise, nopar)) 485 486 # Get the log-likelihood 487 ll = 0. 488 for _, ix in self._groups_ix.items(): 489 490 # Get the covariance matrix for this person. 491 cm = self.cov.get_cov(self.time[ix], sc[ix], sm[ix]) 492 493 # The variance of the additive noise, if present. 494 if self._has_noise: 495 cm.flat[::cm.shape[0] + 1] += no[ix]**2 496 497 re = resid[ix] 498 ll -= 0.5 * np.linalg.slogdet(cm)[1] 499 ll -= 0.5 * np.dot(re, np.linalg.solve(cm, re)) 500 501 if self.verbose: 502 print("L=", ll) 503 504 return ll 505 506 def score(self, params): 507 """ 508 Calculate the score function for the model. 509 510 Parameters 511 ---------- 512 params : array_like 513 The packed parameters for the model. 514 515 Returns 516 ------- 517 The score vector at the given parameter point. 518 519 Notes 520 ----- 521 The mean, scaling, and smoothing parameters are packed into 522 a vector. Use `unpack` to access the component vectors. 523 """ 524 525 mnpar, scpar, smpar, nopar = self.unpack(params) 526 pm, pv, ps = len(mnpar), len(scpar), len(smpar) 527 528 # Residuals 529 resid = self.endog - np.dot(self.exog, mnpar) 530 531 # Scaling 532 sc = np.exp(np.dot(self.exog_scale, scpar)) 533 534 # Smoothness 535 sm = np.exp(np.dot(self.exog_smooth, smpar)) 536 537 # White noise standard deviation 538 if self._has_noise: 539 no = np.exp(np.dot(self.exog_noise, nopar)) 540 541 # Get the log-likelihood 542 score = np.zeros(len(mnpar) + len(scpar) + len(smpar) + len(nopar)) 543 for _, ix in self._groups_ix.items(): 544 545 sc_i = sc[ix] 546 sm_i = sm[ix] 547 resid_i = resid[ix] 548 time_i = self.time[ix] 549 exog_i = self.exog[ix, :] 550 exog_scale_i = self.exog_scale[ix, :] 551 exog_smooth_i = self.exog_smooth[ix, :] 552 553 # Get the covariance matrix for this person. 554 cm = self.cov.get_cov(time_i, sc_i, sm_i) 555 556 if self._has_noise: 557 no_i = no[ix] 558 exog_noise_i = self.exog_noise[ix, :] 559 cm.flat[::cm.shape[0] + 1] += no[ix]**2 560 561 cmi = np.linalg.inv(cm) 562 563 jacv, jacs = self.cov.jac(time_i, sc_i, sm_i) 564 565 # The derivatives for the mean parameters. 566 dcr = np.linalg.solve(cm, resid_i) 567 score[0:pm] += np.dot(exog_i.T, dcr) 568 569 # The derivatives for the scaling parameters. 570 rx = np.outer(resid_i, resid_i) 571 qm = np.linalg.solve(cm, rx) 572 qm = 0.5 * np.linalg.solve(cm, qm.T) 573 scx = sc_i[:, None] * exog_scale_i 574 for i, _ in enumerate(ix): 575 jq = np.sum(jacv[i] * qm) 576 score[pm:pm + pv] += jq * scx[i, :] 577 score[pm:pm + pv] -= 0.5 * np.sum(jacv[i] * cmi) * scx[i, :] 578 579 # The derivatives for the smoothness parameters. 580 smx = sm_i[:, None] * exog_smooth_i 581 for i, _ in enumerate(ix): 582 jq = np.sum(jacs[i] * qm) 583 score[pm + pv:pm + pv + ps] += jq * smx[i, :] 584 score[pm + pv:pm + pv + ps] -= ( 585 0.5 * np.sum(jacs[i] * cmi) * smx[i, :]) 586 587 # The derivatives with respect to the standard deviation parameters 588 if self._has_noise: 589 sno = no_i[:, None]**2 * exog_noise_i 590 score[pm + pv + ps:] -= np.dot(cmi.flat[::cm.shape[0] + 1], 591 sno) 592 bm = np.dot(cmi, np.dot(rx, cmi)) 593 score[pm + pv + ps:] += np.dot(bm.flat[::bm.shape[0] + 1], sno) 594 595 if self.verbose: 596 print("|G|=", np.sqrt(np.sum(score * score))) 597 598 return score 599 600 def hessian(self, params): 601 602 hess = approx_fprime(params, self.score) 603 return hess 604 605 def fit(self, start_params=None, method=None, maxiter=None, 606 **kwargs): 607 """ 608 Fit a grouped Gaussian process regression using MLE. 609 610 Parameters 611 ---------- 612 start_params : array_like 613 Optional starting values. 614 method : str or array of str 615 Method or sequence of methods for scipy optimize. 616 maxiter : int 617 The maximum number of iterations in the optimization. 618 619 Returns 620 ------- 621 An instance of ProcessMLEResults. 622 """ 623 624 if "verbose" in kwargs: 625 self.verbose = kwargs["verbose"] 626 627 minim_opts = {} 628 if "minim_opts" in kwargs: 629 minim_opts = kwargs["minim_opts"] 630 631 if start_params is None: 632 start_params = self._get_start() 633 634 if isinstance(method, str): 635 method = [method] 636 elif method is None: 637 method = ["powell", "bfgs"] 638 639 for j, meth in enumerate(method): 640 641 if meth not in ("powell",): 642 def jac(x): 643 return -self.score(x) 644 else: 645 jac = None 646 647 if maxiter is not None: 648 if np.isscalar(maxiter): 649 minim_opts["maxiter"] = maxiter 650 else: 651 minim_opts["maxiter"] = maxiter[j % len(maxiter)] 652 653 f = minimize( 654 lambda x: -self.loglike(x), 655 method=meth, 656 x0=start_params, 657 jac=jac, 658 options=minim_opts) 659 660 if not f.success: 661 msg = "Fitting did not converge" 662 if jac is not None: 663 msg += ", |gradient|=%.6f" % np.sqrt(np.sum(f.jac**2)) 664 if j < len(method) - 1: 665 msg += ", trying %s next..." % method[j+1] 666 warnings.warn(msg) 667 668 if np.isfinite(f.x).all(): 669 start_params = f.x 670 671 hess = self.hessian(f.x) 672 try: 673 cov_params = -np.linalg.inv(hess) 674 except Exception: 675 cov_params = None 676 677 class rslt: 678 pass 679 680 r = rslt() 681 r.params = f.x 682 r.normalized_cov_params = cov_params 683 r.optim_retvals = f 684 r.scale = 1 685 686 rslt = ProcessMLEResults(self, r) 687 688 return rslt 689 690 def covariance(self, time, scale_params, smooth_params, scale_data, 691 smooth_data): 692 """ 693 Returns a Gaussian process covariance matrix. 694 695 Parameters 696 ---------- 697 time : array_like 698 The time points at which the fitted covariance matrix is 699 calculated. 700 scale_params : array_like 701 The regression parameters for the scaling part 702 of the covariance structure. 703 smooth_params : array_like 704 The regression parameters for the smoothing part 705 of the covariance structure. 706 scale_data : DataFrame 707 The data used to determine the scale parameter, 708 must have len(time) rows. 709 smooth_data : DataFrame 710 The data used to determine the smoothness parameter, 711 must have len(time) rows. 712 713 Returns 714 ------- 715 A covariance matrix. 716 717 Notes 718 ----- 719 If the model was fit using formulas, `scale` and `smooth` should 720 be Dataframes, containing all variables that were present in the 721 respective scaling and smoothing formulas used to fit the model. 722 Otherwise, `scale` and `smooth` should contain data arrays whose 723 columns align with the fitted scaling and smoothing parameters. 724 725 The covariance is only for the Gaussian process and does not include 726 the white noise variance. 727 """ 728 729 if not hasattr(self.data, "scale_design_info"): 730 sca = np.dot(scale_data, scale_params) 731 smo = np.dot(smooth_data, smooth_params) 732 else: 733 sc = patsy.dmatrix(self.data.scale_design_info, scale_data) 734 sm = patsy.dmatrix(self.data.smooth_design_info, smooth_data) 735 sca = np.exp(np.dot(sc, scale_params)) 736 smo = np.exp(np.dot(sm, smooth_params)) 737 738 return self.cov.get_cov(time, sca, smo) 739 740 def predict(self, params, exog=None, *args, **kwargs): 741 """ 742 Obtain predictions of the mean structure. 743 744 Parameters 745 ---------- 746 params : array_like 747 The model parameters, may be truncated to include only mean 748 parameters. 749 exog : array_like 750 The design matrix for the mean structure. If not provided, 751 the model's design matrix is used. 752 """ 753 754 if exog is None: 755 exog = self.exog 756 elif hasattr(self.data, "design_info"): 757 # Run the provided data through the formula if present 758 exog = patsy.dmatrix(self.data.design_info, exog) 759 760 if len(params) > exog.shape[1]: 761 params = params[0:exog.shape[1]] 762 763 return np.dot(exog, params) 764 765 766class ProcessMLEResults(base.GenericLikelihoodModelResults): 767 """ 768 Results class for Gaussian process regression models. 769 """ 770 771 def __init__(self, model, mlefit): 772 773 super(ProcessMLEResults, self).__init__( 774 model, mlefit) 775 776 pa = model.unpack(mlefit.params) 777 778 self.mean_params = pa[0] 779 self.scale_params = pa[1] 780 self.smooth_params = pa[2] 781 self.no_params = pa[3] 782 783 self.df_resid = model.endog.shape[0] - len(mlefit.params) 784 785 self.k_exog = self.model.exog.shape[1] 786 self.k_scale = self.model.exog_scale.shape[1] 787 self.k_smooth = self.model.exog_smooth.shape[1] 788 789 self._has_noise = model._has_noise 790 if model._has_noise: 791 self.k_noise = self.model.exog_noise.shape[1] 792 793 def predict(self, exog=None, transform=True, *args, **kwargs): 794 795 if not transform: 796 warnings.warn("'transform=False' is ignored in predict") 797 798 if len(args) > 0 or len(kwargs) > 0: 799 warnings.warn("extra arguments ignored in 'predict'") 800 801 return self.model.predict(self.params, exog) 802 803 def covariance(self, time, scale, smooth): 804 """ 805 Returns a fitted covariance matrix. 806 807 Parameters 808 ---------- 809 time : array_like 810 The time points at which the fitted covariance 811 matrix is calculated. 812 scale : array_like 813 The data used to determine the scale parameter, 814 must have len(time) rows. 815 smooth : array_like 816 The data used to determine the smoothness parameter, 817 must have len(time) rows. 818 819 Returns 820 ------- 821 A covariance matrix. 822 823 Notes 824 ----- 825 If the model was fit using formulas, `scale` and `smooth` should 826 be Dataframes, containing all variables that were present in the 827 respective scaling and smoothing formulas used to fit the model. 828 Otherwise, `scale` and `smooth` should be data arrays whose 829 columns align with the fitted scaling and smoothing parameters. 830 """ 831 832 return self.model.covariance(time, self.scale_params, 833 self.smooth_params, scale, smooth) 834 835 def covariance_group(self, group): 836 837 # Check if the group exists, since _groups_ix is a 838 # DefaultDict use len instead of catching a KeyError. 839 ix = self.model._groups_ix[group] 840 if len(ix) == 0: 841 msg = "Group '%s' does not exist" % str(group) 842 raise ValueError(msg) 843 844 scale_data = self.model.exog_scale[ix, :] 845 smooth_data = self.model.exog_smooth[ix, :] 846 847 _, scale_names, smooth_names, _ = self.model._split_param_names() 848 849 scale_data = pd.DataFrame(scale_data, columns=scale_names) 850 smooth_data = pd.DataFrame(smooth_data, columns=smooth_names) 851 time = self.model.time[ix] 852 853 return self.model.covariance(time, 854 self.scale_params, 855 self.smooth_params, 856 scale_data, 857 smooth_data) 858 859 def summary(self, yname=None, xname=None, title=None, alpha=0.05): 860 861 df = pd.DataFrame() 862 863 typ = (["Mean"] * self.k_exog + ["Scale"] * self.k_scale + 864 ["Smooth"] * self.k_smooth) 865 if self._has_noise: 866 typ += ["SD"] * self.k_noise 867 df["Type"] = typ 868 869 df["coef"] = self.params 870 871 try: 872 df["std err"] = np.sqrt(np.diag(self.cov_params())) 873 except Exception: 874 df["std err"] = np.nan 875 876 from scipy.stats.distributions import norm 877 df["tvalues"] = df.coef / df["std err"] 878 df["P>|t|"] = 2 * norm.sf(np.abs(df.tvalues)) 879 880 f = norm.ppf(1 - alpha / 2) 881 df["[%.3f" % (alpha / 2)] = df.coef - f * df["std err"] 882 df["%.3f]" % (1 - alpha / 2)] = df.coef + f * df["std err"] 883 884 df.index = self.model.data.param_names 885 886 summ = summary2.Summary() 887 if title is None: 888 title = "Gaussian process regression results" 889 summ.add_title(title) 890 summ.add_df(df) 891 892 return summ 893