1# -*- coding: utf-8 -*- 2""" 3State Space Model 4 5Author: Chad Fulton 6License: Simplified-BSD 7""" 8from statsmodels.compat.pandas import NumericIndex, is_int_index 9 10import contextlib 11import warnings 12 13import datetime as dt 14from types import SimpleNamespace 15import numpy as np 16import pandas as pd 17from scipy.stats import norm 18 19from statsmodels.tools.tools import pinv_extended, Bunch 20from statsmodels.tools.sm_exceptions import PrecisionWarning, ValueWarning 21from statsmodels.tools.numdiff import (_get_epsilon, approx_hess_cs, 22 approx_fprime_cs, approx_fprime) 23from statsmodels.tools.decorators import cache_readonly 24from statsmodels.tools.eval_measures import aic, aicc, bic, hqic 25 26import statsmodels.base.wrapper as wrap 27 28import statsmodels.tsa.base.prediction as pred 29 30from statsmodels.base.data import PandasData 31import statsmodels.tsa.base.tsa_model as tsbase 32 33from .news import NewsResults 34from .simulation_smoother import SimulationSmoother 35from .kalman_smoother import SmootherResults 36from .kalman_filter import INVERT_UNIVARIATE, SOLVE_LU, MEMORY_CONSERVE 37from .initialization import Initialization 38from .tools import prepare_exog, concat, _safe_cond 39 40 41def _handle_args(names, defaults, *args, **kwargs): 42 output_args = [] 43 # We need to handle positional arguments in two ways, in case this was 44 # called by a Scipy optimization routine 45 if len(args) > 0: 46 # the fit() method will pass a dictionary 47 if isinstance(args[0], dict): 48 flags = args[0] 49 # otherwise, a user may have just used positional arguments... 50 else: 51 flags = dict(zip(names, args)) 52 for i in range(len(names)): 53 output_args.append(flags.get(names[i], defaults[i])) 54 55 for name, value in flags.items(): 56 if name in kwargs: 57 raise TypeError("loglike() got multiple values for keyword" 58 " argument '%s'" % name) 59 else: 60 for i in range(len(names)): 61 output_args.append(kwargs.pop(names[i], defaults[i])) 62 63 return tuple(output_args) + (kwargs,) 64 65 66def _check_index(desired_index, dta, title='data'): 67 given_index = None 68 if isinstance(dta, (pd.Series, pd.DataFrame)): 69 given_index = dta.index 70 if given_index is not None and not desired_index.equals(given_index): 71 desired_freq = getattr(desired_index, 'freq', None) 72 given_freq = getattr(given_index, 'freq', None) 73 if ((desired_freq is not None or given_freq is not None) and 74 desired_freq != given_freq): 75 raise ValueError('Given %s does not have an index' 76 ' that extends the index of the' 77 ' model. Expected index frequency is' 78 ' "%s", but got "%s".' 79 % (title, desired_freq, given_freq)) 80 else: 81 raise ValueError('Given %s does not have an index' 82 ' that extends the index of the' 83 ' model.' % title) 84 85 86class MLEModel(tsbase.TimeSeriesModel): 87 r""" 88 State space model for maximum likelihood estimation 89 90 Parameters 91 ---------- 92 endog : array_like 93 The observed time-series process :math:`y` 94 k_states : int 95 The dimension of the unobserved state process. 96 exog : array_like, optional 97 Array of exogenous regressors, shaped nobs x k. Default is no 98 exogenous regressors. 99 dates : array_like of datetime, optional 100 An array-like object of datetime objects. If a Pandas object is given 101 for endog, it is assumed to have a DateIndex. 102 freq : str, optional 103 The frequency of the time-series. A Pandas offset or 'B', 'D', 'W', 104 'M', 'A', or 'Q'. This is optional if dates are given. 
    **kwargs
        Keyword arguments may be used to provide default values for state
        space matrices or for Kalman filtering options. See `Representation`
        and `KalmanFilter` for more details.

    Attributes
    ----------
    ssm : statsmodels.tsa.statespace.kalman_filter.KalmanFilter
        Underlying state space representation.

    See Also
    --------
    statsmodels.tsa.statespace.mlemodel.MLEResults
    statsmodels.tsa.statespace.kalman_filter.KalmanFilter
    statsmodels.tsa.statespace.representation.Representation

    Notes
    -----
    This class wraps the state space model with Kalman filtering to add in
    functionality for maximum likelihood estimation. In particular, it adds
    the concept of updating the state space representation based on a defined
    set of parameters, through the `update` method or `updater` attribute (see
    below for more details on which to use when), and it adds a `fit` method
    which uses a numerical optimizer to select the parameters that maximize
    the likelihood of the model.

    The `start_params` attribute and the `update` method must be overridden
    in the child class (and the `transform_params` and `untransform_params`
    methods, if needed).
    """

    def __init__(self, endog, k_states, exog=None, dates=None, freq=None,
                 **kwargs):
        # Initialize the model base
        super(MLEModel, self).__init__(endog=endog, exog=exog,
                                       dates=dates, freq=freq,
                                       missing='none')

        # Store kwargs to recreate model
        self._init_kwargs = kwargs

        # Prepare the endog array: C-ordered, shape=(nobs x k_endog)
        self.endog, self.exog = self.prepare_data()

        # Dimensions
        self.nobs = self.endog.shape[0]
        self.k_states = k_states

        # Initialize the state-space representation
        self.initialize_statespace(**kwargs)

        # Set up holders for fixed parameters
        self._has_fixed_params = False
        self._fixed_params = None
        self._params_index = None
        self._fixed_params_index = None
        self._free_params_index = None

    def prepare_data(self):
        """
        Prepare data for use in the state space representation
        """
        endog = np.array(self.data.orig_endog, order='C')
        exog = self.data.orig_exog
        if exog is not None:
            exog = np.array(exog)

        # Base class may allow 1-dim data, whereas we need 2-dim
        if endog.ndim == 1:
            endog.shape = (endog.shape[0], 1)  # this will be C-contiguous

        return endog, exog

    def initialize_statespace(self, **kwargs):
        """
        Initialize the state space representation

        Parameters
        ----------
        **kwargs
            Additional keyword arguments to pass to the state space class
            constructor.
        """
        # (Now self.endog is C-ordered and in long format (nobs x k_endog). To
        # get F-ordered and in wide format, we just need to transpose.)
        endog = self.endog.T

        # Instantiate the state space object
        self.ssm = SimulationSmoother(endog.shape[0], self.k_states,
                                      nobs=endog.shape[1], **kwargs)
        # Bind the data to the model
        self.ssm.bind(endog)

        # Other dimensions, now that `ssm` is available
        self.k_endog = self.ssm.k_endog

    def _get_index_with_final_state(self):
        # The index we inherit from `TimeSeriesModel` will only cover the
        # data sample itself, but we will also need an index value for the
        # final state, which is one time step beyond the last datapoint.
204 # This method figures out an appropriate value for the three types of 205 # supported indexes: date-based, Int64Index, or RangeIndex 206 if self._index_dates: 207 if isinstance(self._index, pd.DatetimeIndex): 208 index = pd.date_range( 209 start=self._index[0], periods=len(self._index) + 1, 210 freq=self._index.freq) 211 elif isinstance(self._index, pd.PeriodIndex): 212 index = pd.period_range( 213 start=self._index[0], periods=len(self._index) + 1, 214 freq=self._index.freq) 215 else: 216 raise NotImplementedError 217 elif isinstance(self._index, pd.RangeIndex): 218 # COMPAT: pd.RangeIndex does not have start, stop, step prior to 219 # pandas 0.25 220 try: 221 start = self._index.start 222 stop = self._index.stop 223 step = self._index.step 224 except AttributeError: 225 start = self._index._start 226 stop = self._index._stop 227 step = self._index._step 228 index = pd.RangeIndex(start, stop + step, step) 229 elif is_int_index(self._index): 230 # The only valid Int64Index is a full, incrementing index, so this 231 # is general 232 value = self._index[-1] + 1 233 index = NumericIndex(self._index.tolist() + [value]) 234 else: 235 raise NotImplementedError 236 return index 237 238 def __setitem__(self, key, value): 239 return self.ssm.__setitem__(key, value) 240 241 def __getitem__(self, key): 242 return self.ssm.__getitem__(key) 243 244 def _get_init_kwds(self): 245 # Get keywords based on model attributes 246 kwds = super(MLEModel, self)._get_init_kwds() 247 248 for key, value in kwds.items(): 249 if value is None and hasattr(self.ssm, key): 250 kwds[key] = getattr(self.ssm, key) 251 252 return kwds 253 254 def clone(self, endog, exog=None, **kwargs): 255 """ 256 Clone state space model with new data and optionally new specification 257 258 Parameters 259 ---------- 260 endog : array_like 261 The observed time-series process :math:`y` 262 k_states : int 263 The dimension of the unobserved state process. 264 exog : array_like, optional 265 Array of exogenous regressors, shaped nobs x k. Default is no 266 exogenous regressors. 267 kwargs 268 Keyword arguments to pass to the new model class to change the 269 model specification. 270 271 Returns 272 ------- 273 model : MLEModel subclass 274 275 Notes 276 ----- 277 This method must be implemented 278 """ 279 raise NotImplementedError('This method is not implemented in the base' 280 ' class and must be set up by each specific' 281 ' model.') 282 283 def _clone_from_init_kwds(self, endog, **kwargs): 284 # Cannot make this the default, because there is extra work required 285 # for subclasses to make _get_init_kwds useful. 286 use_kwargs = self._get_init_kwds() 287 use_kwargs.update(kwargs) 288 289 # Check for `exog` 290 if getattr(self, 'k_exog', 0) > 0 and kwargs.get('exog', None) is None: 291 raise ValueError('Cloning a model with an exogenous component' 292 ' requires specifying a new exogenous array using' 293 ' the `exog` argument.') 294 295 mod = self.__class__(endog, **use_kwargs) 296 return mod 297 298 def set_filter_method(self, filter_method=None, **kwargs): 299 """ 300 Set the filtering method 301 302 The filtering method controls aspects of which Kalman filtering 303 approach will be used. 304 305 Parameters 306 ---------- 307 filter_method : int, optional 308 Bitmask value to set the filter method to. See notes for details. 309 **kwargs 310 Keyword arguments may be used to influence the filter method by 311 setting individual boolean flags. See notes for details. 312 313 Notes 314 ----- 315 This method is rarely used. 
See the corresponding function in the 316 `KalmanFilter` class for details. 317 """ 318 self.ssm.set_filter_method(filter_method, **kwargs) 319 320 def set_inversion_method(self, inversion_method=None, **kwargs): 321 """ 322 Set the inversion method 323 324 The Kalman filter may contain one matrix inversion: that of the 325 forecast error covariance matrix. The inversion method controls how and 326 if that inverse is performed. 327 328 Parameters 329 ---------- 330 inversion_method : int, optional 331 Bitmask value to set the inversion method to. See notes for 332 details. 333 **kwargs 334 Keyword arguments may be used to influence the inversion method by 335 setting individual boolean flags. See notes for details. 336 337 Notes 338 ----- 339 This method is rarely used. See the corresponding function in the 340 `KalmanFilter` class for details. 341 """ 342 self.ssm.set_inversion_method(inversion_method, **kwargs) 343 344 def set_stability_method(self, stability_method=None, **kwargs): 345 """ 346 Set the numerical stability method 347 348 The Kalman filter is a recursive algorithm that may in some cases 349 suffer issues with numerical stability. The stability method controls 350 what, if any, measures are taken to promote stability. 351 352 Parameters 353 ---------- 354 stability_method : int, optional 355 Bitmask value to set the stability method to. See notes for 356 details. 357 **kwargs 358 Keyword arguments may be used to influence the stability method by 359 setting individual boolean flags. See notes for details. 360 361 Notes 362 ----- 363 This method is rarely used. See the corresponding function in the 364 `KalmanFilter` class for details. 365 """ 366 self.ssm.set_stability_method(stability_method, **kwargs) 367 368 def set_conserve_memory(self, conserve_memory=None, **kwargs): 369 """ 370 Set the memory conservation method 371 372 By default, the Kalman filter computes a number of intermediate 373 matrices at each iteration. The memory conservation options control 374 which of those matrices are stored. 375 376 Parameters 377 ---------- 378 conserve_memory : int, optional 379 Bitmask value to set the memory conservation method to. See notes 380 for details. 381 **kwargs 382 Keyword arguments may be used to influence the memory conservation 383 method by setting individual boolean flags. 384 385 Notes 386 ----- 387 This method is rarely used. See the corresponding function in the 388 `KalmanFilter` class for details. 389 """ 390 self.ssm.set_conserve_memory(conserve_memory, **kwargs) 391 392 def set_smoother_output(self, smoother_output=None, **kwargs): 393 """ 394 Set the smoother output 395 396 The smoother can produce several types of results. The smoother output 397 variable controls which are calculated and returned. 398 399 Parameters 400 ---------- 401 smoother_output : int, optional 402 Bitmask value to set the smoother output to. See notes for details. 403 **kwargs 404 Keyword arguments may be used to influence the smoother output by 405 setting individual boolean flags. 406 407 Notes 408 ----- 409 This method is rarely used. See the corresponding function in the 410 `KalmanSmoother` class for details. 
411 """ 412 self.ssm.set_smoother_output(smoother_output, **kwargs) 413 414 def initialize_known(self, initial_state, initial_state_cov): 415 """Initialize known""" 416 self.ssm.initialize_known(initial_state, initial_state_cov) 417 418 def initialize_approximate_diffuse(self, variance=None): 419 """Initialize approximate diffuse""" 420 self.ssm.initialize_approximate_diffuse(variance) 421 422 def initialize_stationary(self): 423 """Initialize stationary""" 424 self.ssm.initialize_stationary() 425 426 @property 427 def initialization(self): 428 return self.ssm.initialization 429 430 @initialization.setter 431 def initialization(self, value): 432 self.ssm.initialization = value 433 434 @property 435 def initial_variance(self): 436 return self.ssm.initial_variance 437 438 @initial_variance.setter 439 def initial_variance(self, value): 440 self.ssm.initial_variance = value 441 442 @property 443 def loglikelihood_burn(self): 444 return self.ssm.loglikelihood_burn 445 446 @loglikelihood_burn.setter 447 def loglikelihood_burn(self, value): 448 self.ssm.loglikelihood_burn = value 449 450 @property 451 def tolerance(self): 452 return self.ssm.tolerance 453 454 @tolerance.setter 455 def tolerance(self, value): 456 self.ssm.tolerance = value 457 458 def _validate_can_fix_params(self, param_names): 459 for param_name in param_names: 460 if param_name not in self.param_names: 461 raise ValueError('Invalid parameter name passed: "%s".' 462 % param_name) 463 464 @contextlib.contextmanager 465 def fix_params(self, params): 466 """ 467 Fix parameters to specific values (context manager) 468 469 Parameters 470 ---------- 471 params : dict 472 Dictionary describing the fixed parameter values, of the form 473 `param_name: fixed_value`. See the `param_names` property for valid 474 parameter names. 
475 476 Examples 477 -------- 478 >>> mod = sm.tsa.SARIMAX(endog, order=(1, 0, 1)) 479 >>> with mod.fix_params({'ar.L1': 0.5}): 480 res = mod.fit() 481 """ 482 k_params = len(self.param_names) 483 # Initialization (this is done here rather than in the constructor 484 # because param_names may not be available at that point) 485 if self._fixed_params is None: 486 self._fixed_params = {} 487 self._params_index = dict( 488 zip(self.param_names, np.arange(k_params))) 489 490 # Cache the current fixed parameters 491 cache_fixed_params = self._fixed_params.copy() 492 cache_has_fixed_params = self._has_fixed_params 493 cache_fixed_params_index = self._fixed_params_index 494 cache_free_params_index = self._free_params_index 495 496 # Validate parameter names and values 497 all_fixed_param_names = ( 498 set(params.keys()) | set(self._fixed_params.keys()) 499 ) 500 self._validate_can_fix_params(all_fixed_param_names) 501 502 # Set the new fixed parameters, keeping the order as given by 503 # param_names 504 self._fixed_params.update(params) 505 self._fixed_params = dict([ 506 (name, self._fixed_params[name]) for name in self.param_names 507 if name in self._fixed_params]) 508 509 # Update associated values 510 self._has_fixed_params = True 511 self._fixed_params_index = [self._params_index[key] 512 for key in self._fixed_params.keys()] 513 self._free_params_index = list( 514 set(np.arange(k_params)).difference(self._fixed_params_index)) 515 516 try: 517 yield 518 finally: 519 # Reset the fixed parameters 520 self._has_fixed_params = cache_has_fixed_params 521 self._fixed_params = cache_fixed_params 522 self._fixed_params_index = cache_fixed_params_index 523 self._free_params_index = cache_free_params_index 524 525 def fit(self, start_params=None, transformed=True, includes_fixed=False, 526 cov_type=None, cov_kwds=None, method='lbfgs', maxiter=50, 527 full_output=1, disp=5, callback=None, return_params=False, 528 optim_score=None, optim_complex_step=None, optim_hessian=None, 529 flags=None, low_memory=False, **kwargs): 530 """ 531 Fits the model by maximum likelihood via Kalman filter. 532 533 Parameters 534 ---------- 535 start_params : array_like, optional 536 Initial guess of the solution for the loglikelihood maximization. 537 If None, the default is given by Model.start_params. 538 transformed : bool, optional 539 Whether or not `start_params` is already transformed. Default is 540 True. 541 includes_fixed : bool, optional 542 If parameters were previously fixed with the `fix_params` method, 543 this argument describes whether or not `start_params` also includes 544 the fixed parameters, in addition to the free parameters. Default 545 is False. 546 cov_type : str, optional 547 The `cov_type` keyword governs the method for calculating the 548 covariance matrix of parameter estimates. Can be one of: 549 550 - 'opg' for the outer product of gradient estimator 551 - 'oim' for the observed information matrix estimator, calculated 552 using the method of Harvey (1989) 553 - 'approx' for the observed information matrix estimator, 554 calculated using a numerical approximation of the Hessian matrix. 555 - 'robust' for an approximate (quasi-maximum likelihood) covariance 556 matrix that may be valid even in the presence of some 557 misspecifications. Intermediate calculations use the 'oim' 558 method. 559 - 'robust_approx' is the same as 'robust' except that the 560 intermediate calculations use the 'approx' method. 561 - 'none' for no covariance matrix calculation. 
562 563 Default is 'opg' unless memory conservation is used to avoid 564 computing the loglikelihood values for each observation, in which 565 case the default is 'approx'. 566 cov_kwds : dict or None, optional 567 A dictionary of arguments affecting covariance matrix computation. 568 569 **opg, oim, approx, robust, robust_approx** 570 571 - 'approx_complex_step' : bool, optional - If True, numerical 572 approximations are computed using complex-step methods. If False, 573 numerical approximations are computed using finite difference 574 methods. Default is True. 575 - 'approx_centered' : bool, optional - If True, numerical 576 approximations computed using finite difference methods use a 577 centered approximation. Default is False. 578 method : str, optional 579 The `method` determines which solver from `scipy.optimize` 580 is used, and it can be chosen from among the following strings: 581 582 - 'newton' for Newton-Raphson 583 - 'nm' for Nelder-Mead 584 - 'bfgs' for Broyden-Fletcher-Goldfarb-Shanno (BFGS) 585 - 'lbfgs' for limited-memory BFGS with optional box constraints 586 - 'powell' for modified Powell's method 587 - 'cg' for conjugate gradient 588 - 'ncg' for Newton-conjugate gradient 589 - 'basinhopping' for global basin-hopping solver 590 591 The explicit arguments in `fit` are passed to the solver, 592 with the exception of the basin-hopping solver. Each 593 solver has several optional arguments that are not the same across 594 solvers. See the notes section below (or scipy.optimize) for the 595 available arguments and for the list of explicit arguments that the 596 basin-hopping solver supports. 597 maxiter : int, optional 598 The maximum number of iterations to perform. 599 full_output : bool, optional 600 Set to True to have all available output in the Results object's 601 mle_retvals attribute. The output is dependent on the solver. 602 See LikelihoodModelResults notes section for more information. 603 disp : bool, optional 604 Set to True to print convergence messages. 605 callback : callable callback(xk), optional 606 Called after each iteration, as callback(xk), where xk is the 607 current parameter vector. 608 return_params : bool, optional 609 Whether or not to return only the array of maximizing parameters. 610 Default is False. 611 optim_score : {'harvey', 'approx'} or None, optional 612 The method by which the score vector is calculated. 'harvey' uses 613 the method from Harvey (1989), 'approx' uses either finite 614 difference or complex step differentiation depending upon the 615 value of `optim_complex_step`, and None uses the built-in gradient 616 approximation of the optimizer. Default is None. This keyword is 617 only relevant if the optimization method uses the score. 618 optim_complex_step : bool, optional 619 Whether or not to use complex step differentiation when 620 approximating the score; if False, finite difference approximation 621 is used. Default is True. This keyword is only relevant if 622 `optim_score` is set to 'harvey' or 'approx'. 623 optim_hessian : {'opg','oim','approx'}, optional 624 The method by which the Hessian is numerically approximated. 'opg' 625 uses outer product of gradients, 'oim' uses the information 626 matrix formula from Harvey (1989), and 'approx' uses numerical 627 approximation. This keyword is only relevant if the 628 optimization method uses the Hessian matrix. 629 low_memory : bool, optional 630 If set to True, techniques are applied to substantially reduce 631 memory usage. 
If used, some features of the results object will 632 not be available (including smoothed results and in-sample 633 prediction), although out-of-sample forecasting is possible. 634 Default is False. 635 **kwargs 636 Additional keyword arguments to pass to the optimizer. 637 638 Returns 639 ------- 640 results 641 Results object holding results from fitting a state space model. 642 643 See Also 644 -------- 645 statsmodels.base.model.LikelihoodModel.fit 646 statsmodels.tsa.statespace.mlemodel.MLEResults 647 statsmodels.tsa.statespace.structural.UnobservedComponentsResults 648 """ 649 if start_params is None: 650 start_params = self.start_params 651 transformed = True 652 includes_fixed = True 653 654 # Update the score method 655 if optim_score is None and method == 'lbfgs': 656 kwargs.setdefault('approx_grad', True) 657 kwargs.setdefault('epsilon', 1e-5) 658 elif optim_score is None: 659 optim_score = 'approx' 660 661 # Check for complex step differentiation 662 if optim_complex_step is None: 663 optim_complex_step = not self.ssm._complex_endog 664 elif optim_complex_step and self.ssm._complex_endog: 665 raise ValueError('Cannot use complex step derivatives when data' 666 ' or parameters are complex.') 667 668 # Standardize starting parameters 669 start_params = self.handle_params(start_params, transformed=True, 670 includes_fixed=includes_fixed) 671 672 # Unconstrain the starting parameters 673 if transformed: 674 start_params = self.untransform_params(start_params) 675 676 # Remove any fixed parameters 677 if self._has_fixed_params: 678 start_params = start_params[self._free_params_index] 679 680 # If all parameters are fixed, we are done 681 if self._has_fixed_params and len(start_params) == 0: 682 mlefit = Bunch(params=[], mle_retvals=None, 683 mle_settings=None) 684 else: 685 # Remove disallowed kwargs 686 disallow = ( 687 "concentrate_scale", 688 "enforce_stationarity", 689 "enforce_invertibility" 690 ) 691 kwargs = {k: v for k, v in kwargs.items() if k not in disallow} 692 # Maximum likelihood estimation 693 if flags is None: 694 flags = {} 695 flags.update({ 696 'transformed': False, 697 'includes_fixed': False, 698 'score_method': optim_score, 699 'approx_complex_step': optim_complex_step 700 }) 701 if optim_hessian is not None: 702 flags['hessian_method'] = optim_hessian 703 fargs = (flags,) 704 mlefit = super(MLEModel, self).fit(start_params, method=method, 705 fargs=fargs, 706 maxiter=maxiter, 707 full_output=full_output, 708 disp=disp, callback=callback, 709 skip_hessian=True, **kwargs) 710 711 # Just return the fitted parameters if requested 712 if return_params: 713 return self.handle_params(mlefit.params, transformed=False, 714 includes_fixed=False) 715 # Otherwise construct the results class if desired 716 else: 717 # Handle memory conservation option 718 if low_memory: 719 conserve_memory = self.ssm.conserve_memory 720 self.ssm.set_conserve_memory(MEMORY_CONSERVE) 721 722 # Perform filtering / smoothing 723 if (self.ssm.memory_no_predicted or self.ssm.memory_no_gain 724 or self.ssm.memory_no_smoothing): 725 func = self.filter 726 else: 727 func = self.smooth 728 res = func(mlefit.params, transformed=False, includes_fixed=False, 729 cov_type=cov_type, cov_kwds=cov_kwds) 730 731 res.mlefit = mlefit 732 res.mle_retvals = mlefit.mle_retvals 733 res.mle_settings = mlefit.mle_settings 734 735 # Reset memory conservation 736 if low_memory: 737 self.ssm.set_conserve_memory(conserve_memory) 738 739 return res 740 741 def fit_constrained(self, constraints, start_params=None, 
                        **fit_kwds):
        """
        Fit the model with some parameters subject to equality constraints.

        Parameters
        ----------
        constraints : dict
            Dictionary of constraints, of the form `param_name: fixed_value`.
            See the `param_names` property for valid parameter names.
        start_params : array_like, optional
            Initial guess of the solution for the loglikelihood maximization.
            If None, the default is given by Model.start_params.
        **fit_kwds : keyword arguments
            fit_kwds are used in the optimization of the remaining parameters.

        Returns
        -------
        results : Results instance

        Examples
        --------
        >>> mod = sm.tsa.SARIMAX(endog, order=(1, 0, 1))
        >>> res = mod.fit_constrained({'ar.L1': 0.5})
        """
        with self.fix_params(constraints):
            res = self.fit(start_params, **fit_kwds)
        return res

    @property
    def _res_classes(self):
        return {'fit': (MLEResults, MLEResultsWrapper)}

    def _wrap_results(self, params, result, return_raw, cov_type=None,
                      cov_kwds=None, results_class=None, wrapper_class=None):
        if not return_raw:
            # Wrap in a results object
            result_kwargs = {}
            if cov_type is not None:
                result_kwargs['cov_type'] = cov_type
            if cov_kwds is not None:
                result_kwargs['cov_kwds'] = cov_kwds

            if results_class is None:
                results_class = self._res_classes['fit'][0]
            if wrapper_class is None:
                wrapper_class = self._res_classes['fit'][1]

            res = results_class(self, params, result, **result_kwargs)
            result = wrapper_class(res)
        return result

    def filter(self, params, transformed=True, includes_fixed=False,
               complex_step=False, cov_type=None, cov_kwds=None,
               return_ssm=False, results_class=None,
               results_wrapper_class=None, low_memory=False, **kwargs):
        """
        Kalman filtering

        Parameters
        ----------
        params : array_like
            Array of parameters at which to evaluate the loglikelihood
            function.
        transformed : bool, optional
            Whether or not `params` is already transformed. Default is True.
        return_ssm : bool, optional
            Whether or not to return only the state space output or a full
            results object. Default is to return a full results object.
        cov_type : str, optional
            See `MLEResults.fit` for a description of covariance matrix types
            for the results object.
        cov_kwds : dict or None, optional
            See `MLEResults.get_robustcov_results` for a description of
            required keywords for alternative covariance estimators.
        low_memory : bool, optional
            If set to True, techniques are applied to substantially reduce
            memory usage. If used, some features of the results object will
            not be available (including in-sample prediction), although
            out-of-sample forecasting is possible. Default is False.
        **kwargs
            Additional keyword arguments to pass to the Kalman filter. See
            `KalmanFilter.filter` for more details.
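
        Examples
        --------
        A minimal sketch (assuming, as in the examples above, that `endog` is
        an observed univariate series); the model is filtered at a known
        parameter vector rather than fitted, here with the AR coefficient and
        innovation variance chosen arbitrarily:

        >>> mod = sm.tsa.SARIMAX(endog, order=(1, 0, 0))
        >>> res = mod.filter([0.5, 1.0])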
        """
        params = self.handle_params(params, transformed=transformed,
                                    includes_fixed=includes_fixed)
        self.update(params, transformed=True, includes_fixed=True,
                    complex_step=complex_step)

        # Save the parameter names
        self.data.param_names = self.param_names

        if complex_step:
            kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU

        # Handle memory conservation
        if low_memory:
            kwargs['conserve_memory'] = MEMORY_CONSERVE

        # Get the state space output
        result = self.ssm.filter(complex_step=complex_step, **kwargs)

        # Wrap in a results object
        return self._wrap_results(params, result, return_ssm, cov_type,
                                  cov_kwds, results_class,
                                  results_wrapper_class)

    def smooth(self, params, transformed=True, includes_fixed=False,
               complex_step=False, cov_type=None, cov_kwds=None,
               return_ssm=False, results_class=None,
               results_wrapper_class=None, **kwargs):
        """
        Kalman smoothing

        Parameters
        ----------
        params : array_like
            Array of parameters at which to evaluate the loglikelihood
            function.
        transformed : bool, optional
            Whether or not `params` is already transformed. Default is True.
        return_ssm : bool, optional
            Whether or not to return only the state space output or a full
            results object. Default is to return a full results object.
        cov_type : str, optional
            See `MLEResults.fit` for a description of covariance matrix types
            for the results object.
        cov_kwds : dict or None, optional
            See `MLEResults.get_robustcov_results` for a description of
            required keywords for alternative covariance estimators.
        **kwargs
            Additional keyword arguments to pass to the Kalman filter. See
            `KalmanFilter.filter` for more details.
        """
        params = self.handle_params(params, transformed=transformed,
                                    includes_fixed=includes_fixed)
        self.update(params, transformed=True, includes_fixed=True,
                    complex_step=complex_step)

        # Save the parameter names
        self.data.param_names = self.param_names

        if complex_step:
            kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU

        # Get the state space output
        result = self.ssm.smooth(complex_step=complex_step, **kwargs)

        # Wrap in a results object
        return self._wrap_results(params, result, return_ssm, cov_type,
                                  cov_kwds, results_class,
                                  results_wrapper_class)

    _loglike_param_names = ['transformed', 'includes_fixed', 'complex_step']
    _loglike_param_defaults = [True, False, False]

    def loglike(self, params, *args, **kwargs):
        """
        Loglikelihood evaluation

        Parameters
        ----------
        params : array_like
            Array of parameters at which to evaluate the loglikelihood
            function.
        transformed : bool, optional
            Whether or not `params` is already transformed. Default is True.
        **kwargs
            Additional keyword arguments to pass to the Kalman filter. See
            `KalmanFilter.filter` for more details.

        See Also
        --------
        update : modifies the internal state of the state space model to
            reflect new params

        Notes
        -----
        [1]_ recommend maximizing the average likelihood to avoid scale issues;
        this is done automatically by the base Model fit method.

        References
        ----------
        .. [1] Koopman, Siem Jan, Neil Shephard, and Jurgen A. Doornik. 1999.
           Statistical Algorithms for Models in State Space Using SsfPack 2.2.
           Econometrics Journal 2 (1): 107-60. doi:10.1111/1368-423X.00023.
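
        Examples
        --------
        A minimal sketch (assuming, as in the examples above, that `endog` is
        an observed univariate series); the loglikelihood can be evaluated at
        any parameter vector without fitting, for example at the starting
        parameters:

        >>> mod = sm.tsa.SARIMAX(endog, order=(1, 0, 1))
        >>> llf = mod.loglike(mod.start_params)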
926 """ 927 transformed, includes_fixed, complex_step, kwargs = _handle_args( 928 MLEModel._loglike_param_names, MLEModel._loglike_param_defaults, 929 *args, **kwargs) 930 931 params = self.handle_params(params, transformed=transformed, 932 includes_fixed=includes_fixed) 933 self.update(params, transformed=True, includes_fixed=True, 934 complex_step=complex_step) 935 936 if complex_step: 937 kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU 938 939 loglike = self.ssm.loglike(complex_step=complex_step, **kwargs) 940 941 # Koopman, Shephard, and Doornik recommend maximizing the average 942 # likelihood to avoid scale issues, but the averaging is done 943 # automatically in the base model `fit` method 944 return loglike 945 946 def loglikeobs(self, params, transformed=True, includes_fixed=False, 947 complex_step=False, **kwargs): 948 """ 949 Loglikelihood evaluation 950 951 Parameters 952 ---------- 953 params : array_like 954 Array of parameters at which to evaluate the loglikelihood 955 function. 956 transformed : bool, optional 957 Whether or not `params` is already transformed. Default is True. 958 **kwargs 959 Additional keyword arguments to pass to the Kalman filter. See 960 `KalmanFilter.filter` for more details. 961 962 See Also 963 -------- 964 update : modifies the internal state of the Model to reflect new params 965 966 Notes 967 ----- 968 [1]_ recommend maximizing the average likelihood to avoid scale issues; 969 this is done automatically by the base Model fit method. 970 971 References 972 ---------- 973 .. [1] Koopman, Siem Jan, Neil Shephard, and Jurgen A. Doornik. 1999. 974 Statistical Algorithms for Models in State Space Using SsfPack 2.2. 975 Econometrics Journal 2 (1): 107-60. doi:10.1111/1368-423X.00023. 976 """ 977 params = self.handle_params(params, transformed=transformed, 978 includes_fixed=includes_fixed) 979 980 # If we're using complex-step differentiation, then we cannot use 981 # Cholesky factorization 982 if complex_step: 983 kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU 984 985 self.update(params, transformed=True, includes_fixed=True, 986 complex_step=complex_step) 987 988 return self.ssm.loglikeobs(complex_step=complex_step, **kwargs) 989 990 def simulation_smoother(self, simulation_output=None, **kwargs): 991 r""" 992 Retrieve a simulation smoother for the state space model. 993 994 Parameters 995 ---------- 996 simulation_output : int, optional 997 Determines which simulation smoother output is calculated. 998 Default is all (including state and disturbances). 999 **kwargs 1000 Additional keyword arguments, used to set the simulation output. 1001 See `set_simulation_output` for more details. 
1002 1003 Returns 1004 ------- 1005 SimulationSmoothResults 1006 """ 1007 return self.ssm.simulation_smoother( 1008 simulation_output=simulation_output, **kwargs) 1009 1010 def _forecasts_error_partial_derivatives(self, params, transformed=True, 1011 includes_fixed=False, 1012 approx_complex_step=None, 1013 approx_centered=False, 1014 res=None, **kwargs): 1015 params = np.array(params, ndmin=1) 1016 1017 # We cannot use complex-step differentiation with non-transformed 1018 # parameters 1019 if approx_complex_step is None: 1020 approx_complex_step = transformed 1021 if not transformed and approx_complex_step: 1022 raise ValueError("Cannot use complex-step approximations to" 1023 " calculate the observed_information_matrix" 1024 " with untransformed parameters.") 1025 1026 # If we're using complex-step differentiation, then we cannot use 1027 # Cholesky factorization 1028 if approx_complex_step: 1029 kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU 1030 1031 # Get values at the params themselves 1032 if res is None: 1033 self.update(params, transformed=transformed, 1034 includes_fixed=includes_fixed, 1035 complex_step=approx_complex_step) 1036 res = self.ssm.filter(complex_step=approx_complex_step, **kwargs) 1037 1038 # Setup 1039 n = len(params) 1040 1041 # Compute partial derivatives w.r.t. forecast error and forecast 1042 # error covariance 1043 partials_forecasts_error = ( 1044 np.zeros((self.k_endog, self.nobs, n)) 1045 ) 1046 partials_forecasts_error_cov = ( 1047 np.zeros((self.k_endog, self.k_endog, self.nobs, n)) 1048 ) 1049 if approx_complex_step: 1050 epsilon = _get_epsilon(params, 2, None, n) 1051 increments = np.identity(n) * 1j * epsilon 1052 1053 for i, ih in enumerate(increments): 1054 self.update(params + ih, transformed=transformed, 1055 includes_fixed=includes_fixed, 1056 complex_step=True) 1057 _res = self.ssm.filter(complex_step=True, **kwargs) 1058 1059 partials_forecasts_error[:, :, i] = ( 1060 _res.forecasts_error.imag / epsilon[i] 1061 ) 1062 1063 partials_forecasts_error_cov[:, :, :, i] = ( 1064 _res.forecasts_error_cov.imag / epsilon[i] 1065 ) 1066 elif not approx_centered: 1067 epsilon = _get_epsilon(params, 2, None, n) 1068 ei = np.zeros((n,), float) 1069 for i in range(n): 1070 ei[i] = epsilon[i] 1071 self.update(params + ei, transformed=transformed, 1072 includes_fixed=includes_fixed, complex_step=False) 1073 _res = self.ssm.filter(complex_step=False, **kwargs) 1074 1075 partials_forecasts_error[:, :, i] = ( 1076 _res.forecasts_error - res.forecasts_error) / epsilon[i] 1077 1078 partials_forecasts_error_cov[:, :, :, i] = ( 1079 _res.forecasts_error_cov - 1080 res.forecasts_error_cov) / epsilon[i] 1081 ei[i] = 0.0 1082 else: 1083 epsilon = _get_epsilon(params, 3, None, n) / 2. 
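            # Centered finite differences: below, each parameter is perturbed
            # by +epsilon[i] and -epsilon[i] in turn, and the partial
            # derivatives are computed as the difference of the two filtered
            # outputs, scaled by 2 * epsilon[i].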
            ei = np.zeros((n,), float)
            for i in range(n):
                ei[i] = epsilon[i]

                self.update(params + ei, transformed=transformed,
                            includes_fixed=includes_fixed, complex_step=False)
                _res1 = self.ssm.filter(complex_step=False, **kwargs)

                self.update(params - ei, transformed=transformed,
                            includes_fixed=includes_fixed, complex_step=False)
                _res2 = self.ssm.filter(complex_step=False, **kwargs)

                partials_forecasts_error[:, :, i] = (
                    (_res1.forecasts_error - _res2.forecasts_error) /
                    (2 * epsilon[i]))

                partials_forecasts_error_cov[:, :, :, i] = (
                    (_res1.forecasts_error_cov - _res2.forecasts_error_cov) /
                    (2 * epsilon[i]))

                ei[i] = 0.0

        return partials_forecasts_error, partials_forecasts_error_cov

    def observed_information_matrix(self, params, transformed=True,
                                    includes_fixed=False,
                                    approx_complex_step=None,
                                    approx_centered=False, **kwargs):
        """
        Observed information matrix

        Parameters
        ----------
        params : array_like
            Array of parameters at which to evaluate the loglikelihood
            function.
        **kwargs
            Additional keyword arguments to pass to the Kalman filter. See
            `KalmanFilter.filter` for more details.

        Notes
        -----
        This method is from Harvey (1989), which shows that the information
        matrix only depends on terms from the gradient. The implementation
        here is therefore partially analytic and partially numerical: it uses
        the analytic formula for the information matrix together with
        numerically computed elements of the gradient.

        References
        ----------
        Harvey, Andrew C. 1990.
        Forecasting, Structural Time Series Models and the Kalman Filter.
        Cambridge University Press.
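
        Examples
        --------
        A sketch (assuming, as in the examples above, that `endog` is an
        observed univariate series); the result is a square array with one
        row and column per parameter:

        >>> mod = sm.tsa.SARIMAX(endog, order=(1, 0, 0))
        >>> oim = mod.observed_information_matrix([0.5, 1.0])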
1137 """ 1138 params = np.array(params, ndmin=1) 1139 1140 # Setup 1141 n = len(params) 1142 1143 # We cannot use complex-step differentiation with non-transformed 1144 # parameters 1145 if approx_complex_step is None: 1146 approx_complex_step = transformed 1147 if not transformed and approx_complex_step: 1148 raise ValueError("Cannot use complex-step approximations to" 1149 " calculate the observed_information_matrix" 1150 " with untransformed parameters.") 1151 1152 # Get values at the params themselves 1153 params = self.handle_params(params, transformed=transformed, 1154 includes_fixed=includes_fixed) 1155 self.update(params, transformed=True, includes_fixed=True, 1156 complex_step=approx_complex_step) 1157 # If we're using complex-step differentiation, then we cannot use 1158 # Cholesky factorization 1159 if approx_complex_step: 1160 kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU 1161 res = self.ssm.filter(complex_step=approx_complex_step, **kwargs) 1162 dtype = self.ssm.dtype 1163 1164 # Save this for inversion later 1165 inv_forecasts_error_cov = res.forecasts_error_cov.copy() 1166 1167 partials_forecasts_error, partials_forecasts_error_cov = ( 1168 self._forecasts_error_partial_derivatives( 1169 params, transformed=transformed, includes_fixed=includes_fixed, 1170 approx_complex_step=approx_complex_step, 1171 approx_centered=approx_centered, res=res, **kwargs)) 1172 1173 # Compute the information matrix 1174 tmp = np.zeros((self.k_endog, self.k_endog, self.nobs, n), dtype=dtype) 1175 1176 information_matrix = np.zeros((n, n), dtype=dtype) 1177 d = np.maximum(self.ssm.loglikelihood_burn, res.nobs_diffuse) 1178 for t in range(d, self.nobs): 1179 inv_forecasts_error_cov[:, :, t] = ( 1180 np.linalg.inv(res.forecasts_error_cov[:, :, t]) 1181 ) 1182 for i in range(n): 1183 tmp[:, :, t, i] = np.dot( 1184 inv_forecasts_error_cov[:, :, t], 1185 partials_forecasts_error_cov[:, :, t, i] 1186 ) 1187 for i in range(n): 1188 for j in range(n): 1189 information_matrix[i, j] += ( 1190 0.5 * np.trace(np.dot(tmp[:, :, t, i], 1191 tmp[:, :, t, j])) 1192 ) 1193 information_matrix[i, j] += np.inner( 1194 partials_forecasts_error[:, t, i], 1195 np.dot(inv_forecasts_error_cov[:, :, t], 1196 partials_forecasts_error[:, t, j]) 1197 ) 1198 return information_matrix / (self.nobs - self.ssm.loglikelihood_burn) 1199 1200 def opg_information_matrix(self, params, transformed=True, 1201 includes_fixed=False, approx_complex_step=None, 1202 **kwargs): 1203 """ 1204 Outer product of gradients information matrix 1205 1206 Parameters 1207 ---------- 1208 params : array_like, optional 1209 Array of parameters at which to evaluate the loglikelihood 1210 function. 1211 **kwargs 1212 Additional arguments to the `loglikeobs` method. 1213 1214 References 1215 ---------- 1216 Berndt, Ernst R., Bronwyn Hall, Robert Hall, and Jerry Hausman. 1974. 1217 Estimation and Inference in Nonlinear Structural Models. 1218 NBER Chapters. National Bureau of Economic Research, Inc. 
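
        Examples
        --------
        A sketch (assuming, as in the examples above, that `endog` is an
        observed univariate series). Up to the handling of any burn-in
        observations, the result is the average outer product of the
        per-observation scores:

        >>> mod = sm.tsa.SARIMAX(endog, order=(1, 0, 0))
        >>> opg = mod.opg_information_matrix([0.5, 1.0])
        >>> s = mod.score_obs([0.5, 1.0])
        >>> approx = s.T @ s / mod.nobs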
1219 """ 1220 # We cannot use complex-step differentiation with non-transformed 1221 # parameters 1222 if approx_complex_step is None: 1223 approx_complex_step = transformed 1224 if not transformed and approx_complex_step: 1225 raise ValueError("Cannot use complex-step approximations to" 1226 " calculate the observed_information_matrix" 1227 " with untransformed parameters.") 1228 1229 score_obs = self.score_obs(params, transformed=transformed, 1230 includes_fixed=includes_fixed, 1231 approx_complex_step=approx_complex_step, 1232 **kwargs).transpose() 1233 return ( 1234 np.inner(score_obs, score_obs) / 1235 (self.nobs - self.ssm.loglikelihood_burn) 1236 ) 1237 1238 def _score_complex_step(self, params, **kwargs): 1239 # the default epsilon can be too small 1240 # inversion_method = INVERT_UNIVARIATE | SOLVE_LU 1241 epsilon = _get_epsilon(params, 2., None, len(params)) 1242 kwargs['transformed'] = True 1243 kwargs['complex_step'] = True 1244 return approx_fprime_cs(params, self.loglike, epsilon=epsilon, 1245 kwargs=kwargs) 1246 1247 def _score_finite_difference(self, params, approx_centered=False, 1248 **kwargs): 1249 kwargs['transformed'] = True 1250 return approx_fprime(params, self.loglike, kwargs=kwargs, 1251 centered=approx_centered) 1252 1253 def _score_harvey(self, params, approx_complex_step=True, **kwargs): 1254 score_obs = self._score_obs_harvey( 1255 params, approx_complex_step=approx_complex_step, **kwargs) 1256 return np.sum(score_obs, axis=0) 1257 1258 def _score_obs_harvey(self, params, approx_complex_step=True, 1259 approx_centered=False, includes_fixed=False, 1260 **kwargs): 1261 """ 1262 Score 1263 1264 Parameters 1265 ---------- 1266 params : array_like, optional 1267 Array of parameters at which to evaluate the loglikelihood 1268 function. 1269 **kwargs 1270 Additional keyword arguments to pass to the Kalman filter. See 1271 `KalmanFilter.filter` for more details. 1272 1273 Notes 1274 ----- 1275 This method is from Harvey (1989), section 3.4.5 1276 1277 References 1278 ---------- 1279 Harvey, Andrew C. 1990. 1280 Forecasting, Structural Time Series Models and the Kalman Filter. 1281 Cambridge University Press. 1282 """ 1283 params = np.array(params, ndmin=1) 1284 n = len(params) 1285 1286 # Get values at the params themselves 1287 self.update(params, transformed=True, includes_fixed=includes_fixed, 1288 complex_step=approx_complex_step) 1289 if approx_complex_step: 1290 kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU 1291 if 'transformed' in kwargs: 1292 del kwargs['transformed'] 1293 res = self.ssm.filter(complex_step=approx_complex_step, **kwargs) 1294 1295 # Get forecasts error partials 1296 partials_forecasts_error, partials_forecasts_error_cov = ( 1297 self._forecasts_error_partial_derivatives( 1298 params, transformed=True, includes_fixed=includes_fixed, 1299 approx_complex_step=approx_complex_step, 1300 approx_centered=approx_centered, res=res, **kwargs)) 1301 1302 # Compute partial derivatives w.r.t. 
likelihood function 1303 partials = np.zeros((self.nobs, n)) 1304 k_endog = self.k_endog 1305 for t in range(self.nobs): 1306 inv_forecasts_error_cov = np.linalg.inv( 1307 res.forecasts_error_cov[:, :, t]) 1308 1309 for i in range(n): 1310 partials[t, i] += np.trace(np.dot( 1311 np.dot(inv_forecasts_error_cov, 1312 partials_forecasts_error_cov[:, :, t, i]), 1313 (np.eye(k_endog) - 1314 np.dot(inv_forecasts_error_cov, 1315 np.outer(res.forecasts_error[:, t], 1316 res.forecasts_error[:, t]))))) 1317 # 2 * dv / di * F^{-1} v_t 1318 # where x = F^{-1} v_t or F x = v 1319 partials[t, i] += 2 * np.dot( 1320 partials_forecasts_error[:, t, i], 1321 np.dot(inv_forecasts_error_cov, res.forecasts_error[:, t])) 1322 1323 return -partials / 2. 1324 1325 _score_param_names = ['transformed', 'includes_fixed', 'score_method', 1326 'approx_complex_step', 'approx_centered'] 1327 _score_param_defaults = [True, False, 'approx', None, False] 1328 1329 def score(self, params, *args, **kwargs): 1330 """ 1331 Compute the score function at params. 1332 1333 Parameters 1334 ---------- 1335 params : array_like 1336 Array of parameters at which to evaluate the score. 1337 *args 1338 Additional positional arguments to the `loglike` method. 1339 **kwargs 1340 Additional keyword arguments to the `loglike` method. 1341 1342 Returns 1343 ------- 1344 score : ndarray 1345 Score, evaluated at `params`. 1346 1347 Notes 1348 ----- 1349 This is a numerical approximation, calculated using first-order complex 1350 step differentiation on the `loglike` method. 1351 1352 Both args and kwargs are necessary because the optimizer from 1353 `fit` must call this function and only supports passing arguments via 1354 args (for example `scipy.optimize.fmin_l_bfgs`). 1355 """ 1356 (transformed, includes_fixed, method, approx_complex_step, 1357 approx_centered, kwargs) = ( 1358 _handle_args(MLEModel._score_param_names, 1359 MLEModel._score_param_defaults, *args, **kwargs)) 1360 # For fit() calls, the method is called 'score_method' (to distinguish 1361 # it from the method used for fit) but generally in kwargs the method 1362 # will just be called 'method' 1363 if 'method' in kwargs: 1364 method = kwargs.pop('method') 1365 1366 if approx_complex_step is None: 1367 approx_complex_step = not self.ssm._complex_endog 1368 if approx_complex_step and self.ssm._complex_endog: 1369 raise ValueError('Cannot use complex step derivatives when data' 1370 ' or parameters are complex.') 1371 1372 out = self.handle_params( 1373 params, transformed=transformed, includes_fixed=includes_fixed, 1374 return_jacobian=not transformed) 1375 if transformed: 1376 params = out 1377 else: 1378 params, transform_score = out 1379 1380 if method == 'harvey': 1381 kwargs['includes_fixed'] = True 1382 score = self._score_harvey( 1383 params, approx_complex_step=approx_complex_step, **kwargs) 1384 elif method == 'approx' and approx_complex_step: 1385 kwargs['includes_fixed'] = True 1386 score = self._score_complex_step(params, **kwargs) 1387 elif method == 'approx': 1388 kwargs['includes_fixed'] = True 1389 score = self._score_finite_difference( 1390 params, approx_centered=approx_centered, **kwargs) 1391 else: 1392 raise NotImplementedError('Invalid score method.') 1393 1394 if not transformed: 1395 score = np.dot(transform_score, score) 1396 1397 if self._has_fixed_params and not includes_fixed: 1398 score = score[self._free_params_index] 1399 1400 return score 1401 1402 def score_obs(self, params, method='approx', transformed=True, 1403 includes_fixed=False, 
approx_complex_step=None, 1404 approx_centered=False, **kwargs): 1405 """ 1406 Compute the score per observation, evaluated at params 1407 1408 Parameters 1409 ---------- 1410 params : array_like 1411 Array of parameters at which to evaluate the score. 1412 **kwargs 1413 Additional arguments to the `loglike` method. 1414 1415 Returns 1416 ------- 1417 score : ndarray 1418 Score per observation, evaluated at `params`. 1419 1420 Notes 1421 ----- 1422 This is a numerical approximation, calculated using first-order complex 1423 step differentiation on the `loglikeobs` method. 1424 """ 1425 if not transformed and approx_complex_step: 1426 raise ValueError("Cannot use complex-step approximations to" 1427 " calculate the score at each observation" 1428 " with untransformed parameters.") 1429 1430 if approx_complex_step is None: 1431 approx_complex_step = not self.ssm._complex_endog 1432 if approx_complex_step and self.ssm._complex_endog: 1433 raise ValueError('Cannot use complex step derivatives when data' 1434 ' or parameters are complex.') 1435 1436 params = self.handle_params(params, transformed=True, 1437 includes_fixed=includes_fixed) 1438 kwargs['transformed'] = transformed 1439 kwargs['includes_fixed'] = True 1440 1441 if method == 'harvey': 1442 score = self._score_obs_harvey( 1443 params, approx_complex_step=approx_complex_step, **kwargs) 1444 elif method == 'approx' and approx_complex_step: 1445 # the default epsilon can be too small 1446 epsilon = _get_epsilon(params, 2., None, len(params)) 1447 kwargs['complex_step'] = True 1448 score = approx_fprime_cs(params, self.loglikeobs, epsilon=epsilon, 1449 kwargs=kwargs) 1450 elif method == 'approx': 1451 score = approx_fprime(params, self.loglikeobs, kwargs=kwargs, 1452 centered=approx_centered) 1453 else: 1454 raise NotImplementedError('Invalid scoreobs method.') 1455 1456 return score 1457 1458 _hessian_param_names = ['transformed', 'hessian_method', 1459 'approx_complex_step', 'approx_centered'] 1460 _hessian_param_defaults = [True, 'approx', None, False] 1461 1462 def hessian(self, params, *args, **kwargs): 1463 r""" 1464 Hessian matrix of the likelihood function, evaluated at the given 1465 parameters 1466 1467 Parameters 1468 ---------- 1469 params : array_like 1470 Array of parameters at which to evaluate the hessian. 1471 *args 1472 Additional positional arguments to the `loglike` method. 1473 **kwargs 1474 Additional keyword arguments to the `loglike` method. 1475 1476 Returns 1477 ------- 1478 hessian : ndarray 1479 Hessian matrix evaluated at `params` 1480 1481 Notes 1482 ----- 1483 This is a numerical approximation. 1484 1485 Both args and kwargs are necessary because the optimizer from 1486 `fit` must call this function and only supports passing arguments via 1487 args (for example `scipy.optimize.fmin_l_bfgs`). 
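
        Examples
        --------
        A sketch (assuming, as in the examples above, that `endog` is an
        observed univariate series); the `method` keyword selects among the
        'opg', 'oim', and 'approx' approximations:

        >>> mod = sm.tsa.SARIMAX(endog, order=(1, 0, 0))
        >>> hess_opg = mod.hessian([0.5, 1.0], method='opg')
        >>> hess_approx = mod.hessian([0.5, 1.0], method='approx')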
1488 """ 1489 transformed, method, approx_complex_step, approx_centered, kwargs = ( 1490 _handle_args(MLEModel._hessian_param_names, 1491 MLEModel._hessian_param_defaults, 1492 *args, **kwargs)) 1493 # For fit() calls, the method is called 'hessian_method' (to 1494 # distinguish it from the method used for fit) but generally in kwargs 1495 # the method will just be called 'method' 1496 if 'method' in kwargs: 1497 method = kwargs.pop('method') 1498 1499 if not transformed and approx_complex_step: 1500 raise ValueError("Cannot use complex-step approximations to" 1501 " calculate the hessian with untransformed" 1502 " parameters.") 1503 1504 if approx_complex_step is None: 1505 approx_complex_step = not self.ssm._complex_endog 1506 if approx_complex_step and self.ssm._complex_endog: 1507 raise ValueError('Cannot use complex step derivatives when data' 1508 ' or parameters are complex.') 1509 1510 if method == 'oim': 1511 hessian = self._hessian_oim( 1512 params, transformed=transformed, 1513 approx_complex_step=approx_complex_step, 1514 approx_centered=approx_centered, **kwargs) 1515 elif method == 'opg': 1516 hessian = self._hessian_opg( 1517 params, transformed=transformed, 1518 approx_complex_step=approx_complex_step, 1519 approx_centered=approx_centered, **kwargs) 1520 elif method == 'approx' and approx_complex_step: 1521 hessian = self._hessian_complex_step( 1522 params, transformed=transformed, **kwargs) 1523 elif method == 'approx': 1524 hessian = self._hessian_finite_difference( 1525 params, transformed=transformed, 1526 approx_centered=approx_centered, **kwargs) 1527 else: 1528 raise NotImplementedError('Invalid Hessian calculation method.') 1529 return hessian 1530 1531 def _hessian_oim(self, params, **kwargs): 1532 """ 1533 Hessian matrix computed using the Harvey (1989) information matrix 1534 """ 1535 return -self.observed_information_matrix(params, **kwargs) 1536 1537 def _hessian_opg(self, params, **kwargs): 1538 """ 1539 Hessian matrix computed using the outer product of gradients 1540 information matrix 1541 """ 1542 return -self.opg_information_matrix(params, **kwargs) 1543 1544 def _hessian_finite_difference(self, params, approx_centered=False, 1545 **kwargs): 1546 params = np.array(params, ndmin=1) 1547 1548 warnings.warn('Calculation of the Hessian using finite differences' 1549 ' is usually subject to substantial approximation' 1550 ' errors.', PrecisionWarning) 1551 1552 if not approx_centered: 1553 epsilon = _get_epsilon(params, 3, None, len(params)) 1554 else: 1555 epsilon = _get_epsilon(params, 4, None, len(params)) / 2 1556 hessian = approx_fprime(params, self._score_finite_difference, 1557 epsilon=epsilon, kwargs=kwargs, 1558 centered=approx_centered) 1559 1560 return hessian / (self.nobs - self.ssm.loglikelihood_burn) 1561 1562 def _hessian_complex_step(self, params, **kwargs): 1563 """ 1564 Hessian matrix computed by second-order complex-step differentiation 1565 on the `loglike` function. 1566 """ 1567 # the default epsilon can be too small 1568 epsilon = _get_epsilon(params, 3., None, len(params)) 1569 kwargs['transformed'] = True 1570 kwargs['complex_step'] = True 1571 hessian = approx_hess_cs( 1572 params, self.loglike, epsilon=epsilon, kwargs=kwargs) 1573 1574 return hessian / (self.nobs - self.ssm.loglikelihood_burn) 1575 1576 @property 1577 def start_params(self): 1578 """ 1579 (array) Starting parameters for maximum likelihood estimation. 
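
        Notes
        -----
        This attribute must be provided by subclasses (either by setting an
        `_start_params` attribute or by overriding this property); the base
        class raises NotImplementedError.

        Examples
        --------
        A minimal sketch of a hypothetical subclass (a local level model)
        that defines `start_params` together with the other members noted in
        the class docstring, assuming `endog` is an observed univariate
        series:

        >>> class LocalLevel(sm.tsa.statespace.MLEModel):
        ...     def __init__(self, endog):
        ...         super().__init__(endog, k_states=1)
        ...         self.initialize_approximate_diffuse()
        ...         self['design', 0, 0] = 1.0
        ...         self['transition', 0, 0] = 1.0
        ...         self['selection', 0, 0] = 1.0
        ...
        ...     @property
        ...     def param_names(self):
        ...         return ['sigma2.measurement', 'sigma2.level']
        ...
        ...     @property
        ...     def start_params(self):
        ...         return np.r_[np.nanvar(self.endog), np.nanvar(self.endog)]
        ...
        ...     def transform_params(self, unconstrained):
        ...         return unconstrained**2
        ...
        ...     def untransform_params(self, constrained):
        ...         return constrained**0.5
        ...
        ...     def update(self, params, **kwargs):
        ...         params = super().update(params, **kwargs)
        ...         self['obs_cov', 0, 0] = params[0]
        ...         self['state_cov', 0, 0] = params[1]
        >>> mod = LocalLevel(endog)
        >>> res = mod.fit(disp=False)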
1580 """ 1581 if hasattr(self, '_start_params'): 1582 return self._start_params 1583 else: 1584 raise NotImplementedError 1585 1586 @property 1587 def param_names(self): 1588 """ 1589 (list of str) List of human readable parameter names (for parameters 1590 actually included in the model). 1591 """ 1592 if hasattr(self, '_param_names'): 1593 return self._param_names 1594 else: 1595 try: 1596 names = ['param.%d' % i for i in range(len(self.start_params))] 1597 except NotImplementedError: 1598 names = [] 1599 return names 1600 1601 @property 1602 def state_names(self): 1603 """ 1604 (list of str) List of human readable names for unobserved states. 1605 """ 1606 if hasattr(self, '_state_names'): 1607 return self._state_names 1608 else: 1609 names = ['state.%d' % i for i in range(self.k_states)] 1610 return names 1611 1612 def transform_jacobian(self, unconstrained, approx_centered=False): 1613 """ 1614 Jacobian matrix for the parameter transformation function 1615 1616 Parameters 1617 ---------- 1618 unconstrained : array_like 1619 Array of unconstrained parameters used by the optimizer. 1620 1621 Returns 1622 ------- 1623 jacobian : ndarray 1624 Jacobian matrix of the transformation, evaluated at `unconstrained` 1625 1626 See Also 1627 -------- 1628 transform_params 1629 1630 Notes 1631 ----- 1632 This is a numerical approximation using finite differences. Note that 1633 in general complex step methods cannot be used because it is not 1634 guaranteed that the `transform_params` method is a real function (e.g. 1635 if Cholesky decomposition is used). 1636 """ 1637 return approx_fprime(unconstrained, self.transform_params, 1638 centered=approx_centered) 1639 1640 def transform_params(self, unconstrained): 1641 """ 1642 Transform unconstrained parameters used by the optimizer to constrained 1643 parameters used in likelihood evaluation 1644 1645 Parameters 1646 ---------- 1647 unconstrained : array_like 1648 Array of unconstrained parameters used by the optimizer, to be 1649 transformed. 1650 1651 Returns 1652 ------- 1653 constrained : array_like 1654 Array of constrained parameters which may be used in likelihood 1655 evaluation. 1656 1657 Notes 1658 ----- 1659 This is a noop in the base class, subclasses should override where 1660 appropriate. 1661 """ 1662 return np.array(unconstrained, ndmin=1) 1663 1664 def untransform_params(self, constrained): 1665 """ 1666 Transform constrained parameters used in likelihood evaluation 1667 to unconstrained parameters used by the optimizer 1668 1669 Parameters 1670 ---------- 1671 constrained : array_like 1672 Array of constrained parameters used in likelihood evaluation, to 1673 be transformed. 1674 1675 Returns 1676 ------- 1677 unconstrained : array_like 1678 Array of unconstrained parameters used by the optimizer. 1679 1680 Notes 1681 ----- 1682 This is a noop in the base class, subclasses should override where 1683 appropriate. 
1684 """ 1685 return np.array(constrained, ndmin=1) 1686 1687 def handle_params(self, params, transformed=True, includes_fixed=False, 1688 return_jacobian=False): 1689 """ 1690 Ensure model parameters satisfy shape and other requirements 1691 """ 1692 params = np.array(params, ndmin=1) 1693 1694 # Never want integer dtype, so convert to floats 1695 if np.issubdtype(params.dtype, np.integer): 1696 params = params.astype(np.float64) 1697 1698 if not includes_fixed and self._has_fixed_params: 1699 k_params = len(self.param_names) 1700 new_params = np.zeros(k_params, dtype=params.dtype) * np.nan 1701 new_params[self._free_params_index] = params 1702 params = new_params 1703 1704 if not transformed: 1705 # It may be the case that the transformation relies on having 1706 # "some" (non-NaN) values for the fixed parameters, even if we will 1707 # not actually be transforming the fixed parameters (as they will) 1708 # be set below regardless 1709 if not includes_fixed and self._has_fixed_params: 1710 params[self._fixed_params_index] = ( 1711 list(self._fixed_params.values())) 1712 1713 if return_jacobian: 1714 transform_score = self.transform_jacobian(params) 1715 params = self.transform_params(params) 1716 1717 if not includes_fixed and self._has_fixed_params: 1718 params[self._fixed_params_index] = ( 1719 list(self._fixed_params.values())) 1720 1721 return (params, transform_score) if return_jacobian else params 1722 1723 def update(self, params, transformed=True, includes_fixed=False, 1724 complex_step=False): 1725 """ 1726 Update the parameters of the model 1727 1728 Parameters 1729 ---------- 1730 params : array_like 1731 Array of new parameters. 1732 transformed : bool, optional 1733 Whether or not `params` is already transformed. If set to False, 1734 `transform_params` is called. Default is True. 1735 1736 Returns 1737 ------- 1738 params : array_like 1739 Array of parameters. 1740 1741 Notes 1742 ----- 1743 Since Model is a base class, this method should be overridden by 1744 subclasses to perform actual updating steps. 1745 """ 1746 return self.handle_params(params=params, transformed=transformed, 1747 includes_fixed=includes_fixed) 1748 1749 def _validate_out_of_sample_exog(self, exog, out_of_sample): 1750 """ 1751 Validate given `exog` as satisfactory for out-of-sample operations 1752 1753 Parameters 1754 ---------- 1755 exog : array_like or None 1756 New observations of exogenous regressors, if applicable. 1757 out_of_sample : int 1758 Number of new observations required. 1759 1760 Returns 1761 ------- 1762 exog : array or None 1763 A numpy array of shape (out_of_sample, k_exog) if the model 1764 contains an `exog` component, or None if it does not. 1765 """ 1766 if out_of_sample and self.k_exog > 0: 1767 if exog is None: 1768 raise ValueError('Out-of-sample operations in a model' 1769 ' with a regression component require' 1770 ' additional exogenous values via the' 1771 ' `exog` argument.') 1772 exog = np.array(exog) 1773 required_exog_shape = (out_of_sample, self.k_exog) 1774 try: 1775 exog = exog.reshape(required_exog_shape) 1776 except ValueError: 1777 raise ValueError('Provided exogenous values are not of the' 1778 ' appropriate shape. Required %s, got %s.' 1779 % (str(required_exog_shape), 1780 str(exog.shape))) 1781 elif self.k_exog > 0 and exog is not None: 1782 exog = None 1783 warnings.warn('Exogenous array provided, but additional data' 1784 ' is not required. 
`exog` argument ignored.', ValueWarning)

        return exog

    def _get_extension_time_varying_matrices(
            self, params, exog, out_of_sample, extend_kwargs=None,
            transformed=True, includes_fixed=False, **kwargs):
        """
        Get updated time-varying state space system matrices

        Parameters
        ----------
        params : array_like
            Array of parameters used to construct the time-varying system
            matrices.
        exog : array_like or None
            New observations of exogenous regressors, if applicable.
        out_of_sample : int
            Number of new observations required.
        extend_kwargs : dict, optional
            Dictionary of keyword arguments to pass to the state space model
            constructor. For example, for an SARIMAX state space model, this
            could be used to pass the `concentrate_scale=True` keyword
            argument. Any arguments that are not explicitly set in this
            dictionary will be copied from the current model instance.
        transformed : bool, optional
            Whether or not `params` is already transformed. Default is
            True.
        includes_fixed : bool, optional
            If parameters were previously fixed with the `fix_params` method,
            this argument describes whether or not `params` also includes
            the fixed parameters, in addition to the free parameters. Default
            is False.

        Returns
        -------
        kwargs : dict
            Dictionary of keyword arguments, updated with extensions to the
            time-varying system matrices for the out-of-sample period.
        """
        # Get the appropriate exog for the extended sample
        exog = self._validate_out_of_sample_exog(exog, out_of_sample)

        # Create extended model
        if extend_kwargs is None:
            extend_kwargs = {}

        # Handle trend offset for extended model
        if getattr(self, 'k_trend', 0) > 0 and hasattr(self, 'trend_offset'):
            extend_kwargs.setdefault(
                'trend_offset', self.trend_offset + self.nobs)

        mod_extend = self.clone(
            endog=np.zeros((out_of_sample, self.k_endog)), exog=exog,
            **extend_kwargs)
        mod_extend.update(params, transformed=transformed,
                          includes_fixed=includes_fixed)

        # Retrieve the extensions to the time-varying system matrices and
        # put them in kwargs
        for name in self.ssm.shapes.keys():
            if name == 'obs' or name in kwargs:
                continue
            original = getattr(self.ssm, name)
            extended = getattr(mod_extend.ssm, name)
            so = original.shape[-1]
            se = extended.shape[-1]
            if ((so > 1 or se > 1) or (
                    so == 1 and self.nobs == 1 and
                    np.any(original[..., 0] != extended[..., 0]))):
                kwargs[name] = extended[..., -out_of_sample:]

        return kwargs

    def simulate(self, params, nsimulations, measurement_shocks=None,
                 state_shocks=None, initial_state=None, anchor=None,
                 repetitions=None, exog=None, extend_model=None,
                 extend_kwargs=None, transformed=True, includes_fixed=False,
                 **kwargs):
        r"""
        Simulate a new time series following the state space model

        Parameters
        ----------
        params : array_like
            Array of parameters to use in constructing the state space
            representation to use when simulating.
        nsimulations : int
            The number of observations to simulate. If the model is
            time-invariant this can be any number. If the model is
            time-varying, then this number must be less than or equal to the
            number of observations.
        measurement_shocks : array_like, optional
            If specified, these are the shocks to the measurement equation,
            :math:`\varepsilon_t`. If unspecified, these are automatically
            generated using a pseudo-random number generator.
If specified, 1875 must be shaped `nsimulations` x `k_endog`, where `k_endog` is the 1876 same as in the state space model. 1877 state_shocks : array_like, optional 1878 If specified, these are the shocks to the state equation, 1879 :math:`\eta_t`. If unspecified, these are automatically 1880 generated using a pseudo-random number generator. If specified, 1881 must be shaped `nsimulations` x `k_posdef` where `k_posdef` is the 1882 same as in the state space model. 1883 initial_state : array_like, optional 1884 If specified, this is the initial state vector to use in 1885 simulation, which should be shaped (`k_states` x 1), where 1886 `k_states` is the same as in the state space model. If unspecified, 1887 but the model has been initialized, then that initialization is 1888 used. This must be specified if `anchor` is anything other than 1889 "start" or 0 (or else you can use the `simulate` method on a 1890 results object rather than on the model object). 1891 anchor : int, str, or datetime, optional 1892 First period for simulation. The simulation will be conditional on 1893 all existing datapoints prior to the `anchor`. Type depends on the 1894 index of the given `endog` in the model. Two special cases are the 1895 strings 'start' and 'end'. `start` refers to beginning the 1896 simulation at the first period of the sample, and `end` refers to 1897 beginning the simulation at the first period after the sample. 1898 Integer values can run from 0 to `nobs`, or can be negative to 1899 apply negative indexing. Finally, if a date/time index was provided 1900 to the model, then this argument can be a date string to parse or a 1901 datetime type. Default is 'start'. 1902 repetitions : int, optional 1903 Number of simulated paths to generate. Default is 1 simulated path. 1904 exog : array_like, optional 1905 New observations of exogenous regressors, if applicable. 1906 transformed : bool, optional 1907 Whether or not `params` is already transformed. Default is 1908 True. 1909 includes_fixed : bool, optional 1910 If parameters were previously fixed with the `fix_params` method, 1911 this argument describes whether or not `params` also includes 1912 the fixed parameters, in addition to the free parameters. Default 1913 is False. 1914 1915 Returns 1916 ------- 1917 simulated_obs : ndarray 1918 An array of simulated observations. If `repetitions=None`, then it 1919 will be shaped (nsimulations x k_endog) or (nsimulations,) if 1920 `k_endog=1`. Otherwise it will be shaped 1921 (nsimulations x k_endog x repetitions). If the model was given 1922 Pandas input then the output will be a Pandas object. If 1923 `k_endog > 1` and `repetitions` is not None, then the output will 1924 be a Pandas DataFrame that has a MultiIndex for the columns, with 1925 the first level containing the names of the `endog` variables and 1926 the second level containing the repetition number. 
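
        Examples
        --------
        An illustrative sketch (`MLEModel` itself does not implement `update`,
        so this assumes a subclass such as `sm.tsa.SARIMAX` and
        `import statsmodels.api as sm`)::

            mod = sm.tsa.SARIMAX(endog, order=(1, 0, 0))
            # Ten simulated paths of length 100, anchored at the start of
            # the sample
            paths = mod.simulate(mod.start_params, nsimulations=100,
                                 repetitions=10)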
1927 """ 1928 # Make sure the model class has the current parameters 1929 self.update(params, transformed=transformed, 1930 includes_fixed=includes_fixed) 1931 1932 # Get the starting location 1933 if anchor is None or anchor == 'start': 1934 iloc = 0 1935 elif anchor == 'end': 1936 iloc = self.nobs 1937 else: 1938 iloc, _, _ = self._get_index_loc(anchor) 1939 if isinstance(iloc, slice): 1940 iloc = iloc.start 1941 1942 if iloc < 0: 1943 iloc = self.nobs + iloc 1944 if iloc > self.nobs: 1945 raise ValueError('Cannot anchor simulation outside of the sample.') 1946 1947 if iloc > 0 and initial_state is None: 1948 raise ValueError('If `anchor` is after the start of the sample,' 1949 ' must provide a value for `initial_state`.') 1950 1951 # Get updated time-varying system matrices in **kwargs, if necessary 1952 out_of_sample = max(iloc + nsimulations - self.nobs, 0) 1953 if extend_model is None: 1954 extend_model = self.exog is not None or not self.ssm.time_invariant 1955 if out_of_sample and extend_model: 1956 kwargs = self._get_extension_time_varying_matrices( 1957 params, exog, out_of_sample, extend_kwargs, 1958 transformed=transformed, includes_fixed=includes_fixed, 1959 **kwargs) 1960 1961 # Standardize the dimensions of the initial state 1962 if initial_state is not None: 1963 initial_state = np.array(initial_state) 1964 if initial_state.ndim < 2: 1965 initial_state = np.atleast_2d(initial_state).T 1966 1967 # Construct a model that represents the simulation period 1968 end = min(self.nobs, iloc + nsimulations) 1969 nextend = iloc + nsimulations - end 1970 sim_model = self.ssm.extend(np.empty((nextend, self.k_endog)), 1971 start=iloc, end=end, **kwargs) 1972 1973 # Simulate the data 1974 _repetitions = 1 if repetitions is None else repetitions 1975 sim = np.zeros((nsimulations, self.k_endog, _repetitions)) 1976 1977 for i in range(_repetitions): 1978 initial_state_variates = None 1979 if initial_state is not None: 1980 if initial_state.shape[1] == 1: 1981 initial_state_variates = initial_state[:, 0] 1982 else: 1983 initial_state_variates = initial_state[:, i] 1984 1985 # TODO: allow specifying measurement / state shocks for each 1986 # repetition? 

            out, _ = sim_model.simulate(
                nsimulations, measurement_shocks, state_shocks,
                initial_state_variates)

            sim[:, :, i] = out

        # Wrap data / squeeze where appropriate
        use_pandas = isinstance(self.data, PandasData)
        index = None
        if use_pandas:
            _, _, _, index = self._get_prediction_index(
                iloc, iloc + nsimulations - 1)
        # If `repetitions` isn't set, we squeeze the last dimension(s)
        if repetitions is None:
            if self.k_endog == 1:
                sim = sim[:, 0, 0]
                if use_pandas:
                    sim = pd.Series(sim, index=index, name=self.endog_names)
            else:
                sim = sim[:, :, 0]
                if use_pandas:
                    sim = pd.DataFrame(sim, index=index,
                                       columns=self.endog_names)
        elif use_pandas:
            shape = sim.shape
            endog_names = self.endog_names
            if not isinstance(endog_names, list):
                endog_names = [endog_names]
            columns = pd.MultiIndex.from_product([endog_names,
                                                  np.arange(shape[2])])
            sim = pd.DataFrame(sim.reshape(shape[0], shape[1] * shape[2]),
                               index=index, columns=columns)

        return sim

    def impulse_responses(self, params, steps=1, impulse=0,
                          orthogonalized=False, cumulative=False, anchor=None,
                          exog=None, extend_model=None, extend_kwargs=None,
                          transformed=True, includes_fixed=False, **kwargs):
        """
        Impulse response function

        Parameters
        ----------
        params : array_like
            Array of model parameters.
        steps : int, optional
            The number of steps for which impulse responses are calculated.
            Default is 1. Note that for time-invariant models, the initial
            impulse is not counted as a step, so if `steps=1`, the output will
            have 2 entries.
        impulse : int, str or array_like
            If an integer, the state innovation to pulse; must be between 0
            and `k_posdef-1`. If a str, it indicates which column of the
            `endog` DataFrame the unit (1) impulse is given to.
            Alternatively, a custom impulse vector may be provided; must be
            shaped `k_posdef x 1`.
        orthogonalized : bool, optional
            Whether or not to perform the impulse using orthogonalized
            innovations. Note that this will also affect custom `impulse`
            vectors. Default is False.
        cumulative : bool, optional
            Whether or not to return cumulative impulse responses. Default is
            False.
        anchor : int, str, or datetime, optional
            Time point within the sample for the state innovation impulse. Type
            depends on the index of the given `endog` in the model. Two special
            cases are the strings 'start' and 'end', which refer to setting the
            impulse at the first and last points of the sample, respectively.
            Integer values can run from 0 to `nobs - 1`, or can be negative to
            apply negative indexing. Finally, if a date/time index was provided
            to the model, then this argument can be a date string to parse or a
            datetime type. Default is 'start'.
        exog : array_like, optional
            New observations of exogenous regressors for out-of-sample periods,
            if applicable.
        transformed : bool, optional
            Whether or not `params` is already transformed. Default is
            True.
        includes_fixed : bool, optional
            If parameters were previously fixed with the `fix_params` method,
            this argument describes whether or not `params` also includes
            the fixed parameters, in addition to the free parameters. Default
            is False.
2072 **kwargs 2073 If the model has time-varying design or transition matrices and the 2074 combination of `anchor` and `steps` implies creating impulse 2075 responses for the out-of-sample period, then these matrices must 2076 have updated values provided for the out-of-sample steps. For 2077 example, if `design` is a time-varying component, `nobs` is 10, 2078 `anchor=1`, and `steps` is 15, a (`k_endog` x `k_states` x 7) 2079 matrix must be provided with the new design matrix values. 2080 2081 Returns 2082 ------- 2083 impulse_responses : ndarray 2084 Responses for each endogenous variable due to the impulse 2085 given by the `impulse` argument. For a time-invariant model, the 2086 impulse responses are given for `steps + 1` elements (this gives 2087 the "initial impulse" followed by `steps` responses for the 2088 important cases of VAR and SARIMAX models), while for time-varying 2089 models the impulse responses are only given for `steps` elements 2090 (to avoid having to unexpectedly provide updated time-varying 2091 matrices). 2092 2093 Notes 2094 ----- 2095 Intercepts in the measurement and state equation are ignored when 2096 calculating impulse responses. 2097 2098 TODO: add an option to allow changing the ordering for the 2099 orthogonalized option. Will require permuting matrices when 2100 constructing the extended model. 2101 """ 2102 # Make sure the model class has the current parameters 2103 self.update(params, transformed=transformed, 2104 includes_fixed=includes_fixed) 2105 2106 # For time-invariant models, add an additional `step`. This is the 2107 # default for time-invariant models based on the expected behavior for 2108 # ARIMA and VAR models: we want to record the initial impulse and also 2109 # `steps` values of the responses afterwards. 2110 # Note: we don't modify `steps` itself, because 2111 # `KalmanFilter.impulse_responses` also adds an additional step in this 2112 # case (this is so that there isn't different behavior when calling 2113 # this method versus that method). We just need to also keep track of 2114 # this here because we need to generate the correct extended model. 
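        # (Here the model counts as time-invariant when the design,
        # transition, and selection matrices each have a single element
        # along their time dimension.)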
2115 additional_steps = 0 2116 if (self.ssm._design.shape[2] == 1 and 2117 self.ssm._transition.shape[2] == 1 and 2118 self.ssm._selection.shape[2] == 1): 2119 additional_steps = 1 2120 2121 # Get the starting location 2122 if anchor is None or anchor == 'start': 2123 iloc = 0 2124 elif anchor == 'end': 2125 iloc = self.nobs - 1 2126 else: 2127 iloc, _, _ = self._get_index_loc(anchor) 2128 if isinstance(iloc, slice): 2129 iloc = iloc.start 2130 2131 if iloc < 0: 2132 iloc = self.nobs + iloc 2133 if iloc >= self.nobs: 2134 raise ValueError('Cannot anchor impulse responses outside of the' 2135 ' sample.') 2136 2137 time_invariant = ( 2138 self.ssm._design.shape[2] == self.ssm._obs_cov.shape[2] == 2139 self.ssm._transition.shape[2] == self.ssm._selection.shape[2] == 2140 self.ssm._state_cov.shape[2] == 1) 2141 2142 # Get updated time-varying system matrices in **kwargs, if necessary 2143 # (Note: KalmanFilter adds 1 to steps to account for the first impulse) 2144 out_of_sample = max( 2145 iloc + (steps + additional_steps + 1) - self.nobs, 0) 2146 if extend_model is None: 2147 extend_model = self.exog is not None and not time_invariant 2148 if out_of_sample and extend_model: 2149 kwargs = self._get_extension_time_varying_matrices( 2150 params, exog, out_of_sample, extend_kwargs, 2151 transformed=transformed, includes_fixed=includes_fixed, 2152 **kwargs) 2153 2154 # Special handling for matrix terms that are time-varying but 2155 # irrelevant for impulse response functions. Must be set since 2156 # ssm.extend() requires that we pass new matrices for these, but they 2157 # are ignored for IRF purposes. 2158 end = min(self.nobs, iloc + steps + additional_steps) 2159 nextend = iloc + (steps + additional_steps + 1) - end 2160 if ('obs_intercept' not in kwargs and 2161 self.ssm._obs_intercept.shape[1] > 1): 2162 kwargs['obs_intercept'] = np.zeros((self.k_endog, nextend)) 2163 if ('state_intercept' not in kwargs and 2164 self.ssm._state_intercept.shape[1] > 1): 2165 kwargs['state_intercept'] = np.zeros((self.k_states, nextend)) 2166 if 'obs_cov' not in kwargs and self.ssm._obs_cov.shape[2] > 1: 2167 kwargs['obs_cov'] = np.zeros((self.k_endog, self.k_endog, nextend)) 2168 # Special handling for matrix terms that are time-varying but 2169 # only the value at the anchor matters for IRF purposes. 
2170 if 'state_cov' not in kwargs and self.ssm._state_cov.shape[2] > 1: 2171 tmp = np.zeros((self.ssm.k_posdef, self.ssm.k_posdef, nextend)) 2172 tmp[:] = self['state_cov', :, :, iloc:iloc + 1] 2173 kwargs['state_cov'] = tmp 2174 if 'selection' not in kwargs and self.ssm._selection.shape[2] > 1: 2175 tmp = np.zeros((self.k_states, self.ssm.k_posdef, nextend)) 2176 tmp[:] = self['selection', :, :, iloc:iloc + 1] 2177 kwargs['selection'] = tmp 2178 2179 # Construct a model that represents the simulation period 2180 sim_model = self.ssm.extend(np.empty((nextend, self.k_endog)), 2181 start=iloc, end=end, **kwargs) 2182 2183 # Compute the impulse responses 2184 2185 # Convert endog name to index 2186 use_pandas = isinstance(self.data, PandasData) 2187 if type(impulse) == str: 2188 if not use_pandas: 2189 raise ValueError('Endog must be pd.DataFrame.') 2190 impulse = self.endog_names.index(impulse) 2191 2192 irfs = sim_model.impulse_responses( 2193 steps, impulse, orthogonalized, cumulative) 2194 2195 # IRF is (nobs x k_endog); do not want to squeeze in case of steps = 1 2196 if irfs.shape[1] == 1: 2197 irfs = irfs[:, 0] 2198 2199 # Wrap data / squeeze where appropriate 2200 if use_pandas: 2201 if self.k_endog == 1: 2202 irfs = pd.Series(irfs, name=self.endog_names) 2203 else: 2204 irfs = pd.DataFrame(irfs, columns=self.endog_names) 2205 return irfs 2206 2207 @classmethod 2208 def from_formula(cls, formula, data, subset=None): 2209 """ 2210 Not implemented for state space models 2211 """ 2212 raise NotImplementedError 2213 2214 2215class MLEResults(tsbase.TimeSeriesModelResults): 2216 r""" 2217 Class to hold results from fitting a state space model. 2218 2219 Parameters 2220 ---------- 2221 model : MLEModel instance 2222 The fitted model instance 2223 params : ndarray 2224 Fitted parameters 2225 filter_results : KalmanFilter instance 2226 The underlying state space model and Kalman filter output 2227 2228 Attributes 2229 ---------- 2230 model : Model instance 2231 A reference to the model that was fit. 2232 filter_results : KalmanFilter instance 2233 The underlying state space model and Kalman filter output 2234 nobs : float 2235 The number of observations used to fit the model. 2236 params : ndarray 2237 The parameters of the model. 2238 scale : float 2239 This is currently set to 1.0 unless the model uses concentrated 2240 filtering. 2241 2242 See Also 2243 -------- 2244 MLEModel 2245 statsmodels.tsa.statespace.kalman_filter.FilterResults 2246 statsmodels.tsa.statespace.representation.FrozenRepresentation 2247 """ 2248 def __init__(self, model, params, results, cov_type=None, cov_kwds=None, 2249 **kwargs): 2250 self.data = model.data 2251 scale = results.scale 2252 2253 tsbase.TimeSeriesModelResults.__init__(self, model, params, 2254 normalized_cov_params=None, 2255 scale=scale) 2256 2257 # Save the fixed parameters 2258 self._has_fixed_params = self.model._has_fixed_params 2259 self._fixed_params_index = self.model._fixed_params_index 2260 self._free_params_index = self.model._free_params_index 2261 # TODO: seems like maybe self.fixed_params should be the dictionary 2262 # itself, not just the keys? 
if self._has_fixed_params:
            self._fixed_params = self.model._fixed_params.copy()
            self.fixed_params = list(self._fixed_params.keys())
        else:
            self._fixed_params = None
            self.fixed_params = []
        self.param_names = [
            '%s (fixed)' % name if name in self.fixed_params else name
            for name in (self.data.param_names or [])]

        # Save the state space representation output
        self.filter_results = results
        if isinstance(results, SmootherResults):
            self.smoother_results = results
        else:
            self.smoother_results = None

        # Dimensions
        self.nobs = self.filter_results.nobs
        self.nobs_diffuse = self.filter_results.nobs_diffuse
        if self.nobs_diffuse > 0 and self.loglikelihood_burn > 0:
            warnings.warn('Care should be used when applying a loglikelihood'
                          ' burn to a model with exact diffuse initialization.'
                          ' Some results objects, e.g. degrees of freedom,'
                          ' expect only one of the two to be set.')
        # This only excludes explicitly burned (usually approximate diffuse)
        # periods but does not exclude exact diffuse periods. This is
        # because the loglikelihood remains valid for the initial periods in
        # the exact diffuse case (see DK, 2012, section 7.2) and so also do
        # e.g. information criteria (see DK, 2012, section 7.4) and the score
        # vector (see DK, 2012, section 7.3.3, equation 7.15).
        # However, other objects should be excluded in the diffuse periods
        # (e.g. the diffuse forecast errors, so in some cases a different
        # nobs_effective will have to be computed and used)
        self.nobs_effective = self.nobs - self.loglikelihood_burn

        P = self.filter_results.initial_diffuse_state_cov
        self.k_diffuse_states = 0 if P is None else np.sum(np.diagonal(P) == 1)

        # Degrees of freedom (see DK 2012, section 7.4)
        k_free_params = self.params.size - len(self.fixed_params)
        self.df_model = (k_free_params + self.k_diffuse_states
                         + self.filter_results.filter_concentrated)
        self.df_resid = self.nobs_effective - self.df_model

        # Setup covariance matrix notes dictionary
        if not hasattr(self, 'cov_kwds'):
            self.cov_kwds = {}
        if cov_type is None:
            cov_type = 'approx' if results.memory_no_likelihood else 'opg'
        self.cov_type = cov_type

        # Setup the cache
        self._cache = {}

        # Handle covariance matrix calculation
        if cov_kwds is None:
            cov_kwds = {}
        self._cov_approx_complex_step = (
            cov_kwds.pop('approx_complex_step', True))
        self._cov_approx_centered = cov_kwds.pop('approx_centered', False)
        try:
            self._rank = None
            self._get_robustcov_results(cov_type=cov_type, use_self=True,
                                        **cov_kwds)
        except np.linalg.LinAlgError:
            self._rank = 0
            k_params = len(self.params)
            self.cov_params_default = np.zeros((k_params, k_params)) * np.nan
            self.cov_kwds['cov_type'] = (
                'Covariance matrix could not be calculated: singular'
2334 ' information matrix.') 2335 self.model.update(self.params, transformed=True, includes_fixed=True) 2336 2337 # References of filter and smoother output 2338 extra_arrays = [ 2339 'filtered_state', 'filtered_state_cov', 'predicted_state', 2340 'predicted_state_cov', 'forecasts', 'forecasts_error', 2341 'forecasts_error_cov', 'standardized_forecasts_error', 2342 'forecasts_error_diffuse_cov', 'predicted_diffuse_state_cov', 2343 'scaled_smoothed_estimator', 2344 'scaled_smoothed_estimator_cov', 'smoothing_error', 2345 'smoothed_state', 2346 'smoothed_state_cov', 'smoothed_state_autocov', 2347 'smoothed_measurement_disturbance', 2348 'smoothed_state_disturbance', 2349 'smoothed_measurement_disturbance_cov', 2350 'smoothed_state_disturbance_cov'] 2351 for name in extra_arrays: 2352 setattr(self, name, getattr(self.filter_results, name, None)) 2353 2354 # Remove too-short results when memory conservation was used 2355 if self.filter_results.memory_no_forecast_mean: 2356 self.forecasts = None 2357 self.forecasts_error = None 2358 if self.filter_results.memory_no_forecast_cov: 2359 self.forecasts_error_cov = None 2360 if self.filter_results.memory_no_predicted_mean: 2361 self.predicted_state = None 2362 if self.filter_results.memory_no_predicted_cov: 2363 self.predicted_state_cov = None 2364 if self.filter_results.memory_no_filtered_mean: 2365 self.filtered_state = None 2366 if self.filter_results.memory_no_filtered_cov: 2367 self.filtered_state_cov = None 2368 if self.filter_results.memory_no_gain: 2369 pass 2370 if self.filter_results.memory_no_smoothing: 2371 pass 2372 if self.filter_results.memory_no_std_forecast: 2373 self.standardized_forecasts_error = None 2374 2375 # Save more convenient access to states 2376 # (will create a private attribute _states here and provide actual 2377 # access via a getter, so that we can e.g. 
issue a warning in the case 2378 # that a useless Pandas index was given in the model specification) 2379 self._states = SimpleNamespace() 2380 2381 use_pandas = isinstance(self.data, PandasData) 2382 index = self.model._index 2383 columns = self.model.state_names 2384 2385 # Predicted states 2386 # Note: a complication here is that we also include the initial values 2387 # here, so that we need an extended index in the Pandas case 2388 if (self.predicted_state is None or 2389 self.filter_results.memory_no_predicted_mean): 2390 self._states.predicted = None 2391 elif use_pandas: 2392 extended_index = self.model._get_index_with_final_state() 2393 self._states.predicted = pd.DataFrame( 2394 self.predicted_state.T, index=extended_index, columns=columns) 2395 else: 2396 self._states.predicted = self.predicted_state.T 2397 if (self.predicted_state_cov is None or 2398 self.filter_results.memory_no_predicted_cov): 2399 self._states.predicted_cov = None 2400 elif use_pandas: 2401 extended_index = self.model._get_index_with_final_state() 2402 tmp = np.transpose(self.predicted_state_cov, (2, 0, 1)) 2403 self._states.predicted_cov = pd.DataFrame( 2404 np.reshape(tmp, (tmp.shape[0] * tmp.shape[1], tmp.shape[2])), 2405 index=pd.MultiIndex.from_product( 2406 [extended_index, columns]).swaplevel(), 2407 columns=columns) 2408 else: 2409 self._states.predicted_cov = np.transpose( 2410 self.predicted_state_cov, (2, 0, 1)) 2411 2412 # Filtered states 2413 if (self.filtered_state is None or 2414 self.filter_results.memory_no_filtered_mean): 2415 self._states.filtered = None 2416 elif use_pandas: 2417 self._states.filtered = pd.DataFrame( 2418 self.filtered_state.T, index=index, columns=columns) 2419 else: 2420 self._states.filtered = self.filtered_state.T 2421 if (self.filtered_state_cov is None or 2422 self.filter_results.memory_no_filtered_cov): 2423 self._states.filtered_cov = None 2424 elif use_pandas: 2425 tmp = np.transpose(self.filtered_state_cov, (2, 0, 1)) 2426 self._states.filtered_cov = pd.DataFrame( 2427 np.reshape(tmp, (tmp.shape[0] * tmp.shape[1], tmp.shape[2])), 2428 index=pd.MultiIndex.from_product([index, columns]).swaplevel(), 2429 columns=columns) 2430 else: 2431 self._states.filtered_cov = np.transpose( 2432 self.filtered_state_cov, (2, 0, 1)) 2433 2434 # Smoothed states 2435 if self.smoothed_state is None: 2436 self._states.smoothed = None 2437 elif use_pandas: 2438 self._states.smoothed = pd.DataFrame( 2439 self.smoothed_state.T, index=index, columns=columns) 2440 else: 2441 self._states.smoothed = self.smoothed_state.T 2442 if self.smoothed_state_cov is None: 2443 self._states.smoothed_cov = None 2444 elif use_pandas: 2445 tmp = np.transpose(self.smoothed_state_cov, (2, 0, 1)) 2446 self._states.smoothed_cov = pd.DataFrame( 2447 np.reshape(tmp, (tmp.shape[0] * tmp.shape[1], tmp.shape[2])), 2448 index=pd.MultiIndex.from_product([index, columns]).swaplevel(), 2449 columns=columns) 2450 else: 2451 self._states.smoothed_cov = np.transpose( 2452 self.smoothed_state_cov, (2, 0, 1)) 2453 2454 # Handle removing data 2455 self._data_attr_model = getattr(self, '_data_attr_model', []) 2456 self._data_attr_model.extend(['ssm']) 2457 self._data_attr.extend(extra_arrays) 2458 self._data_attr.extend(['filter_results', 'smoother_results']) 2459 2460 def _get_robustcov_results(self, cov_type='opg', **kwargs): 2461 """ 2462 Create new results instance with specified covariance estimator as 2463 default 2464 2465 Note: creating new results instance currently not supported. 
2466 2467 Parameters 2468 ---------- 2469 cov_type : str 2470 the type of covariance matrix estimator to use. See Notes below 2471 kwargs : depends on cov_type 2472 Required or optional arguments for covariance calculation. 2473 See Notes below. 2474 2475 Returns 2476 ------- 2477 results : results instance 2478 This method creates a new results instance with the requested 2479 covariance as the default covariance of the parameters. 2480 Inferential statistics like p-values and hypothesis tests will be 2481 based on this covariance matrix. 2482 2483 Notes 2484 ----- 2485 The following covariance types and required or optional arguments are 2486 currently available: 2487 2488 - 'opg' for the outer product of gradient estimator 2489 - 'oim' for the observed information matrix estimator, calculated 2490 using the method of Harvey (1989) 2491 - 'approx' for the observed information matrix estimator, 2492 calculated using a numerical approximation of the Hessian matrix. 2493 Uses complex step approximation by default, or uses finite 2494 differences if `approx_complex_step=False` in the `cov_kwds` 2495 dictionary. 2496 - 'robust' for an approximate (quasi-maximum likelihood) covariance 2497 matrix that may be valid even in the presence of some 2498 misspecifications. Intermediate calculations use the 'oim' 2499 method. 2500 - 'robust_approx' is the same as 'robust' except that the 2501 intermediate calculations use the 'approx' method. 2502 - 'none' for no covariance matrix calculation. 2503 """ 2504 from statsmodels.base.covtype import descriptions 2505 2506 use_self = kwargs.pop('use_self', False) 2507 if use_self: 2508 res = self 2509 else: 2510 raise NotImplementedError 2511 res = self.__class__( 2512 self.model, self.params, 2513 normalized_cov_params=self.normalized_cov_params, 2514 scale=self.scale) 2515 2516 # Set the new covariance type 2517 res.cov_type = cov_type 2518 res.cov_kwds = {} 2519 2520 # Calculate the new covariance matrix 2521 approx_complex_step = self._cov_approx_complex_step 2522 if approx_complex_step: 2523 approx_type_str = 'complex-step' 2524 elif self._cov_approx_centered: 2525 approx_type_str = 'centered finite differences' 2526 else: 2527 approx_type_str = 'finite differences' 2528 2529 k_params = len(self.params) 2530 if k_params == 0: 2531 res.cov_params_default = np.zeros((0, 0)) 2532 res._rank = 0 2533 res.cov_kwds['description'] = 'No parameters estimated.' 2534 elif cov_type == 'custom': 2535 res.cov_type = kwargs['custom_cov_type'] 2536 res.cov_params_default = kwargs['custom_cov_params'] 2537 res.cov_kwds['description'] = kwargs['custom_description'] 2538 if len(self.fixed_params) > 0: 2539 mask = np.ix_(self._free_params_index, self._free_params_index) 2540 else: 2541 mask = np.s_[...] 
2542 res._rank = np.linalg.matrix_rank(res.cov_params_default[mask]) 2543 elif cov_type == 'none': 2544 res.cov_params_default = np.zeros((k_params, k_params)) * np.nan 2545 res._rank = np.nan 2546 res.cov_kwds['description'] = descriptions['none'] 2547 elif self.cov_type == 'approx': 2548 res.cov_params_default = res.cov_params_approx 2549 res.cov_kwds['description'] = descriptions['approx'].format( 2550 approx_type=approx_type_str) 2551 elif self.cov_type == 'oim': 2552 res.cov_params_default = res.cov_params_oim 2553 res.cov_kwds['description'] = descriptions['OIM'].format( 2554 approx_type=approx_type_str) 2555 elif self.cov_type == 'opg': 2556 res.cov_params_default = res.cov_params_opg 2557 res.cov_kwds['description'] = descriptions['OPG'].format( 2558 approx_type=approx_type_str) 2559 elif self.cov_type == 'robust' or self.cov_type == 'robust_oim': 2560 res.cov_params_default = res.cov_params_robust_oim 2561 res.cov_kwds['description'] = descriptions['robust-OIM'].format( 2562 approx_type=approx_type_str) 2563 elif self.cov_type == 'robust_approx': 2564 res.cov_params_default = res.cov_params_robust_approx 2565 res.cov_kwds['description'] = descriptions['robust-approx'].format( 2566 approx_type=approx_type_str) 2567 else: 2568 raise NotImplementedError('Invalid covariance matrix type.') 2569 2570 return res 2571 2572 @cache_readonly 2573 def aic(self): 2574 """ 2575 (float) Akaike Information Criterion 2576 """ 2577 return aic(self.llf, self.nobs_effective, self.df_model) 2578 2579 @cache_readonly 2580 def aicc(self): 2581 """ 2582 (float) Akaike Information Criterion with small sample correction 2583 """ 2584 return aicc(self.llf, self.nobs_effective, self.df_model) 2585 2586 @cache_readonly 2587 def bic(self): 2588 """ 2589 (float) Bayes Information Criterion 2590 """ 2591 return bic(self.llf, self.nobs_effective, self.df_model) 2592 2593 def _cov_params_approx(self, approx_complex_step=True, 2594 approx_centered=False): 2595 evaluated_hessian = self.nobs_effective * self.model.hessian( 2596 params=self.params, transformed=True, includes_fixed=True, 2597 method='approx', approx_complex_step=approx_complex_step, 2598 approx_centered=approx_centered) 2599 # TODO: Case with "not approx_complex_step" is not hit in 2600 # tests as of 2017-05-19 2601 2602 if len(self.fixed_params) > 0: 2603 mask = np.ix_(self._free_params_index, self._free_params_index) 2604 (tmp, singular_values) = pinv_extended(evaluated_hessian[mask]) 2605 neg_cov = np.zeros_like(evaluated_hessian) * np.nan 2606 neg_cov[mask] = tmp 2607 else: 2608 (neg_cov, singular_values) = pinv_extended(evaluated_hessian) 2609 2610 self.model.update(self.params, transformed=True, includes_fixed=True) 2611 if self._rank is None: 2612 self._rank = np.linalg.matrix_rank(np.diag(singular_values)) 2613 return -neg_cov 2614 2615 @cache_readonly 2616 def cov_params_approx(self): 2617 """ 2618 (array) The variance / covariance matrix. Computed using the numerical 2619 Hessian approximated by complex step or finite differences methods. 
2620 """ 2621 return self._cov_params_approx(self._cov_approx_complex_step, 2622 self._cov_approx_centered) 2623 2624 def _cov_params_oim(self, approx_complex_step=True, approx_centered=False): 2625 evaluated_hessian = self.nobs_effective * self.model.hessian( 2626 self.params, hessian_method='oim', transformed=True, 2627 includes_fixed=True, approx_complex_step=approx_complex_step, 2628 approx_centered=approx_centered) 2629 2630 if len(self.fixed_params) > 0: 2631 mask = np.ix_(self._free_params_index, self._free_params_index) 2632 (tmp, singular_values) = pinv_extended(evaluated_hessian[mask]) 2633 neg_cov = np.zeros_like(evaluated_hessian) * np.nan 2634 neg_cov[mask] = tmp 2635 else: 2636 (neg_cov, singular_values) = pinv_extended(evaluated_hessian) 2637 2638 self.model.update(self.params, transformed=True, includes_fixed=True) 2639 if self._rank is None: 2640 self._rank = np.linalg.matrix_rank(np.diag(singular_values)) 2641 return -neg_cov 2642 2643 @cache_readonly 2644 def cov_params_oim(self): 2645 """ 2646 (array) The variance / covariance matrix. Computed using the method 2647 from Harvey (1989). 2648 """ 2649 return self._cov_params_oim(self._cov_approx_complex_step, 2650 self._cov_approx_centered) 2651 2652 def _cov_params_opg(self, approx_complex_step=True, approx_centered=False): 2653 evaluated_hessian = self.nobs_effective * self.model._hessian_opg( 2654 self.params, transformed=True, includes_fixed=True, 2655 approx_complex_step=approx_complex_step, 2656 approx_centered=approx_centered) 2657 2658 no_free_params = (self._free_params_index is not None and 2659 len(self._free_params_index) == 0) 2660 2661 if no_free_params: 2662 neg_cov = np.zeros_like(evaluated_hessian) * np.nan 2663 singular_values = np.empty(0) 2664 elif len(self.fixed_params) > 0: 2665 mask = np.ix_(self._free_params_index, self._free_params_index) 2666 (tmp, singular_values) = pinv_extended(evaluated_hessian[mask]) 2667 neg_cov = np.zeros_like(evaluated_hessian) * np.nan 2668 neg_cov[mask] = tmp 2669 else: 2670 (neg_cov, singular_values) = pinv_extended(evaluated_hessian) 2671 2672 self.model.update(self.params, transformed=True, includes_fixed=True) 2673 if self._rank is None: 2674 if no_free_params: 2675 self._rank = 0 2676 else: 2677 self._rank = np.linalg.matrix_rank(np.diag(singular_values)) 2678 return -neg_cov 2679 2680 @cache_readonly 2681 def cov_params_opg(self): 2682 """ 2683 (array) The variance / covariance matrix. Computed using the outer 2684 product of gradients method. 2685 """ 2686 return self._cov_params_opg(self._cov_approx_complex_step, 2687 self._cov_approx_centered) 2688 2689 @cache_readonly 2690 def cov_params_robust(self): 2691 """ 2692 (array) The QMLE variance / covariance matrix. 
Alias for 2693 `cov_params_robust_oim` 2694 """ 2695 return self.cov_params_robust_oim 2696 2697 def _cov_params_robust_oim(self, approx_complex_step=True, 2698 approx_centered=False): 2699 cov_opg = self._cov_params_opg(approx_complex_step=approx_complex_step, 2700 approx_centered=approx_centered) 2701 2702 evaluated_hessian = self.nobs_effective * self.model.hessian( 2703 self.params, hessian_method='oim', transformed=True, 2704 includes_fixed=True, approx_complex_step=approx_complex_step, 2705 approx_centered=approx_centered) 2706 2707 if len(self.fixed_params) > 0: 2708 mask = np.ix_(self._free_params_index, self._free_params_index) 2709 cov_params = np.zeros_like(evaluated_hessian) * np.nan 2710 2711 cov_opg = cov_opg[mask] 2712 evaluated_hessian = evaluated_hessian[mask] 2713 2714 tmp, singular_values = pinv_extended( 2715 np.dot(np.dot(evaluated_hessian, cov_opg), evaluated_hessian)) 2716 2717 cov_params[mask] = tmp 2718 else: 2719 (cov_params, singular_values) = pinv_extended( 2720 np.dot(np.dot(evaluated_hessian, cov_opg), evaluated_hessian)) 2721 2722 self.model.update(self.params, transformed=True, includes_fixed=True) 2723 if self._rank is None: 2724 self._rank = np.linalg.matrix_rank(np.diag(singular_values)) 2725 return cov_params 2726 2727 @cache_readonly 2728 def cov_params_robust_oim(self): 2729 """ 2730 (array) The QMLE variance / covariance matrix. Computed using the 2731 method from Harvey (1989) as the evaluated hessian. 2732 """ 2733 return self._cov_params_robust_oim(self._cov_approx_complex_step, 2734 self._cov_approx_centered) 2735 2736 def _cov_params_robust_approx(self, approx_complex_step=True, 2737 approx_centered=False): 2738 cov_opg = self._cov_params_opg(approx_complex_step=approx_complex_step, 2739 approx_centered=approx_centered) 2740 2741 evaluated_hessian = self.nobs_effective * self.model.hessian( 2742 self.params, transformed=True, includes_fixed=True, 2743 method='approx', approx_complex_step=approx_complex_step) 2744 # TODO: Case with "not approx_complex_step" is not 2745 # hit in tests as of 2017-05-19 2746 2747 if len(self.fixed_params) > 0: 2748 mask = np.ix_(self._free_params_index, self._free_params_index) 2749 cov_params = np.zeros_like(evaluated_hessian) * np.nan 2750 2751 cov_opg = cov_opg[mask] 2752 evaluated_hessian = evaluated_hessian[mask] 2753 2754 tmp, singular_values = pinv_extended( 2755 np.dot(np.dot(evaluated_hessian, cov_opg), evaluated_hessian)) 2756 2757 cov_params[mask] = tmp 2758 else: 2759 (cov_params, singular_values) = pinv_extended( 2760 np.dot(np.dot(evaluated_hessian, cov_opg), evaluated_hessian)) 2761 2762 self.model.update(self.params, transformed=True, includes_fixed=True) 2763 if self._rank is None: 2764 self._rank = np.linalg.matrix_rank(np.diag(singular_values)) 2765 return cov_params 2766 2767 @cache_readonly 2768 def cov_params_robust_approx(self): 2769 """ 2770 (array) The QMLE variance / covariance matrix. Computed using the 2771 numerical Hessian as the evaluated hessian. 2772 """ 2773 return self._cov_params_robust_approx(self._cov_approx_complex_step, 2774 self._cov_approx_centered) 2775 2776 def info_criteria(self, criteria, method='standard'): 2777 r""" 2778 Information criteria 2779 2780 Parameters 2781 ---------- 2782 criteria : {'aic', 'bic', 'hqic'} 2783 The information criteria to compute. 2784 method : {'standard', 'lutkepohl'} 2785 The method for information criteria computation. Default is 2786 'standard' method; 'lutkepohl' computes the information criteria 2787 as in Lütkepohl (2007). 
See Notes for formulas.

        Notes
        -----
        The `'standard'` formulas are:

        .. math::

            AIC & = -2 \log L(Y_n | \hat \psi) + 2 k \\
            BIC & = -2 \log L(Y_n | \hat \psi) + k \log n \\
            HQIC & = -2 \log L(Y_n | \hat \psi) + 2 k \log \log n \\

        where :math:`\hat \psi` are the maximum likelihood estimates of the
        parameters, :math:`n` is the number of observations, and :math:`k` is
        the number of estimated parameters.

        Note that the `'standard'` formulas are returned from the `aic`, `bic`,
        and `hqic` results attributes.

        The `'lutkepohl'` formulas are (Lütkepohl, 2007):

        .. math::

            AIC_L & = \log | Q | + \frac{2 k}{n} \\
            BIC_L & = \log | Q | + \frac{k \log n}{n} \\
            HQIC_L & = \log | Q | + \frac{2 k \log \log n}{n} \\

        where :math:`Q` is the state covariance matrix. Note that the Lütkepohl
        definitions do not apply to all state space models, and should be used
        with care outside of SARIMAX and VARMAX models.

        References
        ----------
        .. [*] Lütkepohl, Helmut. 2007. *New Introduction to Multiple Time*
           *Series Analysis.* Berlin: Springer.
        """
        criteria = criteria.lower()
        method = method.lower()

        if method == 'standard':
            out = getattr(self, criteria)
        elif method == 'lutkepohl':
            if self.filter_results.state_cov.shape[-1] > 1:
                raise ValueError('Cannot compute Lütkepohl statistics for'
                                 ' models with time-varying state covariance'
                                 ' matrix.')

            cov = self.filter_results.state_cov[:, :, 0]
            if criteria == 'aic':
                out = np.squeeze(np.linalg.slogdet(cov)[1] +
                                 2 * self.df_model / self.nobs_effective)
            elif criteria == 'bic':
                out = np.squeeze(np.linalg.slogdet(cov)[1] +
                                 self.df_model * np.log(self.nobs_effective) /
                                 self.nobs_effective)
            elif criteria == 'hqic':
                out = np.squeeze(np.linalg.slogdet(cov)[1] +
                                 2 * self.df_model *
                                 np.log(np.log(self.nobs_effective)) /
                                 self.nobs_effective)
            else:
                raise ValueError('Invalid information criteria')

        else:
            raise ValueError('Invalid information criteria computation method')

        return out

    @cache_readonly
    def fittedvalues(self):
        """
        (array) The predicted values of the model. An (nobs x k_endog) array.
        """
        # This is a (k_endog x nobs) array; do not want to squeeze in case of
        # the corner case where nobs = 1 (mostly a concern in the predict or
        # forecast functions, but here also to maintain consistency)
        fittedvalues = self.forecasts
        if fittedvalues is None:
            pass
        elif fittedvalues.shape[0] == 1:
            fittedvalues = fittedvalues[0, :]
        else:
            fittedvalues = fittedvalues.T
        return fittedvalues

    @cache_readonly
    def hqic(self):
        """
        (float) Hannan-Quinn Information Criterion
        """
        # return (-2 * self.llf +
        #         2 * np.log(np.log(self.nobs_effective)) * self.df_model)
        return hqic(self.llf, self.nobs_effective, self.df_model)

    @cache_readonly
    def llf_obs(self):
        """
        (array) The values of the log-likelihood function evaluated at
        `params`, for each observation.
        """
        return self.filter_results.llf_obs

    @cache_readonly
    def llf(self):
        """
        (float) The value of the log-likelihood function evaluated at `params`.
2892 """ 2893 return self.filter_results.llf 2894 2895 @cache_readonly 2896 def loglikelihood_burn(self): 2897 """ 2898 (float) The number of observations during which the likelihood is not 2899 evaluated. 2900 """ 2901 return self.filter_results.loglikelihood_burn 2902 2903 @cache_readonly 2904 def mae(self): 2905 """ 2906 (float) Mean absolute error 2907 """ 2908 return np.mean(np.abs(self.resid)) 2909 2910 @cache_readonly 2911 def mse(self): 2912 """ 2913 (float) Mean squared error 2914 """ 2915 return self.sse / self.nobs 2916 2917 @cache_readonly 2918 def pvalues(self): 2919 """ 2920 (array) The p-values associated with the z-statistics of the 2921 coefficients. Note that the coefficients are assumed to have a Normal 2922 distribution. 2923 """ 2924 pvalues = np.zeros_like(self.zvalues) * np.nan 2925 mask = np.ones_like(pvalues, dtype=bool) 2926 mask[self._free_params_index] = True 2927 mask &= ~np.isnan(self.zvalues) 2928 pvalues[mask] = norm.sf(np.abs(self.zvalues[mask])) * 2 2929 return pvalues 2930 2931 @cache_readonly 2932 def resid(self): 2933 """ 2934 (array) The model residuals. An (nobs x k_endog) array. 2935 """ 2936 # This is a (k_endog x nobs array; do not want to squeeze in case of 2937 # the corner case where nobs = 1 (mostly a concern in the predict or 2938 # forecast functions, but here also to maintain consistency) 2939 resid = self.forecasts_error 2940 if resid is None: 2941 pass 2942 elif resid.shape[0] == 1: 2943 resid = resid[0, :] 2944 else: 2945 resid = resid.T 2946 return resid 2947 2948 @property 2949 def states(self): 2950 if self.model._index_generated and not self.model._index_none: 2951 warnings.warn('No supported index is available. The `states`' 2952 ' DataFrame uses a generated integer index', 2953 ValueWarning) 2954 return self._states 2955 2956 @cache_readonly 2957 def sse(self): 2958 """ 2959 (float) Sum of squared errors 2960 """ 2961 return np.sum(self.resid**2) 2962 2963 @cache_readonly 2964 def zvalues(self): 2965 """ 2966 (array) The z-statistics for the coefficients. 2967 """ 2968 return self.params / self.bse 2969 2970 def test_normality(self, method): 2971 """ 2972 Test for normality of standardized residuals. 2973 2974 Null hypothesis is normality. 2975 2976 Parameters 2977 ---------- 2978 method : {'jarquebera', None} 2979 The statistical test for normality. Must be 'jarquebera' for 2980 Jarque-Bera normality test. If None, an attempt is made to select 2981 an appropriate test. 2982 2983 See Also 2984 -------- 2985 statsmodels.stats.stattools.jarque_bera 2986 The Jarque-Bera test of normality. 2987 2988 Notes 2989 ----- 2990 Let `d` = max(loglikelihood_burn, nobs_diffuse); this test is 2991 calculated ignoring the first `d` residuals. 2992 2993 In the case of missing data, the maintained hypothesis is that the 2994 data are missing completely at random. This test is then run on the 2995 standardized residuals excluding those corresponding to missing 2996 observations. 
2997 """ 2998 if method is None: 2999 method = 'jarquebera' 3000 3001 if self.standardized_forecasts_error is None: 3002 raise ValueError('Cannot compute test statistic when standardized' 3003 ' forecast errors have not been computed.') 3004 3005 if method == 'jarquebera': 3006 from statsmodels.stats.stattools import jarque_bera 3007 d = np.maximum(self.loglikelihood_burn, self.nobs_diffuse) 3008 output = [] 3009 for i in range(self.model.k_endog): 3010 resid = self.filter_results.standardized_forecasts_error[i, d:] 3011 mask = ~np.isnan(resid) 3012 output.append(jarque_bera(resid[mask])) 3013 else: 3014 raise NotImplementedError('Invalid normality test method.') 3015 3016 return np.array(output) 3017 3018 def test_heteroskedasticity(self, method, alternative='two-sided', 3019 use_f=True): 3020 r""" 3021 Test for heteroskedasticity of standardized residuals 3022 3023 Tests whether the sum-of-squares in the first third of the sample is 3024 significantly different than the sum-of-squares in the last third 3025 of the sample. Analogous to a Goldfeld-Quandt test. The null hypothesis 3026 is of no heteroskedasticity. 3027 3028 Parameters 3029 ---------- 3030 method : {'breakvar', None} 3031 The statistical test for heteroskedasticity. Must be 'breakvar' 3032 for test of a break in the variance. If None, an attempt is 3033 made to select an appropriate test. 3034 alternative : str, 'increasing', 'decreasing' or 'two-sided' 3035 This specifies the alternative for the p-value calculation. Default 3036 is two-sided. 3037 use_f : bool, optional 3038 Whether or not to compare against the asymptotic distribution 3039 (chi-squared) or the approximate small-sample distribution (F). 3040 Default is True (i.e. default is to compare against an F 3041 distribution). 3042 3043 Returns 3044 ------- 3045 output : ndarray 3046 An array with `(test_statistic, pvalue)` for each endogenous 3047 variable. The array is then sized `(k_endog, 2)`. If the method is 3048 called as `het = res.test_heteroskedasticity()`, then `het[0]` is 3049 an array of size 2 corresponding to the first endogenous variable, 3050 where `het[0][0]` is the test statistic, and `het[0][1]` is the 3051 p-value. 3052 3053 See Also 3054 -------- 3055 statsmodels.tsa.stattools.breakvar_heteroskedasticity_test 3056 3057 Notes 3058 ----- 3059 The null hypothesis is of no heteroskedasticity. 3060 3061 For :math:`h = [T/3]`, the test statistic is: 3062 3063 .. math:: 3064 3065 H(h) = \sum_{t=T-h+1}^T \tilde v_t^2 3066 \Bigg / \sum_{t=d+1}^{d+1+h} \tilde v_t^2 3067 3068 where :math:`d` = max(loglikelihood_burn, nobs_diffuse)` (usually 3069 corresponding to diffuse initialization under either the approximate 3070 or exact approach). 3071 3072 This statistic can be tested against an :math:`F(h,h)` distribution. 3073 Alternatively, :math:`h H(h)` is asymptotically distributed according 3074 to :math:`\chi_h^2`; this second test can be applied by passing 3075 `use_f=True` as an argument. 3076 3077 See section 5.4 of [1]_ for the above formula and discussion, as well 3078 as additional details. 3079 3080 TODO 3081 3082 - Allow specification of :math:`h` 3083 3084 References 3085 ---------- 3086 .. [1] Harvey, Andrew C. 1990. *Forecasting, Structural Time Series* 3087 *Models and the Kalman Filter.* Cambridge University Press. 
3088 """ 3089 if method is None: 3090 method = 'breakvar' 3091 3092 if self.standardized_forecasts_error is None: 3093 raise ValueError('Cannot compute test statistic when standardized' 3094 ' forecast errors have not been computed.') 3095 3096 if method == 'breakvar': 3097 from statsmodels.tsa.stattools import ( 3098 breakvar_heteroskedasticity_test 3099 ) 3100 # Store some values 3101 resid = self.filter_results.standardized_forecasts_error 3102 d = np.maximum(self.loglikelihood_burn, self.nobs_diffuse) 3103 # This differs from self.nobs_effective because here we want to 3104 # exclude exact diffuse periods, whereas self.nobs_effective only 3105 # excludes explicitly burned (usually approximate diffuse) periods. 3106 nobs_effective = self.nobs - d 3107 h = int(np.round(nobs_effective / 3)) 3108 3109 test_statistics = [] 3110 p_values = [] 3111 for i in range(self.model.k_endog): 3112 test_statistic, p_value = breakvar_heteroskedasticity_test( 3113 resid[i, d:], 3114 subset_length=h, 3115 alternative=alternative, 3116 use_f=use_f 3117 ) 3118 test_statistics.append(test_statistic) 3119 p_values.append(p_value) 3120 3121 output = np.c_[test_statistics, p_values] 3122 else: 3123 raise NotImplementedError('Invalid heteroskedasticity test' 3124 ' method.') 3125 3126 return output 3127 3128 def test_serial_correlation(self, method, df_adjust=False, lags=None): 3129 """ 3130 Ljung-Box test for no serial correlation of standardized residuals 3131 3132 Null hypothesis is no serial correlation. 3133 3134 Parameters 3135 ---------- 3136 method : {'ljungbox','boxpierece', None} 3137 The statistical test for serial correlation. If None, an attempt is 3138 made to select an appropriate test. 3139 lags : None, int or array_like 3140 If lags is an integer then this is taken to be the largest lag 3141 that is included, the test result is reported for all smaller lag 3142 length. 3143 If lags is a list or array, then all lags are included up to the 3144 largest lag in the list, however only the tests for the lags in the 3145 list are reported. 3146 If lags is None, then the default maxlag is 12*(nobs/100)^{1/4}. 3147 After 0.12 the default maxlag will change to min(10, nobs // 5) for 3148 non-seasonal models and min(2*m, nobs // 5) for seasonal time 3149 series where m is the seasonal period. 3150 df_adjust : bool, optional 3151 If True, the degrees of freedom consumed by the model is subtracted 3152 from the degrees-of-freedom used in the test so that the adjusted 3153 dof for the statistics are lags - model_df. In an ARMA model, this 3154 value is usually p+q where p is the AR order and q is the MA order. 3155 When using df_adjust, it is not possible to use tests based on 3156 fewer than model_df lags. 3157 Returns 3158 ------- 3159 output : ndarray 3160 An array with `(test_statistic, pvalue)` for each endogenous 3161 variable and each lag. The array is then sized 3162 `(k_endog, 2, lags)`. If the method is called as 3163 `ljungbox = res.test_serial_correlation()`, then `ljungbox[i]` 3164 holds the results of the Ljung-Box test (as would be returned by 3165 `statsmodels.stats.diagnostic.acorr_ljungbox`) for the `i` th 3166 endogenous variable. 3167 3168 See Also 3169 -------- 3170 statsmodels.stats.diagnostic.acorr_ljungbox 3171 Ljung-Box test for serial correlation. 3172 3173 Notes 3174 ----- 3175 Let `d` = max(loglikelihood_burn, nobs_diffuse); this test is 3176 calculated ignoring the first `d` residuals. 3177 3178 Output is nan for any endogenous variable which has missing values. 
3179 """ 3180 if method is None: 3181 method = 'ljungbox' 3182 3183 if self.standardized_forecasts_error is None: 3184 raise ValueError('Cannot compute test statistic when standardized' 3185 ' forecast errors have not been computed.') 3186 3187 if method == 'ljungbox' or method == 'boxpierce': 3188 from statsmodels.stats.diagnostic import acorr_ljungbox 3189 d = np.maximum(self.loglikelihood_burn, self.nobs_diffuse) 3190 # This differs from self.nobs_effective because here we want to 3191 # exclude exact diffuse periods, whereas self.nobs_effective only 3192 # excludes explicitly burned (usually approximate diffuse) periods. 3193 nobs_effective = self.nobs - d 3194 output = [] 3195 3196 # Default lags for acorr_ljungbox is 40, but may not always have 3197 # that many observations 3198 if lags is None: 3199 seasonal_periods = getattr(self.model, "seasonal_periods", 0) 3200 if seasonal_periods: 3201 lags = min(2 * seasonal_periods, nobs_effective // 5) 3202 else: 3203 lags = min(10, nobs_effective // 5) 3204 3205 model_df = 0 3206 if df_adjust: 3207 model_df = max(0, self.df_model - self.k_diffuse_states - 1) 3208 3209 for i in range(self.model.k_endog): 3210 results = acorr_ljungbox( 3211 self.filter_results.standardized_forecasts_error[i][d:], 3212 lags=lags, boxpierce=(method == 'boxpierce'), 3213 model_df=model_df, return_df=False) 3214 if method == 'ljungbox': 3215 output.append(results[0:2]) 3216 else: 3217 output.append(results[2:]) 3218 3219 output = np.c_[output] 3220 else: 3221 raise NotImplementedError('Invalid serial correlation test' 3222 ' method.') 3223 return output 3224 3225 def get_prediction(self, start=None, end=None, dynamic=False, 3226 index=None, exog=None, extend_model=None, 3227 extend_kwargs=None, **kwargs): 3228 """ 3229 In-sample prediction and out-of-sample forecasting 3230 3231 Parameters 3232 ---------- 3233 start : int, str, or datetime, optional 3234 Zero-indexed observation number at which to start forecasting, 3235 i.e., the first forecast is start. Can also be a date string to 3236 parse or a datetime type. Default is the the zeroth observation. 3237 end : int, str, or datetime, optional 3238 Zero-indexed observation number at which to end forecasting, i.e., 3239 the last forecast is end. Can also be a date string to 3240 parse or a datetime type. However, if the dates index does not 3241 have a fixed frequency, end must be an integer index if you 3242 want out of sample prediction. Default is the last observation in 3243 the sample. 3244 dynamic : bool, int, str, or datetime, optional 3245 Integer offset relative to `start` at which to begin dynamic 3246 prediction. Can also be an absolute date string to parse or a 3247 datetime type (these are not interpreted as offsets). 3248 Prior to this observation, true endogenous values will be used for 3249 prediction; starting with this observation and continuing through 3250 the end of prediction, forecasted endogenous values will be used 3251 instead. 3252 **kwargs 3253 Additional arguments may required for forecasting beyond the end 3254 of the sample. See `FilterResults.predict` for more details. 3255 3256 Returns 3257 ------- 3258 predictions : PredictionResults 3259 PredictionResults instance containing in-sample predictions and 3260 out-of-sample forecasts. 
3261 """ 3262 if start is None: 3263 start = 0 3264 3265 # Handle start, end, dynamic 3266 start, end, out_of_sample, prediction_index = ( 3267 self.model._get_prediction_index(start, end, index)) 3268 3269 # Handle `dynamic` 3270 if isinstance(dynamic, (str, dt.datetime, pd.Timestamp)): 3271 dynamic, _, _ = self.model._get_index_loc(dynamic) 3272 # Convert to offset relative to start 3273 dynamic = dynamic - start 3274 3275 # If we have out-of-sample forecasting and `exog` or in general any 3276 # kind of time-varying state space model, then we need to create an 3277 # extended model to get updated state space system matrices 3278 if extend_model is None: 3279 extend_model = (self.model.exog is not None or 3280 not self.filter_results.time_invariant) 3281 if out_of_sample and extend_model: 3282 kwargs = self.model._get_extension_time_varying_matrices( 3283 self.params, exog, out_of_sample, extend_kwargs, 3284 transformed=True, includes_fixed=True, **kwargs) 3285 3286 # Make sure the model class has the current parameters 3287 self.model.update(self.params, transformed=True, includes_fixed=True) 3288 3289 # Perform the prediction 3290 # This is a (k_endog x npredictions) array; do not want to squeeze in 3291 # case of npredictions = 1 3292 prediction_results = self.filter_results.predict( 3293 start, end + out_of_sample + 1, dynamic, **kwargs) 3294 3295 # Return a new mlemodel.PredictionResults object 3296 return PredictionResultsWrapper(PredictionResults( 3297 self, prediction_results, row_labels=prediction_index)) 3298 3299 def get_forecast(self, steps=1, **kwargs): 3300 """ 3301 Out-of-sample forecasts and prediction intervals 3302 3303 Parameters 3304 ---------- 3305 steps : int, str, or datetime, optional 3306 If an integer, the number of steps to forecast from the end of the 3307 sample. Can also be a date string to parse or a datetime type. 3308 However, if the dates index does not have a fixed frequency, steps 3309 must be an integer. Default 3310 **kwargs 3311 Additional arguments may required for forecasting beyond the end 3312 of the sample. See `FilterResults.predict` for more details. 3313 3314 Returns 3315 ------- 3316 predictions : PredictionResults 3317 PredictionResults instance containing in-sample predictions and 3318 out-of-sample forecasts. 3319 """ 3320 if isinstance(steps, int): 3321 end = self.nobs + steps - 1 3322 else: 3323 end = steps 3324 return self.get_prediction(start=self.nobs, end=end, **kwargs) 3325 3326 def predict(self, start=None, end=None, dynamic=False, **kwargs): 3327 """ 3328 In-sample prediction and out-of-sample forecasting 3329 3330 Parameters 3331 ---------- 3332 start : int, str, or datetime, optional 3333 Zero-indexed observation number at which to start forecasting, 3334 i.e., the first forecast is start. Can also be a date string to 3335 parse or a datetime type. Default is the the zeroth observation. 3336 end : int, str, or datetime, optional 3337 Zero-indexed observation number at which to end forecasting, i.e., 3338 the last forecast is end. Can also be a date string to 3339 parse or a datetime type. However, if the dates index does not 3340 have a fixed frequency, end must be an integer index if you 3341 want out of sample prediction. Default is the last observation in 3342 the sample. 3343 dynamic : bool, int, str, or datetime, optional 3344 Integer offset relative to `start` at which to begin dynamic 3345 prediction. Can also be an absolute date string to parse or a 3346 datetime type (these are not interpreted as offsets). 
3347             Prior to this observation, true endogenous values will be used for
3348             prediction; starting with this observation and continuing through
3349             the end of prediction, forecasted endogenous values will be used
3350             instead.
3351         **kwargs
3352             Additional arguments may be required for forecasting beyond the end
3353             of the sample. See `FilterResults.predict` for more details.
3354
3355         Returns
3356         -------
3357         forecast : array_like
3358             Array of in-sample predictions and / or out-of-sample
3359             forecasts. An (npredict x k_endog) array.
3360
3361         See Also
3362         --------
3363         forecast
3364             Out-of-sample forecasts
3365         get_prediction
3366             Prediction results and confidence intervals
3367         """
3368         # Perform the prediction
3369         prediction_results = self.get_prediction(start, end, dynamic, **kwargs)
3370         return prediction_results.predicted_mean
3371
3372     def forecast(self, steps=1, **kwargs):
3373         """
3374         Out-of-sample forecasts
3375
3376         Parameters
3377         ----------
3378         steps : int, str, or datetime, optional
3379             If an integer, the number of steps to forecast from the end of the
3380             sample. Can also be a date string to parse or a datetime type.
3381             However, if the dates index does not have a fixed frequency, steps
3382             must be an integer. Default is 1.
3383         **kwargs
3384             Additional arguments may be required for forecasting beyond the end
3385             of the sample. See `FilterResults.predict` for more details.
3386
3387         Returns
3388         -------
3389         forecast : array_like
3390             Array of out-of-sample forecasts. A (steps x k_endog) array, or a
3391             (steps,) array if `k_endog=1`.
3392         """
3393         if isinstance(steps, int):
3394             end = self.nobs + steps - 1
3395         else:
3396             end = steps
3397         return self.predict(start=self.nobs, end=end, **kwargs)
3398
3399     def simulate(self, nsimulations, measurement_shocks=None,
3400                  state_shocks=None, initial_state=None, anchor=None,
3401                  repetitions=None, exog=None, extend_model=None,
3402                  extend_kwargs=None, **kwargs):
3403         r"""
3404         Simulate a new time series following the state space model
3405
3406         Parameters
3407         ----------
3408         nsimulations : int
3409             The number of observations to simulate. If the model is
3410             time-invariant this can be any number. If the model is
3411             time-varying, then this number must be less than or equal to the
3412             number of observations.
3413         measurement_shocks : array_like, optional
3414             If specified, these are the shocks to the measurement equation,
3415             :math:`\varepsilon_t`. If unspecified, these are automatically
3416             generated using a pseudo-random number generator. If specified,
3417             must be shaped `nsimulations` x `k_endog`, where `k_endog` is the
3418             same as in the state space model.
3419         state_shocks : array_like, optional
3420             If specified, these are the shocks to the state equation,
3421             :math:`\eta_t`. If unspecified, these are automatically
3422             generated using a pseudo-random number generator. If specified,
3423             must be shaped `nsimulations` x `k_posdef` where `k_posdef` is the
3424             same as in the state space model.
3425         initial_state : array_like, optional
3426             If specified, this is the initial state vector to use in
3427             simulation, which should be shaped (`k_states` x 1), where
3428             `k_states` is the same as in the state space model. If unspecified,
3429             but the model has been initialized, then that initialization is
3430             used. This must be specified if `anchor` is anything other than
3431             "start" or 0.
3432 anchor : int, str, or datetime, optional 3433 Starting point from which to begin the simulations; type depends on 3434 the index of the given `endog` model. Two special cases are the 3435 strings 'start' and 'end', which refer to starting at the beginning 3436 and end of the sample, respectively. If a date/time index was 3437 provided to the model, then this argument can be a date string to 3438 parse or a datetime type. Otherwise, an integer index should be 3439 given. Default is 'start'. 3440 repetitions : int, optional 3441 Number of simulated paths to generate. Default is 1 simulated path. 3442 exog : array_like, optional 3443 New observations of exogenous regressors, if applicable. 3444 3445 Returns 3446 ------- 3447 simulated_obs : ndarray 3448 An array of simulated observations. If `repetitions=None`, then it 3449 will be shaped (nsimulations x k_endog) or (nsimulations,) if 3450 `k_endog=1`. Otherwise it will be shaped 3451 (nsimulations x k_endog x repetitions). If the model was given 3452 Pandas input then the output will be a Pandas object. If 3453 `k_endog > 1` and `repetitions` is not None, then the output will 3454 be a Pandas DataFrame that has a MultiIndex for the columns, with 3455 the first level containing the names of the `endog` variables and 3456 the second level containing the repetition number. 3457 """ 3458 # Get the starting location 3459 if anchor is None or anchor == 'start': 3460 iloc = 0 3461 elif anchor == 'end': 3462 iloc = self.nobs 3463 else: 3464 iloc, _, _ = self.model._get_index_loc(anchor) 3465 if isinstance(iloc, slice): 3466 iloc = iloc.start 3467 3468 if iloc < 0: 3469 iloc = self.nobs + iloc 3470 if iloc > self.nobs: 3471 raise ValueError('Cannot anchor simulation outside of the sample.') 3472 3473 # Setup the initial state 3474 if initial_state is None: 3475 initial_state_moments = ( 3476 self.predicted_state[:, iloc], 3477 self.predicted_state_cov[:, :, iloc]) 3478 3479 _repetitions = 1 if repetitions is None else repetitions 3480 3481 initial_state = np.random.multivariate_normal( 3482 *initial_state_moments, size=_repetitions).T 3483 3484 scale = self.scale if self.filter_results.filter_concentrated else None 3485 with self.model.ssm.fixed_scale(scale): 3486 sim = self.model.simulate( 3487 self.params, nsimulations, 3488 measurement_shocks=measurement_shocks, 3489 state_shocks=state_shocks, initial_state=initial_state, 3490 anchor=anchor, repetitions=repetitions, exog=exog, 3491 transformed=True, includes_fixed=True, 3492 extend_model=extend_model, extend_kwargs=extend_kwargs, 3493 **kwargs) 3494 3495 return sim 3496 3497 def impulse_responses(self, steps=1, impulse=0, orthogonalized=False, 3498 cumulative=False, **kwargs): 3499 """ 3500 Impulse response function 3501 3502 Parameters 3503 ---------- 3504 steps : int, optional 3505 The number of steps for which impulse responses are calculated. 3506 Default is 1. Note that for time-invariant models, the initial 3507 impulse is not counted as a step, so if `steps=1`, the output will 3508 have 2 entries. 3509 impulse : int, str or array_like 3510 If an integer, the state innovation to pulse; must be between 0 3511 and `k_posdef-1`. If a str, it indicates which column of df 3512 the unit (1) impulse is given. 3513 Alternatively, a custom impulse vector may be provided; must be 3514 shaped `k_posdef x 1`. 3515 orthogonalized : bool, optional 3516 Whether or not to perform impulse using orthogonalized innovations. 3517 Note that this will also affect custum `impulse` vectors. 
Default 3518 is False. 3519 cumulative : bool, optional 3520 Whether or not to return cumulative impulse responses. Default is 3521 False. 3522 anchor : int, str, or datetime, optional 3523 Time point within the sample for the state innovation impulse. Type 3524 depends on the index of the given `endog` in the model. Two special 3525 cases are the strings 'start' and 'end', which refer to setting the 3526 impulse at the first and last points of the sample, respectively. 3527 Integer values can run from 0 to `nobs - 1`, or can be negative to 3528 apply negative indexing. Finally, if a date/time index was provided 3529 to the model, then this argument can be a date string to parse or a 3530 datetime type. Default is 'start'. 3531 exog : array_like, optional 3532 New observations of exogenous regressors, if applicable. 3533 **kwargs 3534 If the model has time-varying design or transition matrices and the 3535 combination of `anchor` and `steps` implies creating impulse 3536 responses for the out-of-sample period, then these matrices must 3537 have updated values provided for the out-of-sample steps. For 3538 example, if `design` is a time-varying component, `nobs` is 10, 3539 `anchor=1`, and `steps` is 15, a (`k_endog` x `k_states` x 7) 3540 matrix must be provided with the new design matrix values. 3541 3542 Returns 3543 ------- 3544 impulse_responses : ndarray 3545 Responses for each endogenous variable due to the impulse 3546 given by the `impulse` argument. For a time-invariant model, the 3547 impulse responses are given for `steps + 1` elements (this gives 3548 the "initial impulse" followed by `steps` responses for the 3549 important cases of VAR and SARIMAX models), while for time-varying 3550 models the impulse responses are only given for `steps` elements 3551 (to avoid having to unexpectedly provide updated time-varying 3552 matrices). 3553 3554 Notes 3555 ----- 3556 Intercepts in the measurement and state equation are ignored when 3557 calculating impulse responses. 
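
        Examples
        --------
        A sketch of typical usage; the model and data (``endog``) are
        placeholders and ``sm`` refers to ``statsmodels.api``, so no output
        values are shown:

        >>> res = sm.tsa.SARIMAX(endog, order=(1, 0, 0)).fit()
        >>> irfs = res.impulse_responses(steps=10)  # steps + 1 = 11 values
        >>> cirfs = res.impulse_responses(steps=10, cumulative=True)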
3558 """ 3559 scale = self.scale if self.filter_results.filter_concentrated else None 3560 with self.model.ssm.fixed_scale(scale): 3561 irfs = self.model.impulse_responses(self.params, steps, impulse, 3562 orthogonalized, cumulative, 3563 **kwargs) 3564 # These are wrapped automatically, so just return the array 3565 if isinstance(irfs, (pd.Series, pd.DataFrame)): 3566 irfs = irfs.values 3567 return irfs 3568 3569 def _apply(self, mod, refit=False, fit_kwargs=None, **kwargs): 3570 if fit_kwargs is None: 3571 fit_kwargs = {} 3572 3573 if refit: 3574 fit_kwargs.setdefault('start_params', self.params) 3575 if self._has_fixed_params: 3576 fit_kwargs.setdefault('includes_fixed', True) 3577 res = mod.fit_constrained(self._fixed_params, **fit_kwargs) 3578 else: 3579 res = mod.fit(**fit_kwargs) 3580 else: 3581 if 'cov_type' in fit_kwargs: 3582 raise ValueError('Cannot specify covariance type in' 3583 ' `fit_kwargs` unless refitting' 3584 ' parameters (not available in extend).') 3585 if 'cov_kwds' in fit_kwargs: 3586 raise ValueError('Cannot specify covariance keyword arguments' 3587 ' in `fit_kwargs` unless refitting' 3588 ' parameters (not available in extend).') 3589 3590 if self.cov_type == 'none': 3591 fit_kwargs['cov_type'] = 'none' 3592 else: 3593 fit_kwargs['cov_type'] = 'custom' 3594 fit_kwargs['cov_kwds'] = { 3595 'custom_cov_type': self.cov_type, 3596 'custom_cov_params': self.cov_params_default, 3597 'custom_description': ('Parameters and standard errors' 3598 ' were estimated using a different' 3599 ' dataset and were then applied to' 3600 ' this dataset. %s' 3601 % self.cov_kwds['description'])} 3602 3603 if self.smoother_results is not None: 3604 func = mod.smooth 3605 else: 3606 func = mod.filter 3607 3608 if self._has_fixed_params: 3609 with mod.fix_params(self._fixed_params): 3610 fit_kwargs.setdefault('includes_fixed', True) 3611 res = func(self.params, **fit_kwargs) 3612 else: 3613 res = func(self.params, **fit_kwargs) 3614 3615 return res 3616 3617 def _news_previous_results(self, previous, start, end, periods): 3618 # Compute the news 3619 out = self.smoother_results.news(previous.smoother_results, 3620 start=start, end=end) 3621 return out 3622 3623 def _news_updated_results(self, updated, start, end, periods): 3624 return updated._news_previous_results(self, start, end, periods) 3625 3626 def _news_previous_data(self, endog, start, end, periods, exog): 3627 previous = self.apply(endog, exog=exog, copy_initialization=True) 3628 return self._news_previous_results(previous, start, end, periods) 3629 3630 def _news_updated_data(self, endog, start, end, periods, exog): 3631 updated = self.apply(endog, exog=exog, copy_initialization=True) 3632 return self._news_updated_results(updated, start, end, periods) 3633 3634 def news(self, comparison, impact_date=None, impacted_variable=None, 3635 start=None, end=None, periods=None, exog=None, 3636 comparison_type=None, return_raw=False, tolerance=1e-10, 3637 **kwargs): 3638 """ 3639 Compute impacts from updated data (news and revisions) 3640 3641 Parameters 3642 ---------- 3643 comparison : array_like or MLEResults 3644 An updated dataset with updated and/or revised data from which the 3645 news can be computed, or an updated or previous results object 3646 to use in computing the news. 3647 impact_date : int, str, or datetime, optional 3648 A single specific period of impacts from news and revisions to 3649 compute. Can also be a date string to parse or a datetime type. 
3650 This argument cannot be used in combination with `start`, `end`, or 3651 `periods`. Default is the first out-of-sample observation. 3652 impacted_variable : str, list, array, or slice, optional 3653 Observation variable label or slice of labels specifying that only 3654 specific impacted variables should be shown in the News output. The 3655 impacted variable(s) describe the variables that were *affected* by 3656 the news. If you do not know the labels for the variables, check 3657 the `endog_names` attribute of the model instance. 3658 start : int, str, or datetime, optional 3659 The first period of impacts from news and revisions to compute. 3660 Can also be a date string to parse or a datetime type. Default is 3661 the first out-of-sample observation. 3662 end : int, str, or datetime, optional 3663 The last period of impacts from news and revisions to compute. 3664 Can also be a date string to parse or a datetime type. Default is 3665 the first out-of-sample observation. 3666 periods : int, optional 3667 The number of periods of impacts from news and revisions to 3668 compute. 3669 exog : array_like, optional 3670 Array of exogenous regressors for the out-of-sample period, if 3671 applicable. 3672 comparison_type : {None, 'previous', 'updated'} 3673 This denotes whether the `comparison` argument represents a 3674 *previous* results object or dataset or an *updated* results object 3675 or dataset. If not specified, then an attempt is made to determine 3676 the comparison type. 3677 return_raw : bool, optional 3678 Whether or not to return only the specific output or a full 3679 results object. Default is to return a full results object. 3680 tolerance : float, optional 3681 The numerical threshold for determining zero impact. Default is 3682 that any impact less than 1e-10 is assumed to be zero. 3683 3684 Returns 3685 ------- 3686 NewsResults 3687 Impacts of data revisions and news on estimates 3688 3689 References 3690 ---------- 3691 .. [1] Bańbura, Marta, and Michele Modugno. 3692 "Maximum likelihood estimation of factor models on datasets with 3693 arbitrary pattern of missing data." 3694 Journal of Applied Econometrics 29, no. 1 (2014): 133-160. 3695 .. [2] Bańbura, Marta, Domenico Giannone, and Lucrezia Reichlin. 3696 "Nowcasting." 3697 The Oxford Handbook of Economic Forecasting. July 8, 2011. 3698 .. [3] Bańbura, Marta, Domenico Giannone, Michele Modugno, and Lucrezia 3699 Reichlin. 3700 "Now-casting and the real-time data flow." 3701 In Handbook of economic forecasting, vol. 2, pp. 195-237. 3702 Elsevier, 2013. 3703 """ 3704 # Validate input 3705 if self.smoother_results is None: 3706 raise ValueError('Cannot compute news without Kalman smoother' 3707 ' results.') 3708 3709 # If we were given data, create a new results object 3710 comparison_dataset = not isinstance( 3711 comparison, (MLEResults, MLEResultsWrapper)) 3712 if comparison_dataset: 3713 # If `exog` is longer than `comparison`, then we extend it to match 3714 nobs_endog = len(comparison) 3715 nobs_exog = len(exog) if exog is not None else nobs_endog 3716 3717 if nobs_exog > nobs_endog: 3718 _, _, _, ix = self.model._get_prediction_index( 3719 start=0, end=nobs_exog - 1) 3720 # TODO: check that the index of `comparison` matches the model 3721 comparison = np.asarray(comparison) 3722 if comparison.ndim < 2: 3723 comparison = np.atleast_2d(comparison).T 3724 if (comparison.ndim != 2 or 3725 comparison.shape[1] != self.model.k_endog): 3726 raise ValueError('Invalid shape for `comparison`. 
Must' 3727 f' contain {self.model.k_endog} columns.') 3728 extra = np.zeros((nobs_exog - nobs_endog, 3729 self.model.k_endog)) * np.nan 3730 comparison = pd.DataFrame( 3731 np.concatenate([comparison, extra], axis=0), index=ix, 3732 columns=self.model.endog_names) 3733 3734 # Get the results object 3735 comparison = self.apply(comparison, exog=exog, 3736 copy_initialization=True, **kwargs) 3737 3738 # Now, figure out the `updated` versus `previous` results objects 3739 nmissing = self.filter_results.missing.sum() 3740 nmissing_comparison = comparison.filter_results.missing.sum() 3741 if (comparison_type == 'updated' or (comparison_type is None and ( 3742 comparison.nobs > self.nobs or 3743 (comparison.nobs == self.nobs and 3744 nmissing > nmissing_comparison)))): 3745 updated = comparison 3746 previous = self 3747 elif (comparison_type == 'previous' or (comparison_type is None and ( 3748 comparison.nobs < self.nobs or 3749 (comparison.nobs == self.nobs and 3750 nmissing < nmissing_comparison)))): 3751 updated = self 3752 previous = comparison 3753 else: 3754 raise ValueError('Could not automatically determine the type' 3755 ' of comparison requested to compute the' 3756 ' News, so it must be specified as "updated"' 3757 ' or "previous", using the `comparison_type`' 3758 ' keyword argument') 3759 3760 # Check that the index of `updated` is a superset of the 3761 # index of `previous` 3762 # Note: the try/except block is for Pandas < 0.25, in which 3763 # `PeriodIndex.difference` raises a ValueError if the argument is not 3764 # also a `PeriodIndex`. 3765 diff = previous.model._index.difference(updated.model._index) 3766 if len(diff) > 0: 3767 raise ValueError('The index associated with the updated results is' 3768 ' not a superset of the index associated with the' 3769 ' previous results, and so these datasets do not' 3770 ' appear to be related. Can only compute the' 3771 ' news by comparing this results set to previous' 3772 ' results objects.') 3773 3774 # Handle start, end, periods 3775 # There doesn't seem to be any universal defaults that both (a) make 3776 # sense for all data update combinations, and (b) work with both 3777 # time-invariant and time-varying models. So we require that the user 3778 # specify exactly two of start, end, periods. 3779 if impact_date is not None: 3780 if not (start is None and end is None and periods is None): 3781 raise ValueError('Cannot use the `impact_date` argument in' 3782 ' combination with `start`, `end`, or' 3783 ' `periods`.') 3784 start = impact_date 3785 periods = 1 3786 if start is None and end is None and periods is None: 3787 start = previous.nobs - 1 3788 end = previous.nobs - 1 3789 if int(start is None) + int(end is None) + int(periods is None) != 1: 3790 raise ValueError('Of the three parameters: start, end, and' 3791 ' periods, exactly two must be specified') 3792 # If we have the `periods` object, we need to convert `start`/`end` to 3793 # integers so that we can compute the other one. 
That's because
3794         # _get_prediction_index doesn't support a `periods` argument
3795         elif start is not None and periods is not None:
3796             start, _, _, _ = self.model._get_prediction_index(start, start)
3797             end = start + (periods - 1)
3798         elif end is not None and periods is not None:
3799             _, end, _, _ = self.model._get_prediction_index(end, end)
3800             start = end - (periods - 1)
3801         elif start is not None and end is not None:
3802             pass
3803
3804         # Get the integer-based start, end and the prediction index
3805         start, end, out_of_sample, prediction_index = (
3806             updated.model._get_prediction_index(start, end))
3807         end = end + out_of_sample
3808
3809         # News results will always use Pandas, so if the model's data was not
3810         # from Pandas, we'll create an index, as if the model's data had been
3811         # given a default Pandas index.
3812         if prediction_index is None:
3813             prediction_index = pd.RangeIndex(start=start, stop=end + 1)
3814
3815         # For time-varying models try to create an appended `updated` model
3816         # with NaN values. Do not extend the model if this was already done
3817         # above (i.e. the case that `comparison` was a new dataset), because
3818         # in that case `exog` and `kwargs` should have
3819         # been set with the input `comparison` dataset in mind, and so would be
3820         # useless here. Ultimately, we've already extended `updated` as far
3821         # as we can. So raise an exception in that case with a useful message.
3822         # However, we still want to try to accommodate extending the model here
3823         # if it is possible.
3824         # Note that we do not need to extend time-invariant models, because
3825         # `KalmanSmoother.news` can itself handle any impact dates for
3826         # time-invariant models.
3827         time_varying = not (previous.filter_results.time_invariant or
3828                             updated.filter_results.time_invariant)
3829         if time_varying and end >= updated.nobs:
3830             # If the given `comparison` was a dataset and either `exog` or
3831             # `kwargs` was set, then we assume that we cannot create an updated
3832             # time-varying model (because then we can't tell if `kwargs` and
3833             # `exog` arguments are meant to apply to the `comparison` dataset
3834             # or to this extension)
3835             if comparison_dataset and (exog is not None or len(kwargs) > 0):
3836                 if comparison is updated:
3837                     raise ValueError('If providing an updated dataset as the'
3838                                      ' `comparison` with a time-varying model,'
3839                                      ' then the `end` period cannot be beyond'
3840                                      ' the end of that updated dataset.')
3841                 else:
3842                     raise ValueError('If providing a previous dataset as the'
3843                                      ' `comparison` with a time-varying model,'
3844                                      ' then the `end` period cannot be beyond'
3845                                      ' the end of the (updated) results'
3846                                      ' object.')
3847
3848             # Try to extend `updated`
3849             updated_orig = updated
3850             # TODO: `append` should fix this k_endog=1 issue for us
3851             # TODO: is the + 1 necessary?
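            # Pad the updated sample with NaN observations out to period `end`
            # and append them, so that the extended model's time-varying system
            # matrices cover the full impact horizon requested above.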
3852             if self.model.k_endog > 1:
3853                 extra = np.zeros((end - updated.nobs + 1,
3854                                   self.model.k_endog)) * np.nan
3855             else:
3856                 extra = np.zeros((end - updated.nobs + 1,)) * np.nan
3857             updated = updated_orig.append(extra, exog=exog, **kwargs)
3858
3859         # Compute the news
3860         news_results = (
3861             updated._news_previous_results(previous, start, end + 1, periods))
3862
3863         if not return_raw:
3864             news_results = NewsResults(
3865                 news_results, self, updated, previous, impacted_variable,
3866                 tolerance, row_labels=prediction_index)
3867         return news_results
3868
3869     def append(self, endog, exog=None, refit=False, fit_kwargs=None,
3870                copy_initialization=False, **kwargs):
3871         """
3872         Recreate the results object with new data appended to the original data
3873
3874         Creates a new result object applied to a dataset that is created by
3875         appending new data to the end of the model's original data. The new
3876         results can then be used for analysis or forecasting.
3877
3878         Parameters
3879         ----------
3880         endog : array_like
3881             New observations from the modeled time-series process.
3882         exog : array_like, optional
3883             New observations of exogenous regressors, if applicable.
3884         refit : bool, optional
3885             Whether to re-fit the parameters, based on the combined dataset.
3886             Default is False (so parameters from the current results object
3887             are used to create the new results object).
3888         copy_initialization : bool, optional
3889             Whether or not to copy the initialization from the current results
3890             set to the new model. Default is False.
3891         fit_kwargs : dict, optional
3892             Keyword arguments to pass to `fit` (if `refit=True`) or `filter` /
3893             `smooth`.
3895         **kwargs
3896             Keyword arguments may be used to modify model specification
3897             arguments when creating the new model object.
3898
3899         Returns
3900         -------
3901         results
3902             Updated Results object that includes results from both the
3903             original dataset and the new dataset.
3904
3905         Notes
3906         -----
3907         The `endog` and `exog` arguments to this method must be formatted in
3908         the same way (e.g. Pandas Series versus Numpy array) as were the
3909         `endog` and `exog` arrays passed to the original model.
3910
3911         The `endog` argument to this method should consist of new observations
3912         that occurred directly after the last element of the model's original
3913         `endog` array. For any other kind of dataset, see the `apply` method.
3914
3915         This method will apply filtering to all of the original data as well
3916         as to the new data. To apply filtering only to the new data (which
3917         can be much faster if the original dataset is large), see the `extend`
3918         method.
3919 3920 See Also 3921 -------- 3922 statsmodels.tsa.statespace.mlemodel.MLEResults.extend 3923 statsmodels.tsa.statespace.mlemodel.MLEResults.apply 3924 3925 Examples 3926 -------- 3927 >>> index = pd.period_range(start='2000', periods=2, freq='A') 3928 >>> original_observations = pd.Series([1.2, 1.5], index=index) 3929 >>> mod = sm.tsa.SARIMAX(original_observations) 3930 >>> res = mod.fit() 3931 >>> print(res.params) 3932 ar.L1 0.9756 3933 sigma2 0.0889 3934 dtype: float64 3935 >>> print(res.fittedvalues) 3936 2000 0.0000 3937 2001 1.1707 3938 Freq: A-DEC, dtype: float64 3939 >>> print(res.forecast(1)) 3940 2002 1.4634 3941 Freq: A-DEC, dtype: float64 3942 3943 >>> new_index = pd.period_range(start='2002', periods=1, freq='A') 3944 >>> new_observations = pd.Series([0.9], index=new_index) 3945 >>> updated_res = res.append(new_observations) 3946 >>> print(updated_res.params) 3947 ar.L1 0.9756 3948 sigma2 0.0889 3949 dtype: float64 3950 >>> print(updated_res.fittedvalues) 3951 2000 0.0000 3952 2001 1.1707 3953 2002 1.4634 3954 Freq: A-DEC, dtype: float64 3955 >>> print(updated_res.forecast(1)) 3956 2003 0.878 3957 Freq: A-DEC, dtype: float64 3958 """ 3959 start = self.nobs 3960 end = self.nobs + len(endog) - 1 3961 _, _, _, append_ix = self.model._get_prediction_index(start, end) 3962 3963 # Check the index of the new data 3964 if isinstance(self.model.data, PandasData): 3965 _check_index(append_ix, endog, '`endog`') 3966 3967 # Concatenate the new data to original data 3968 new_endog = concat([self.model.data.orig_endog, endog], axis=0, 3969 allow_mix=True) 3970 3971 # Handle `exog` 3972 if exog is not None: 3973 _, exog = prepare_exog(exog) 3974 _check_index(append_ix, exog, '`exog`') 3975 3976 new_exog = concat([self.model.data.orig_exog, exog], axis=0, 3977 allow_mix=True) 3978 else: 3979 new_exog = None 3980 3981 # Create a continuous index for the combined data 3982 if isinstance(self.model.data, PandasData): 3983 start = 0 3984 end = len(new_endog) - 1 3985 _, _, _, new_index = self.model._get_prediction_index(start, end) 3986 3987 # Standardize `endog` to have the right index and columns 3988 columns = self.model.endog_names 3989 if not isinstance(columns, list): 3990 columns = [columns] 3991 new_endog = pd.DataFrame(new_endog, index=new_index, 3992 columns=columns) 3993 3994 # Standardize `exog` to have the right index 3995 if new_exog is not None: 3996 new_exog = pd.DataFrame(new_exog, index=new_index, 3997 columns=self.model.exog_names) 3998 3999 if copy_initialization: 4000 res = self.filter_results 4001 init = Initialization( 4002 self.model.k_states, 'known', constant=res.initial_state, 4003 stationary_cov=res.initial_state_cov) 4004 kwargs.setdefault('initialization', init) 4005 4006 mod = self.model.clone(new_endog, exog=new_exog, **kwargs) 4007 res = self._apply(mod, refit=refit, fit_kwargs=fit_kwargs, **kwargs) 4008 4009 return res 4010 4011 def extend(self, endog, exog=None, fit_kwargs=None, **kwargs): 4012 """ 4013 Recreate the results object for new data that extends the original data 4014 4015 Creates a new result object applied to a new dataset that is assumed to 4016 follow directly from the end of the model's original data. The new 4017 results can then be used for analysis or forecasting. 4018 4019 Parameters 4020 ---------- 4021 endog : array_like 4022 New observations from the modeled time-series process. 4023 exog : array_like, optional 4024 New observations of exogenous regressors, if applicable. 
4025 fit_kwargs : dict, optional 4026 Keyword arguments to pass to `filter` or `smooth`. 4027 **kwargs 4028 Keyword arguments may be used to modify model specification 4029 arguments when created the new model object. 4030 4031 Returns 4032 ------- 4033 results 4034 Updated Results object, that includes results only for the new 4035 dataset. 4036 4037 See Also 4038 -------- 4039 statsmodels.tsa.statespace.mlemodel.MLEResults.append 4040 statsmodels.tsa.statespace.mlemodel.MLEResults.apply 4041 4042 Notes 4043 ----- 4044 The `endog` argument to this method should consist of new observations 4045 that occurred directly after the last element of the model's original 4046 `endog` array. For any other kind of dataset, see the `apply` method. 4047 4048 This method will apply filtering only to the new data provided by the 4049 `endog` argument, which can be much faster than re-filtering the entire 4050 dataset. However, the returned results object will only have results 4051 for the new data. To retrieve results for both the new data and the 4052 original data, see the `append` method. 4053 4054 Examples 4055 -------- 4056 >>> index = pd.period_range(start='2000', periods=2, freq='A') 4057 >>> original_observations = pd.Series([1.2, 1.5], index=index) 4058 >>> mod = sm.tsa.SARIMAX(original_observations) 4059 >>> res = mod.fit() 4060 >>> print(res.params) 4061 ar.L1 0.9756 4062 sigma2 0.0889 4063 dtype: float64 4064 >>> print(res.fittedvalues) 4065 2000 0.0000 4066 2001 1.1707 4067 Freq: A-DEC, dtype: float64 4068 >>> print(res.forecast(1)) 4069 2002 1.4634 4070 Freq: A-DEC, dtype: float64 4071 4072 >>> new_index = pd.period_range(start='2002', periods=1, freq='A') 4073 >>> new_observations = pd.Series([0.9], index=new_index) 4074 >>> updated_res = res.extend(new_observations) 4075 >>> print(updated_res.params) 4076 ar.L1 0.9756 4077 sigma2 0.0889 4078 dtype: float64 4079 >>> print(updated_res.fittedvalues) 4080 2002 1.4634 4081 Freq: A-DEC, dtype: float64 4082 >>> print(updated_res.forecast(1)) 4083 2003 0.878 4084 Freq: A-DEC, dtype: float64 4085 """ 4086 start = self.nobs 4087 end = self.nobs + len(endog) - 1 4088 _, _, _, extend_ix = self.model._get_prediction_index(start, end) 4089 4090 if isinstance(self.model.data, PandasData): 4091 _check_index(extend_ix, endog, '`endog`') 4092 4093 # Standardize `endog` to have the right index and columns 4094 columns = self.model.endog_names 4095 if not isinstance(columns, list): 4096 columns = [columns] 4097 endog = pd.DataFrame(endog, index=extend_ix, columns=columns) 4098 # Extend the current fit result to additional data 4099 mod = self.model.clone(endog, exog=exog, **kwargs) 4100 mod.ssm.initialization = Initialization( 4101 mod.k_states, 'known', constant=self.predicted_state[..., -1], 4102 stationary_cov=self.predicted_state_cov[..., -1]) 4103 res = self._apply(mod, refit=False, fit_kwargs=fit_kwargs, **kwargs) 4104 4105 return res 4106 4107 def apply(self, endog, exog=None, refit=False, fit_kwargs=None, 4108 copy_initialization=False, **kwargs): 4109 """ 4110 Apply the fitted parameters to new data unrelated to the original data 4111 4112 Creates a new result object using the current fitted parameters, 4113 applied to a completely new dataset that is assumed to be unrelated to 4114 the model's original data. The new results can then be used for 4115 analysis or forecasting. 4116 4117 Parameters 4118 ---------- 4119 endog : array_like 4120 New observations from the modeled time-series process. 
4121 exog : array_like, optional 4122 New observations of exogenous regressors, if applicable. 4123 refit : bool, optional 4124 Whether to re-fit the parameters, using the new dataset. 4125 Default is False (so parameters from the current results object 4126 are used to create the new results object). 4127 copy_initialization : bool, optional 4128 Whether or not to copy the initialization from the current results 4129 set to the new model. Default is False 4130 fit_kwargs : dict, optional 4131 Keyword arguments to pass to `fit` (if `refit=True`) or `filter` / 4132 `smooth`. 4133 **kwargs 4134 Keyword arguments may be used to modify model specification 4135 arguments when created the new model object. 4136 4137 Returns 4138 ------- 4139 results 4140 Updated Results object, that includes results only for the new 4141 dataset. 4142 4143 See Also 4144 -------- 4145 statsmodels.tsa.statespace.mlemodel.MLEResults.append 4146 statsmodels.tsa.statespace.mlemodel.MLEResults.apply 4147 4148 Notes 4149 ----- 4150 The `endog` argument to this method should consist of new observations 4151 that are not necessarily related to the original model's `endog` 4152 dataset. For observations that continue that original dataset by follow 4153 directly after its last element, see the `append` and `extend` methods. 4154 4155 Examples 4156 -------- 4157 >>> index = pd.period_range(start='2000', periods=2, freq='A') 4158 >>> original_observations = pd.Series([1.2, 1.5], index=index) 4159 >>> mod = sm.tsa.SARIMAX(original_observations) 4160 >>> res = mod.fit() 4161 >>> print(res.params) 4162 ar.L1 0.9756 4163 sigma2 0.0889 4164 dtype: float64 4165 >>> print(res.fittedvalues) 4166 2000 0.0000 4167 2001 1.1707 4168 Freq: A-DEC, dtype: float64 4169 >>> print(res.forecast(1)) 4170 2002 1.4634 4171 Freq: A-DEC, dtype: float64 4172 4173 >>> new_index = pd.period_range(start='1980', periods=3, freq='A') 4174 >>> new_observations = pd.Series([1.4, 0.3, 1.2], index=new_index) 4175 >>> new_res = res.apply(new_observations) 4176 >>> print(new_res.params) 4177 ar.L1 0.9756 4178 sigma2 0.0889 4179 dtype: float64 4180 >>> print(new_res.fittedvalues) 4181 1980 1.1707 4182 1981 1.3659 4183 1982 0.2927 4184 Freq: A-DEC, dtype: float64 4185 Freq: A-DEC, dtype: float64 4186 >>> print(new_res.forecast(1)) 4187 1983 1.1707 4188 Freq: A-DEC, dtype: float64 4189 """ 4190 mod = self.model.clone(endog, exog=exog, **kwargs) 4191 4192 if copy_initialization: 4193 res = self.filter_results 4194 init = Initialization( 4195 self.model.k_states, 'known', constant=res.initial_state, 4196 stationary_cov=res.initial_state_cov) 4197 mod.ssm.initialization = init 4198 4199 res = self._apply(mod, refit=refit, fit_kwargs=fit_kwargs, **kwargs) 4200 4201 return res 4202 4203 def plot_diagnostics(self, variable=0, lags=10, fig=None, figsize=None, 4204 truncate_endog_names=24, auto_ylims=False, 4205 bartlett_confint=False, acf_kwargs=None): 4206 """ 4207 Diagnostic plots for standardized residuals of one endogenous variable 4208 4209 Parameters 4210 ---------- 4211 variable : int, optional 4212 Index of the endogenous variable for which the diagnostic plots 4213 should be created. Default is 0. 4214 lags : int, optional 4215 Number of lags to include in the correlogram. Default is 10. 4216 fig : Figure, optional 4217 If given, subplots are created in this figure instead of in a new 4218 figure. Note that the 2x2 grid will be created in the provided 4219 figure using `fig.add_subplot()`. 
4220         figsize : tuple, optional
4221             If a figure is created, this argument allows specifying a size.
4222             The tuple is (width, height).
4223         auto_ylims : bool, optional
4224             If True, automatically adjusts the y-axis limits to the ACF values.
4225         bartlett_confint : bool, default False
4226             Confidence intervals for ACF values are generally placed at 2
4227             standard errors around r_k. The formula used for standard error
4228             depends upon the situation. If the autocorrelations are being used
4229             to test for randomness of residuals as part of the ARIMA routine,
4230             the standard errors are determined assuming the residuals are white
4231             noise. The approximate formula for any lag is that standard error
4232             of each r_k = 1/sqrt(N). See section 9.4 of [1] for more details on
4233             the 1/sqrt(N) result. For more elementary discussion, see section
4234             5.3.2 in [2].
4235             For the ACF of raw data, the standard error at a lag k is
4236             found as if the right model was an MA(k-1). This allows the
4237             possible interpretation that if all autocorrelations past a
4238             certain lag are within the limits, the model might be an MA of
4239             order defined by the last significant autocorrelation. In this
4240             case, a moving average model is assumed for the data and the
4241             standard errors for the confidence intervals should be
4242             generated using Bartlett's formula. For more details on
4243             Bartlett's formula result, see section 7.2 in [1].
4244         acf_kwargs : dict, optional
4245             Optional dictionary of keyword arguments that are directly passed
4246             on to the correlogram Matplotlib plot produced by plot_acf().
4247
4248         Returns
4249         -------
4250         Figure
4251             Figure instance with diagnostic plots
4252
4253         See Also
4254         --------
4255         statsmodels.graphics.gofplots.qqplot
4256         statsmodels.graphics.tsaplots.plot_acf
4257
4258         Notes
4259         -----
4260         Produces a 2x2 plot grid with the following plots (ordered left to
4261         right, top to bottom):
4262
4263         1. Standardized residuals over time
4264         2. Histogram plus estimated density of standardized residuals, along
4265            with a Normal(0,1) density plotted for reference.
4266         3. Normal Q-Q plot, with Normal reference line.
4267         4. Correlogram
4268
4269         References
4270         ----------
4271         [1] Brockwell and Davis, 1987. Time Series Theory and Methods
4272         [2] Brockwell and Davis, 2010. Introduction to Time Series and
4273             Forecasting, 2nd edition.
4274         """
4275         from statsmodels.graphics.utils import _import_mpl, create_mpl_fig
4276         _import_mpl()
4277         fig = create_mpl_fig(fig, figsize)
4278         # Eliminate residuals associated with burned or diffuse likelihoods
4279         d = np.maximum(self.loglikelihood_burn, self.nobs_diffuse)
4280
4281         # If given a variable name, find the index
4282         if isinstance(variable, str):
4283             variable = self.model.endog_names.index(variable)
4284
4285         # Get residuals
4286         if hasattr(self.data, 'dates') and self.data.dates is not None:
4287             ix = self.data.dates[d:]
4288         else:
4289             ix = np.arange(self.nobs - d)
4290         resid = pd.Series(
4291             self.filter_results.standardized_forecasts_error[variable, d:],
4292             index=ix)
4293
4294         if resid.shape[0] < max(d, lags):
4295             raise ValueError(
4296                 "Length of endogenous variable must be larger than the number "
4297                 "of lags used in the model and the number of observations "
4298                 "burned in the log-likelihood calculation."
4299 ) 4300 4301 # Top-left: residuals vs time 4302 ax = fig.add_subplot(221) 4303 resid.dropna().plot(ax=ax) 4304 ax.hlines(0, ix[0], ix[-1], alpha=0.5) 4305 ax.set_xlim(ix[0], ix[-1]) 4306 name = self.model.endog_names[variable] 4307 if len(name) > truncate_endog_names: 4308 name = name[:truncate_endog_names - 3] + '...' 4309 ax.set_title(f'Standardized residual for "{name}"') 4310 4311 # Top-right: histogram, Gaussian kernel density, Normal density 4312 # Can only do histogram and Gaussian kernel density on the non-null 4313 # elements 4314 resid_nonmissing = resid.dropna() 4315 ax = fig.add_subplot(222) 4316 4317 ax.hist(resid_nonmissing, density=True, label='Hist', 4318 edgecolor='#FFFFFF') 4319 4320 from scipy.stats import gaussian_kde, norm 4321 kde = gaussian_kde(resid_nonmissing) 4322 xlim = (-1.96*2, 1.96*2) 4323 x = np.linspace(xlim[0], xlim[1]) 4324 ax.plot(x, kde(x), label='KDE') 4325 ax.plot(x, norm.pdf(x), label='N(0,1)') 4326 ax.set_xlim(xlim) 4327 ax.legend() 4328 ax.set_title('Histogram plus estimated density') 4329 4330 # Bottom-left: QQ plot 4331 ax = fig.add_subplot(223) 4332 from statsmodels.graphics.gofplots import qqplot 4333 qqplot(resid_nonmissing, line='s', ax=ax) 4334 ax.set_title('Normal Q-Q') 4335 4336 # Bottom-right: Correlogram 4337 ax = fig.add_subplot(224) 4338 from statsmodels.graphics.tsaplots import plot_acf 4339 4340 if acf_kwargs is None: 4341 acf_kwargs = {} 4342 plot_acf(resid, ax=ax, lags=lags, auto_ylims=auto_ylims, 4343 bartlett_confint=bartlett_confint, **acf_kwargs) 4344 ax.set_title('Correlogram') 4345 4346 return fig 4347 4348 def summary(self, alpha=.05, start=None, title=None, model_name=None, 4349 display_params=True, display_diagnostics=True, 4350 truncate_endog_names=None, display_max_endog=None, 4351 extra_top_left=None, extra_top_right=None): 4352 """ 4353 Summarize the Model 4354 4355 Parameters 4356 ---------- 4357 alpha : float, optional 4358 Significance level for the confidence intervals. Default is 0.05. 4359 start : int, optional 4360 Integer of the start observation. Default is 0. 4361 model_name : str 4362 The name of the model used. Default is to use model class name. 4363 4364 Returns 4365 ------- 4366 summary : Summary instance 4367 This holds the summary table and text, which can be printed or 4368 converted to various output formats. 
4369 4370 See Also 4371 -------- 4372 statsmodels.iolib.summary.Summary 4373 """ 4374 from statsmodels.iolib.summary import Summary 4375 from statsmodels.iolib.table import SimpleTable 4376 from statsmodels.iolib.tableformatting import fmt_params 4377 4378 # Model specification results 4379 model = self.model 4380 if title is None: 4381 title = 'Statespace Model Results' 4382 4383 if start is None: 4384 start = 0 4385 if self.model._index_dates: 4386 ix = self.model._index 4387 d = ix[start] 4388 sample = ['%02d-%02d-%02d' % (d.month, d.day, d.year)] 4389 d = ix[-1] 4390 sample += ['- ' + '%02d-%02d-%02d' % (d.month, d.day, d.year)] 4391 else: 4392 sample = [str(start), ' - ' + str(self.nobs)] 4393 4394 # Standardize the model name as a list of str 4395 if model_name is None: 4396 model_name = model.__class__.__name__ 4397 4398 # Truncate endog names 4399 if truncate_endog_names is None: 4400 truncate_endog_names = False if self.model.k_endog == 1 else 24 4401 endog_names = self.model.endog_names 4402 if not isinstance(endog_names, list): 4403 endog_names = [endog_names] 4404 endog_names = [str(name) for name in endog_names] 4405 if truncate_endog_names is not False: 4406 n = truncate_endog_names 4407 endog_names = [name if len(name) <= n else name[:n] + '...' 4408 for name in endog_names] 4409 4410 # Shorten the endog name list if applicable 4411 if display_max_endog is None: 4412 display_max_endog = np.inf 4413 yname = None 4414 if self.model.k_endog > display_max_endog: 4415 k = self.model.k_endog - 1 4416 yname = '"' + endog_names[0] + f'", and {k} more' 4417 4418 # Create the tables 4419 if not isinstance(model_name, list): 4420 model_name = [model_name] 4421 4422 top_left = [('Dep. Variable:', None)] 4423 top_left.append(('Model:', [model_name[0]])) 4424 for i in range(1, len(model_name)): 4425 top_left.append(('', ['+ ' + model_name[i]])) 4426 top_left += [ 4427 ('Date:', None), 4428 ('Time:', None), 4429 ('Sample:', [sample[0]]), 4430 ('', [sample[1]]) 4431 ] 4432 4433 top_right = [ 4434 ('No. 
Observations:', [self.nobs]), 4435 ('Log Likelihood', ["%#5.3f" % self.llf]), 4436 ] 4437 if hasattr(self, 'rsquared'): 4438 top_right.append(('R-squared:', ["%#8.3f" % self.rsquared])) 4439 top_right += [ 4440 ('AIC', ["%#5.3f" % self.aic]), 4441 ('BIC', ["%#5.3f" % self.bic]), 4442 ('HQIC', ["%#5.3f" % self.hqic])] 4443 if (self.filter_results is not None and 4444 self.filter_results.filter_concentrated): 4445 top_right.append(('Scale', ["%#5.3f" % self.scale])) 4446 4447 if hasattr(self, 'cov_type'): 4448 cov_type = self.cov_type 4449 if cov_type == 'none': 4450 cov_type = 'Not computed' 4451 top_left.append(('Covariance Type:', [cov_type])) 4452 4453 if extra_top_left is not None: 4454 top_left += extra_top_left 4455 if extra_top_right is not None: 4456 top_right += extra_top_right 4457 4458 summary = Summary() 4459 summary.add_table_2cols(self, gleft=top_left, gright=top_right, 4460 title=title, yname=yname) 4461 table_ix = 1 4462 if len(self.params) > 0 and display_params: 4463 summary.add_table_params(self, alpha=alpha, 4464 xname=self.param_names, use_t=False) 4465 table_ix += 1 4466 4467 # Diagnostic tests results 4468 if display_diagnostics: 4469 try: 4470 het = self.test_heteroskedasticity(method='breakvar') 4471 except Exception: # FIXME: catch something specific 4472 het = np.zeros((self.model.k_endog, 2)) * np.nan 4473 try: 4474 lb = self.test_serial_correlation(method='ljungbox', lags=[1]) 4475 except Exception: # FIXME: catch something specific 4476 lb = np.zeros((self.model.k_endog, 2, 1)) * np.nan 4477 try: 4478 jb = self.test_normality(method='jarquebera') 4479 except Exception: # FIXME: catch something specific 4480 jb = np.zeros((self.model.k_endog, 4)) * np.nan 4481 4482 if self.model.k_endog <= display_max_endog: 4483 format_str = lambda array: [ # noqa:E731 4484 ', '.join(['{0:.2f}'.format(i) for i in array]) 4485 ] 4486 diagn_left = [ 4487 ('Ljung-Box (L1) (Q):', format_str(lb[:, 0, -1])), 4488 ('Prob(Q):', format_str(lb[:, 1, -1])), 4489 ('Heteroskedasticity (H):', format_str(het[:, 0])), 4490 ('Prob(H) (two-sided):', format_str(het[:, 1]))] 4491 4492 diagn_right = [('Jarque-Bera (JB):', format_str(jb[:, 0])), 4493 ('Prob(JB):', format_str(jb[:, 1])), 4494 ('Skew:', format_str(jb[:, 2])), 4495 ('Kurtosis:', format_str(jb[:, 3])) 4496 ] 4497 4498 summary.add_table_2cols(self, gleft=diagn_left, 4499 gright=diagn_right, title="") 4500 else: 4501 columns = ['LjungBox\n(L1) (Q)', 'Prob(Q)', 4502 'Het.(H)', 'Prob(H)', 4503 'Jarque\nBera(JB)', 'Prob(JB)', 'Skew', 'Kurtosis'] 4504 data = pd.DataFrame( 4505 np.c_[lb[:, :2, -1], het[:, :2], jb[:, :4]], 4506 index=endog_names, columns=columns).applymap( 4507 lambda num: '' if pd.isnull(num) else '%.2f' % num) 4508 data.index.name = 'Residual of\nDep. 
variable' 4509 data = data.reset_index() 4510 4511 params_data = data.values 4512 params_header = data.columns.tolist() 4513 params_stubs = None 4514 4515 title = 'Residual diagnostics:' 4516 table = SimpleTable( 4517 params_data, params_header, params_stubs, 4518 txt_fmt=fmt_params, title=title) 4519 summary.tables.insert(table_ix, table) 4520 4521 # Add warnings/notes, added to text format only 4522 etext = [] 4523 if hasattr(self, 'cov_type') and 'description' in self.cov_kwds: 4524 etext.append(self.cov_kwds['description']) 4525 if self._rank < (len(self.params) - len(self.fixed_params)): 4526 cov_params = self.cov_params() 4527 if len(self.fixed_params) > 0: 4528 mask = np.ix_(self._free_params_index, self._free_params_index) 4529 cov_params = cov_params[mask] 4530 etext.append("Covariance matrix is singular or near-singular," 4531 " with condition number %6.3g. Standard errors may be" 4532 " unstable." % _safe_cond(cov_params)) 4533 4534 if etext: 4535 etext = ["[{0}] {1}".format(i + 1, text) 4536 for i, text in enumerate(etext)] 4537 etext.insert(0, "Warnings:") 4538 summary.add_extra_txt(etext) 4539 4540 return summary 4541 4542 4543class MLEResultsWrapper(wrap.ResultsWrapper): 4544 _attrs = { 4545 'zvalues': 'columns', 4546 'cov_params_approx': 'cov', 4547 'cov_params_default': 'cov', 4548 'cov_params_oim': 'cov', 4549 'cov_params_opg': 'cov', 4550 'cov_params_robust': 'cov', 4551 'cov_params_robust_approx': 'cov', 4552 'cov_params_robust_oim': 'cov', 4553 } 4554 _wrap_attrs = wrap.union_dicts(tsbase.TimeSeriesResultsWrapper._wrap_attrs, 4555 _attrs) 4556 _methods = { 4557 'forecast': 'dates', 4558 'impulse_responses': 'ynames' 4559 } 4560 _wrap_methods = wrap.union_dicts( 4561 tsbase.TimeSeriesResultsWrapper._wrap_methods, _methods) 4562wrap.populate_wrapper(MLEResultsWrapper, MLEResults) # noqa:E305 4563 4564 4565class PredictionResults(pred.PredictionResults): 4566 """ 4567 4568 Parameters 4569 ---------- 4570 prediction_results : kalman_filter.PredictionResults instance 4571 Results object from prediction after fitting or filtering a state space 4572 model. 4573 row_labels : iterable 4574 Row labels for the predicted data. 
4575 4576 Attributes 4577 ---------- 4578 """ 4579 def __init__(self, model, prediction_results, row_labels=None): 4580 if model.model.k_endog == 1: 4581 endog = pd.Series(prediction_results.endog[0], 4582 name=model.model.endog_names) 4583 else: 4584 endog = pd.DataFrame(prediction_results.endog.T, 4585 columns=model.model.endog_names) 4586 self.model = Bunch(data=model.data.__class__( 4587 endog=endog, predict_dates=row_labels)) 4588 self.prediction_results = prediction_results 4589 4590 # Get required values 4591 k_endog, nobs = prediction_results.endog.shape 4592 if not prediction_results.results.memory_no_forecast_mean: 4593 predicted_mean = self.prediction_results.forecasts 4594 else: 4595 predicted_mean = np.zeros((k_endog, nobs)) * np.nan 4596 4597 if predicted_mean.shape[0] == 1: 4598 predicted_mean = predicted_mean[0, :] 4599 else: 4600 predicted_mean = predicted_mean.transpose() 4601 4602 if not prediction_results.results.memory_no_forecast_cov: 4603 var_pred_mean = self.prediction_results.forecasts_error_cov 4604 else: 4605 var_pred_mean = np.zeros((k_endog, k_endog, nobs)) * np.nan 4606 4607 if var_pred_mean.shape[0] == 1: 4608 var_pred_mean = var_pred_mean[0, 0, :] 4609 else: 4610 var_pred_mean = var_pred_mean.transpose() 4611 4612 # Initialize 4613 super(PredictionResults, self).__init__(predicted_mean, var_pred_mean, 4614 dist='norm', 4615 row_labels=row_labels) 4616 4617 @property 4618 def se_mean(self): 4619 # Replace negative values with np.nan to avoid a RuntimeWarning 4620 var_pred_mean = self.var_pred_mean.copy() 4621 var_pred_mean[var_pred_mean < 0] = np.nan 4622 if var_pred_mean.ndim == 1: 4623 se_mean = np.sqrt(var_pred_mean) 4624 else: 4625 se_mean = np.sqrt(var_pred_mean.T.diagonal()) 4626 return se_mean 4627 4628 def conf_int(self, method='endpoint', alpha=0.05, **kwds): 4629 # TODO: this performs metadata wrapping, and that should be handled 4630 # by attach_* methods. However, they do not currently support 4631 # this use case. 
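        # Temporarily disable Pandas wrapping so the base-class conf_int
        # returns a plain ndarray; it is re-wrapped below with the prediction
        # row labels and per-variable lower/upper column names.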
4632 _use_pandas = self._use_pandas 4633 self._use_pandas = False 4634 conf_int = super(PredictionResults, self).conf_int(alpha, **kwds) 4635 self._use_pandas = _use_pandas 4636 4637 # Create a dataframe 4638 if self._row_labels is not None: 4639 conf_int = pd.DataFrame(conf_int, index=self.row_labels) 4640 4641 # Attach the endog names 4642 ynames = self.model.data.ynames 4643 if not type(ynames) == list: 4644 ynames = [ynames] 4645 names = (['lower {0}'.format(name) for name in ynames] + 4646 ['upper {0}'.format(name) for name in ynames]) 4647 conf_int.columns = names 4648 4649 return conf_int 4650 4651 def summary_frame(self, endog=0, alpha=0.05): 4652 # TODO: finish and cleanup 4653 # import pandas as pd 4654 # ci_obs = self.conf_int(alpha=alpha, obs=True) # need to split 4655 ci_mean = np.asarray(self.conf_int(alpha=alpha)) 4656 _use_pandas = self._use_pandas 4657 self._use_pandas = False 4658 to_include = {} 4659 if self.predicted_mean.ndim == 1: 4660 yname = self.model.data.ynames 4661 to_include['mean'] = self.predicted_mean 4662 to_include['mean_se'] = self.se_mean 4663 k_endog = 1 4664 else: 4665 yname = self.model.data.ynames[endog] 4666 to_include['mean'] = self.predicted_mean[:, endog] 4667 to_include['mean_se'] = self.se_mean[:, endog] 4668 k_endog = self.predicted_mean.shape[1] 4669 self._use_pandas = _use_pandas 4670 to_include['mean_ci_lower'] = ci_mean[:, endog] 4671 to_include['mean_ci_upper'] = ci_mean[:, k_endog + endog] 4672 4673 # pandas dict does not handle 2d_array 4674 # data = np.column_stack(list(to_include.values())) 4675 # names = .... 4676 res = pd.DataFrame(to_include, index=self._row_labels, 4677 columns=list(to_include.keys())) 4678 res.columns.name = yname 4679 return res 4680 4681 4682class PredictionResultsWrapper(wrap.ResultsWrapper): 4683 _attrs = { 4684 'predicted_mean': 'dates', 4685 'se_mean': 'dates', 4686 't_values': 'dates', 4687 } 4688 _wrap_attrs = wrap.union_dicts(_attrs) 4689 4690 _methods = {} 4691 _wrap_methods = wrap.union_dicts(_methods) 4692wrap.populate_wrapper(PredictionResultsWrapper, PredictionResults) # noqa:E305 4693
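
# A rough sketch of how the `apply` / `news` machinery above is typically
# combined (illustrative only: `endog_pre` and `endog_post` are placeholder
# data vintages, where `endog_post` extends and possibly revises `endog_pre`,
# and `sm` refers to `statsmodels.api`):
#
#     >>> res_pre = sm.tsa.SARIMAX(endog_pre, order=(1, 0, 0)).fit()
#     >>> res_post = res_pre.apply(endog_post, copy_initialization=True)
#     >>> news = res_post.news(res_pre, start=res_pre.nobs, periods=1)
#     >>> news.summary()  # impacts of revisions and new data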