1""" 2Univariate structural time series models 3 4TODO: tests: "** On entry to DLASCL, parameter number 4 had an illegal value" 5 6Author: Chad Fulton 7License: Simplified-BSD 8""" 9 10from warnings import warn 11 12import numpy as np 13 14from statsmodels.compat.pandas import Appender 15from statsmodels.tools.tools import Bunch 16from statsmodels.tools.sm_exceptions import OutputWarning, SpecificationWarning 17import statsmodels.base.wrapper as wrap 18 19from statsmodels.tsa.filters.hp_filter import hpfilter 20from statsmodels.tsa.tsatools import lagmat 21 22from .mlemodel import MLEModel, MLEResults, MLEResultsWrapper 23from .initialization import Initialization 24from .tools import ( 25 companion_matrix, constrain_stationary_univariate, 26 unconstrain_stationary_univariate, prepare_exog) 27 28_mask_map = { 29 1: 'irregular', 30 2: 'fixed intercept', 31 3: 'deterministic constant', 32 6: 'random walk', 33 7: 'local level', 34 8: 'fixed slope', 35 11: 'deterministic trend', 36 14: 'random walk with drift', 37 15: 'local linear deterministic trend', 38 31: 'local linear trend', 39 27: 'smooth trend', 40 26: 'random trend' 41} 42 43 44class UnobservedComponents(MLEModel): 45 r""" 46 Univariate unobserved components time series model 47 48 These are also known as structural time series models, and decompose a 49 (univariate) time series into trend, seasonal, cyclical, and irregular 50 components. 51 52 Parameters 53 ---------- 54 55 endog : array_like 56 The observed time-series process :math:`y` 57 level : {bool, str}, optional 58 Whether or not to include a level component. Default is False. Can also 59 be a string specification of the level / trend component; see Notes 60 for available model specification strings. 61 trend : bool, optional 62 Whether or not to include a trend component. Default is False. If True, 63 `level` must also be True. 64 seasonal : {int, None}, optional 65 The period of the seasonal component, if any. Default is None. 66 freq_seasonal : {list[dict], None}, optional. 67 Whether (and how) to model seasonal component(s) with trig. functions. 68 If specified, there is one dictionary for each frequency-domain 69 seasonal component. Each dictionary must have the key, value pair for 70 'period' -- integer and may have a key, value pair for 71 'harmonics' -- integer. If 'harmonics' is not specified in any of the 72 dictionaries, it defaults to the floor of period/2. 73 cycle : bool, optional 74 Whether or not to include a cycle component. Default is False. 75 autoregressive : {int, None}, optional 76 The order of the autoregressive component. Default is None. 77 exog : {array_like, None}, optional 78 Exogenous variables. 79 irregular : bool, optional 80 Whether or not to include an irregular component. Default is False. 81 stochastic_level : bool, optional 82 Whether or not any level component is stochastic. Default is False. 83 stochastic_trend : bool, optional 84 Whether or not any trend component is stochastic. Default is False. 85 stochastic_seasonal : bool, optional 86 Whether or not any seasonal component is stochastic. Default is True. 87 stochastic_freq_seasonal : list[bool], optional 88 Whether or not each seasonal component(s) is (are) stochastic. Default 89 is True for each component. The list should be of the same length as 90 freq_seasonal. 91 stochastic_cycle : bool, optional 92 Whether or not any cycle component is stochastic. Default is False. 93 damped_cycle : bool, optional 94 Whether or not the cycle component is damped. Default is False. 95 cycle_period_bounds : tuple, optional 96 A tuple with lower and upper allowed bounds for the period of the 97 cycle. If not provided, the following default bounds are used: 98 (1) if no date / time information is provided, the frequency is 99 constrained to be between zero and :math:`\pi`, so the period is 100 constrained to be in [0.5, infinity]. 101 (2) If the date / time information is provided, the default bounds 102 allow the cyclical component to be between 1.5 and 12 years; depending 103 on the frequency of the endogenous variable, this will imply different 104 specific bounds. 105 mle_regression : bool, optional 106 Whether or not to estimate regression coefficients by maximum likelihood 107 as one of hyperparameters. Default is True. 108 If False, the regression coefficients are estimated by recursive OLS, 109 included in the state vector. 110 use_exact_diffuse : bool, optional 111 Whether or not to use exact diffuse initialization for non-stationary 112 states. Default is False (in which case approximate diffuse 113 initialization is used). 114 115 See Also 116 -------- 117 statsmodels.tsa.statespace.structural.UnobservedComponentsResults 118 statsmodels.tsa.statespace.mlemodel.MLEModel 119 120 Notes 121 ----- 122 123 These models take the general form (see [1]_ Chapter 3.2 for all details) 124 125 .. math:: 126 127 y_t = \mu_t + \gamma_t + c_t + \varepsilon_t 128 129 where :math:`y_t` refers to the observation vector at time :math:`t`, 130 :math:`\mu_t` refers to the trend component, :math:`\gamma_t` refers to the 131 seasonal component, :math:`c_t` refers to the cycle, and 132 :math:`\varepsilon_t` is the irregular. The modeling details of these 133 components are given below. 134 135 **Trend** 136 137 The trend component is a dynamic extension of a regression model that 138 includes an intercept and linear time-trend. It can be written: 139 140 .. math:: 141 142 \mu_t = \mu_{t-1} + \beta_{t-1} + \eta_{t-1} \\ 143 \beta_t = \beta_{t-1} + \zeta_{t-1} 144 145 where the level is a generalization of the intercept term that can 146 dynamically vary across time, and the trend is a generalization of the 147 time-trend such that the slope can dynamically vary across time. 148 149 Here :math:`\eta_t \sim N(0, \sigma_\eta^2)` and 150 :math:`\zeta_t \sim N(0, \sigma_\zeta^2)`. 151 152 For both elements (level and trend), we can consider models in which: 153 154 - The element is included vs excluded (if the trend is included, there must 155 also be a level included). 156 - The element is deterministic vs stochastic (i.e. whether or not the 157 variance on the error term is confined to be zero or not) 158 159 The only additional parameters to be estimated via MLE are the variances of 160 any included stochastic components. 161 162 The level/trend components can be specified using the boolean keyword 163 arguments `level`, `stochastic_level`, `trend`, etc., or all at once as a 164 string argument to `level`. The following table shows the available 165 model specifications: 166 167 +----------------------------------+--------------------------------------+--------------------+--------------------------------------------------+ 168 | Model name | Full string syntax | Abbreviated syntax | Model | 169 +==================================+======================================+====================+==================================================+ 170 | No trend | `'irregular'` | `'ntrend'` | .. math:: y_t = \varepsilon_t | 171 +----------------------------------+--------------------------------------+--------------------+--------------------------------------------------+ 172 | Fixed intercept | `'fixed intercept'` | | .. math:: y_t = \mu | 173 +----------------------------------+--------------------------------------+--------------------+--------------------------------------------------+ 174 | Deterministic constant | `'deterministic constant'` | `'dconstant'` | .. math:: y_t = \mu + \varepsilon_t | 175 +----------------------------------+--------------------------------------+--------------------+--------------------------------------------------+ 176 | Local level | `'local level'` | `'llevel'` | .. math:: y_t &= \mu_t + \varepsilon_t \\ | 177 | | | | \mu_t &= \mu_{t-1} + \eta_t | 178 +----------------------------------+--------------------------------------+--------------------+--------------------------------------------------+ 179 | Random walk | `'random walk'` | `'rwalk'` | .. math:: y_t &= \mu_t \\ | 180 | | | | \mu_t &= \mu_{t-1} + \eta_t | 181 +----------------------------------+--------------------------------------+--------------------+--------------------------------------------------+ 182 | Fixed slope | `'fixed slope'` | | .. math:: y_t &= \mu_t \\ | 183 | | | | \mu_t &= \mu_{t-1} + \beta | 184 +----------------------------------+--------------------------------------+--------------------+--------------------------------------------------+ 185 | Deterministic trend | `'deterministic trend'` | `'dtrend'` | .. math:: y_t &= \mu_t + \varepsilon_t \\ | 186 | | | | \mu_t &= \mu_{t-1} + \beta | 187 +----------------------------------+--------------------------------------+--------------------+--------------------------------------------------+ 188 | Local linear deterministic trend | `'local linear deterministic trend'` | `'lldtrend'` | .. math:: y_t &= \mu_t + \varepsilon_t \\ | 189 | | | | \mu_t &= \mu_{t-1} + \beta + \eta_t | 190 +----------------------------------+--------------------------------------+--------------------+--------------------------------------------------+ 191 | Random walk with drift | `'random walk with drift'` | `'rwdrift'` | .. math:: y_t &= \mu_t \\ | 192 | | | | \mu_t &= \mu_{t-1} + \beta + \eta_t | 193 +----------------------------------+--------------------------------------+--------------------+--------------------------------------------------+ 194 | Local linear trend | `'local linear trend'` | `'lltrend'` | .. math:: y_t &= \mu_t + \varepsilon_t \\ | 195 | | | | \mu_t &= \mu_{t-1} + \beta_{t-1} + \eta_t \\ | 196 | | | | \beta_t &= \beta_{t-1} + \zeta_t | 197 +----------------------------------+--------------------------------------+--------------------+--------------------------------------------------+ 198 | Smooth trend | `'smooth trend'` | `'strend'` | .. math:: y_t &= \mu_t + \varepsilon_t \\ | 199 | | | | \mu_t &= \mu_{t-1} + \beta_{t-1} \\ | 200 | | | | \beta_t &= \beta_{t-1} + \zeta_t | 201 +----------------------------------+--------------------------------------+--------------------+--------------------------------------------------+ 202 | Random trend | `'random trend'` | `'rtrend'` | .. math:: y_t &= \mu_t \\ | 203 | | | | \mu_t &= \mu_{t-1} + \beta_{t-1} \\ | 204 | | | | \beta_t &= \beta_{t-1} + \zeta_t | 205 +----------------------------------+--------------------------------------+--------------------+--------------------------------------------------+ 206 207 Following the fitting of the model, the unobserved level and trend 208 component time series are available in the results class in the 209 `level` and `trend` attributes, respectively. 210 211 **Seasonal (Time-domain)** 212 213 The seasonal component is modeled as: 214 215 .. math:: 216 217 \gamma_t = - \sum_{j=1}^{s-1} \gamma_{t+1-j} + \omega_t \\ 218 \omega_t \sim N(0, \sigma_\omega^2) 219 220 The periodicity (number of seasons) is s, and the defining character is 221 that (without the error term), the seasonal components sum to zero across 222 one complete cycle. The inclusion of an error term allows the seasonal 223 effects to vary over time (if this is not desired, :math:`\sigma_\omega^2` 224 can be set to zero using the `stochastic_seasonal=False` keyword argument). 225 226 This component results in one parameter to be selected via maximum 227 likelihood: :math:`\sigma_\omega^2`, and one parameter to be chosen, the 228 number of seasons `s`. 229 230 Following the fitting of the model, the unobserved seasonal component 231 time series is available in the results class in the `seasonal` 232 attribute. 233 234 **Frequency-domain Seasonal** 235 236 Each frequency-domain seasonal component is modeled as: 237 238 .. math:: 239 240 \gamma_t & = \sum_{j=1}^h \gamma_{j, t} \\ 241 \gamma_{j, t+1} & = \gamma_{j, t}\cos(\lambda_j) 242 + \gamma^{*}_{j, t}\sin(\lambda_j) + \omega_{j,t} \\ 243 \gamma^{*}_{j, t+1} & = -\gamma^{(1)}_{j, t}\sin(\lambda_j) 244 + \gamma^{*}_{j, t}\cos(\lambda_j) 245 + \omega^{*}_{j, t}, \\ 246 \omega^{*}_{j, t}, \omega_{j, t} & \sim N(0, \sigma_{\omega^2}) \\ 247 \lambda_j & = \frac{2 \pi j}{s} 248 249 where j ranges from 1 to h. 250 251 The periodicity (number of "seasons" in a "year") is s and the number of 252 harmonics is h. Note that h is configurable to be less than s/2, but 253 s/2 harmonics is sufficient to fully model all seasonal variations of 254 periodicity s. Like the time domain seasonal term (cf. Seasonal section, 255 above), the inclusion of the error terms allows for the seasonal effects to 256 vary over time. The argument stochastic_freq_seasonal can be used to set 257 one or more of the seasonal components of this type to be non-random, 258 meaning they will not vary over time. 259 260 This component results in one parameter to be fitted using maximum 261 likelihood: :math:`\sigma_{\omega^2}`, and up to two parameters to be 262 chosen, the number of seasons s and optionally the number of harmonics 263 h, with :math:`1 \leq h \leq \lfloor s/2 \rfloor`. 264 265 After fitting the model, each unobserved seasonal component modeled in the 266 frequency domain is available in the results class in the `freq_seasonal` 267 attribute. 268 269 **Cycle** 270 271 The cyclical component is intended to capture cyclical effects at time 272 frames much longer than captured by the seasonal component. For example, 273 in economics the cyclical term is often intended to capture the business 274 cycle, and is then expected to have a period between "1.5 and 12 years" 275 (see Durbin and Koopman). 276 277 .. math:: 278 279 c_{t+1} & = \rho_c (\tilde c_t \cos \lambda_c t 280 + \tilde c_t^* \sin \lambda_c) + 281 \tilde \omega_t \\ 282 c_{t+1}^* & = \rho_c (- \tilde c_t \sin \lambda_c t + 283 \tilde c_t^* \cos \lambda_c) + 284 \tilde \omega_t^* \\ 285 286 where :math:`\omega_t, \tilde \omega_t iid N(0, \sigma_{\tilde \omega}^2)` 287 288 The parameter :math:`\lambda_c` (the frequency of the cycle) is an 289 additional parameter to be estimated by MLE. 290 291 If the cyclical effect is stochastic (`stochastic_cycle=True`), then there 292 is another parameter to estimate (the variance of the error term - note 293 that both of the error terms here share the same variance, but are assumed 294 to have independent draws). 295 296 If the cycle is damped (`damped_cycle=True`), then there is a third 297 parameter to estimate, :math:`\rho_c`. 298 299 In order to achieve cycles with the appropriate frequencies, bounds are 300 imposed on the parameter :math:`\lambda_c` in estimation. These can be 301 controlled via the keyword argument `cycle_period_bounds`, which, if 302 specified, must be a tuple of bounds on the **period** `(lower, upper)`. 303 The bounds on the frequency are then calculated from those bounds. 304 305 The default bounds, if none are provided, are selected in the following 306 way: 307 308 1. If no date / time information is provided, the frequency is 309 constrained to be between zero and :math:`\pi`, so the period is 310 constrained to be in :math:`[0.5, \infty]`. 311 2. If the date / time information is provided, the default bounds 312 allow the cyclical component to be between 1.5 and 12 years; depending 313 on the frequency of the endogenous variable, this will imply different 314 specific bounds. 315 316 Following the fitting of the model, the unobserved cyclical component 317 time series is available in the results class in the `cycle` 318 attribute. 319 320 **Irregular** 321 322 The irregular components are independent and identically distributed (iid): 323 324 .. math:: 325 326 \varepsilon_t \sim N(0, \sigma_\varepsilon^2) 327 328 **Autoregressive Irregular** 329 330 An autoregressive component (often used as a replacement for the white 331 noise irregular term) can be specified as: 332 333 .. math:: 334 335 \varepsilon_t = \rho(L) \varepsilon_{t-1} + \epsilon_t \\ 336 \epsilon_t \sim N(0, \sigma_\epsilon^2) 337 338 In this case, the AR order is specified via the `autoregressive` keyword, 339 and the autoregressive coefficients are estimated. 340 341 Following the fitting of the model, the unobserved autoregressive component 342 time series is available in the results class in the `autoregressive` 343 attribute. 344 345 **Regression effects** 346 347 Exogenous regressors can be pass to the `exog` argument. The regression 348 coefficients will be estimated by maximum likelihood unless 349 `mle_regression=False`, in which case the regression coefficients will be 350 included in the state vector where they are essentially estimated via 351 recursive OLS. 352 353 If the regression_coefficients are included in the state vector, the 354 recursive estimates are available in the results class in the 355 `regression_coefficients` attribute. 356 357 References 358 ---------- 359 .. [1] Durbin, James, and Siem Jan Koopman. 2012. 360 Time Series Analysis by State Space Methods: Second Edition. 361 Oxford University Press. 362 """ # noqa:E501 363 364 def __init__(self, endog, level=False, trend=False, seasonal=None, 365 freq_seasonal=None, cycle=False, autoregressive=None, 366 exog=None, irregular=False, 367 stochastic_level=False, 368 stochastic_trend=False, 369 stochastic_seasonal=True, 370 stochastic_freq_seasonal=None, 371 stochastic_cycle=False, 372 damped_cycle=False, cycle_period_bounds=None, 373 mle_regression=True, use_exact_diffuse=False, 374 **kwargs): 375 376 # Model options 377 self.level = level 378 self.trend = trend 379 self.seasonal_periods = seasonal if seasonal is not None else 0 380 self.seasonal = self.seasonal_periods > 0 381 if freq_seasonal: 382 self.freq_seasonal_periods = [d['period'] for d in freq_seasonal] 383 self.freq_seasonal_harmonics = [d.get( 384 'harmonics', int(np.floor(d['period'] / 2))) for 385 d in freq_seasonal] 386 else: 387 self.freq_seasonal_periods = [] 388 self.freq_seasonal_harmonics = [] 389 self.freq_seasonal = any(x > 0 for x in self.freq_seasonal_periods) 390 self.cycle = cycle 391 self.ar_order = autoregressive if autoregressive is not None else 0 392 self.autoregressive = self.ar_order > 0 393 self.irregular = irregular 394 395 self.stochastic_level = stochastic_level 396 self.stochastic_trend = stochastic_trend 397 self.stochastic_seasonal = stochastic_seasonal 398 if stochastic_freq_seasonal is None: 399 self.stochastic_freq_seasonal = [True] * len( 400 self.freq_seasonal_periods) 401 else: 402 if len(stochastic_freq_seasonal) != len(freq_seasonal): 403 raise ValueError( 404 "Length of stochastic_freq_seasonal must equal length" 405 " of freq_seasonal: {!r} vs {!r}".format( 406 len(stochastic_freq_seasonal), len(freq_seasonal))) 407 self.stochastic_freq_seasonal = stochastic_freq_seasonal 408 self.stochastic_cycle = stochastic_cycle 409 410 self.damped_cycle = damped_cycle 411 self.mle_regression = mle_regression 412 self.use_exact_diffuse = use_exact_diffuse 413 414 # Check for string trend/level specification 415 self.trend_specification = None 416 if isinstance(self.level, str): 417 self.trend_specification = level 418 self.level = False 419 420 # Check if any of the trend/level components have been set, and 421 # reset everything to False 422 trend_attributes = ['irregular', 'level', 'trend', 423 'stochastic_level', 'stochastic_trend'] 424 for attribute in trend_attributes: 425 if not getattr(self, attribute) is False: 426 warn("Value of `%s` may be overridden when the trend" 427 " component is specified using a model string." 428 % attribute, SpecificationWarning) 429 setattr(self, attribute, False) 430 431 # Now set the correct specification 432 spec = self.trend_specification 433 if spec == 'irregular' or spec == 'ntrend': 434 self.irregular = True 435 self.trend_specification = 'irregular' 436 elif spec == 'fixed intercept': 437 self.level = True 438 elif spec == 'deterministic constant' or spec == 'dconstant': 439 self.irregular = True 440 self.level = True 441 self.trend_specification = 'deterministic constant' 442 elif spec == 'local level' or spec == 'llevel': 443 self.irregular = True 444 self.level = True 445 self.stochastic_level = True 446 self.trend_specification = 'local level' 447 elif spec == 'random walk' or spec == 'rwalk': 448 self.level = True 449 self.stochastic_level = True 450 self.trend_specification = 'random walk' 451 elif spec == 'fixed slope': 452 self.level = True 453 self.trend = True 454 elif spec == 'deterministic trend' or spec == 'dtrend': 455 self.irregular = True 456 self.level = True 457 self.trend = True 458 self.trend_specification = 'deterministic trend' 459 elif (spec == 'local linear deterministic trend' or 460 spec == 'lldtrend'): 461 self.irregular = True 462 self.level = True 463 self.stochastic_level = True 464 self.trend = True 465 self.trend_specification = 'local linear deterministic trend' 466 elif spec == 'random walk with drift' or spec == 'rwdrift': 467 self.level = True 468 self.stochastic_level = True 469 self.trend = True 470 self.trend_specification = 'random walk with drift' 471 elif spec == 'local linear trend' or spec == 'lltrend': 472 self.irregular = True 473 self.level = True 474 self.stochastic_level = True 475 self.trend = True 476 self.stochastic_trend = True 477 self.trend_specification = 'local linear trend' 478 elif spec == 'smooth trend' or spec == 'strend': 479 self.irregular = True 480 self.level = True 481 self.trend = True 482 self.stochastic_trend = True 483 self.trend_specification = 'smooth trend' 484 elif spec == 'random trend' or spec == 'rtrend': 485 self.level = True 486 self.trend = True 487 self.stochastic_trend = True 488 self.trend_specification = 'random trend' 489 else: 490 raise ValueError("Invalid level/trend specification: '%s'" 491 % spec) 492 493 # Check for a model that makes sense 494 if trend and not level: 495 warn("Trend component specified without level component;" 496 " deterministic level component added.", SpecificationWarning) 497 self.level = True 498 self.stochastic_level = False 499 500 if not (self.irregular or 501 (self.level and self.stochastic_level) or 502 (self.trend and self.stochastic_trend) or 503 (self.seasonal and self.stochastic_seasonal) or 504 (self.freq_seasonal and any( 505 self.stochastic_freq_seasonal)) or 506 (self.cycle and self.stochastic_cycle) or 507 self.autoregressive): 508 warn("Specified model does not contain a stochastic element;" 509 " irregular component added.", SpecificationWarning) 510 self.irregular = True 511 512 if self.seasonal and self.seasonal_periods < 2: 513 raise ValueError('Seasonal component must have a seasonal period' 514 ' of at least 2.') 515 516 if self.freq_seasonal: 517 for p in self.freq_seasonal_periods: 518 if p < 2: 519 raise ValueError( 520 'Frequency Domain seasonal component must have a ' 521 'seasonal period of at least 2.') 522 523 # Create a bitmask holding the level/trend specification 524 self.trend_mask = ( 525 self.irregular * 0x01 | 526 self.level * 0x02 | 527 self.level * self.stochastic_level * 0x04 | 528 self.trend * 0x08 | 529 self.trend * self.stochastic_trend * 0x10 530 ) 531 532 # Create the trend specification, if it was not given 533 if self.trend_specification is None: 534 # trend specification may be none, e.g. if the model is only 535 # a stochastic cycle, etc. 536 self.trend_specification = _mask_map.get(self.trend_mask, None) 537 538 # Exogenous component 539 (self.k_exog, exog) = prepare_exog(exog) 540 541 self.regression = self.k_exog > 0 542 543 # Model parameters 544 self._k_seasonal_states = (self.seasonal_periods - 1) * self.seasonal 545 self._k_freq_seas_states = ( 546 sum(2 * h for h in self.freq_seasonal_harmonics) 547 * self.freq_seasonal) 548 self._k_cycle_states = self.cycle * 2 549 k_states = ( 550 self.level + self.trend + 551 self._k_seasonal_states + 552 self._k_freq_seas_states + 553 self._k_cycle_states + 554 self.ar_order + 555 (not self.mle_regression) * self.k_exog 556 ) 557 k_posdef = ( 558 self.stochastic_level * self.level + 559 self.stochastic_trend * self.trend + 560 self.stochastic_seasonal * self.seasonal + 561 ((sum(2 * h if self.stochastic_freq_seasonal[ix] else 0 for 562 ix, h in enumerate(self.freq_seasonal_harmonics))) * 563 self.freq_seasonal) + 564 self.stochastic_cycle * (self._k_cycle_states) + 565 self.autoregressive 566 ) 567 568 # Handle non-default loglikelihood burn 569 self._loglikelihood_burn = kwargs.get('loglikelihood_burn', None) 570 571 # We can still estimate the model with just the irregular component, 572 # just need to have one state that does nothing. 573 self._unused_state = False 574 if k_states == 0: 575 if not self.irregular: 576 raise ValueError('Model has no components specified.') 577 k_states = 1 578 self._unused_state = True 579 if k_posdef == 0: 580 k_posdef = 1 581 582 # Setup the representation 583 super(UnobservedComponents, self).__init__( 584 endog, k_states, k_posdef=k_posdef, exog=exog, **kwargs 585 ) 586 self.setup() 587 588 # Set as time-varying model if we have exog 589 if self.k_exog > 0: 590 self.ssm._time_invariant = False 591 592 # Need to reset the MLE names (since when they were first set, `setup` 593 # had not been run (and could not have been at that point)) 594 self.data.param_names = self.param_names 595 596 # Get bounds for the frequency of the cycle, if we know the frequency 597 # of the data. 598 if cycle_period_bounds is None: 599 freq = self.data.freq[0] if self.data.freq is not None else '' 600 if freq == 'A': 601 cycle_period_bounds = (1.5, 12) 602 elif freq == 'Q': 603 cycle_period_bounds = (1.5*4, 12*4) 604 elif freq == 'M': 605 cycle_period_bounds = (1.5*12, 12*12) 606 else: 607 # If we have no information on data frequency, require the 608 # cycle frequency to be between 0 and pi 609 cycle_period_bounds = (2, np.inf) 610 611 self.cycle_frequency_bound = ( 612 2*np.pi / cycle_period_bounds[1], 2*np.pi / cycle_period_bounds[0] 613 ) 614 615 # Update _init_keys attached by super 616 self._init_keys += ['level', 'trend', 'seasonal', 'freq_seasonal', 617 'cycle', 'autoregressive', 'irregular', 618 'stochastic_level', 'stochastic_trend', 619 'stochastic_seasonal', 'stochastic_freq_seasonal', 620 'stochastic_cycle', 621 'damped_cycle', 'cycle_period_bounds', 622 'mle_regression'] + list(kwargs.keys()) 623 624 # Initialize the state 625 self.initialize_default() 626 627 def _get_init_kwds(self): 628 # Get keywords based on model attributes 629 kwds = super(UnobservedComponents, self)._get_init_kwds() 630 631 # Modifications 632 if self.trend_specification is not None: 633 kwds['level'] = self.trend_specification 634 635 for attr in ['irregular', 'trend', 'stochastic_level', 636 'stochastic_trend']: 637 kwds[attr] = False 638 639 kwds['seasonal'] = self.seasonal_periods 640 kwds['freq_seasonal'] = [ 641 {'period': p, 642 'harmonics': self.freq_seasonal_harmonics[ix]} for 643 ix, p in enumerate(self.freq_seasonal_periods)] 644 kwds['autoregressive'] = self.ar_order 645 646 return kwds 647 648 def setup(self): 649 """ 650 Setup the structural time series representation 651 """ 652 # Initialize the ordered sets of parameters 653 self.parameters = {} 654 self.parameters_obs_intercept = {} 655 self.parameters_obs_cov = {} 656 self.parameters_transition = {} 657 self.parameters_state_cov = {} 658 659 # Initialize the fixed components of the state space matrices, 660 i = 0 # state offset 661 j = 0 # state covariance offset 662 663 if self.irregular: 664 self.parameters_obs_cov['irregular_var'] = 1 665 if self.level: 666 self.ssm['design', 0, i] = 1. 667 self.ssm['transition', i, i] = 1. 668 if self.trend: 669 self.ssm['transition', i, i+1] = 1. 670 if self.stochastic_level: 671 self.ssm['selection', i, j] = 1. 672 self.parameters_state_cov['level_var'] = 1 673 j += 1 674 i += 1 675 if self.trend: 676 self.ssm['transition', i, i] = 1. 677 if self.stochastic_trend: 678 self.ssm['selection', i, j] = 1. 679 self.parameters_state_cov['trend_var'] = 1 680 j += 1 681 i += 1 682 if self.seasonal: 683 n = self.seasonal_periods - 1 684 self.ssm['design', 0, i] = 1. 685 self.ssm['transition', i:i + n, i:i + n] = ( 686 companion_matrix(np.r_[1, [1] * n]).transpose() 687 ) 688 if self.stochastic_seasonal: 689 self.ssm['selection', i, j] = 1. 690 self.parameters_state_cov['seasonal_var'] = 1 691 j += 1 692 i += n 693 if self.freq_seasonal: 694 for ix, h in enumerate(self.freq_seasonal_harmonics): 695 # These are the \gamma_jt and \gamma^*_jt terms in D&K (3.8) 696 n = 2 * h 697 p = self.freq_seasonal_periods[ix] 698 lambda_p = 2 * np.pi / float(p) 699 700 t = 0 # frequency transition matrix offset 701 for block in range(1, h + 1): 702 # ibid. eqn (3.7) 703 self.ssm['design', 0, i+t] = 1. 704 705 # ibid. eqn (3.8) 706 cos_lambda_block = np.cos(lambda_p * block) 707 sin_lambda_block = np.sin(lambda_p * block) 708 trans = np.array([[cos_lambda_block, sin_lambda_block], 709 [-sin_lambda_block, cos_lambda_block]]) 710 trans_s = np.s_[i + t:i + t + 2] 711 self.ssm['transition', trans_s, trans_s] = trans 712 t += 2 713 714 if self.stochastic_freq_seasonal[ix]: 715 self.ssm['selection', i:i + n, j:j + n] = np.eye(n) 716 cov_key = 'freq_seasonal_var_{!r}'.format(ix) 717 self.parameters_state_cov[cov_key] = 1 718 j += n 719 i += n 720 if self.cycle: 721 self.ssm['design', 0, i] = 1. 722 self.parameters_transition['cycle_freq'] = 1 723 if self.damped_cycle: 724 self.parameters_transition['cycle_damp'] = 1 725 if self.stochastic_cycle: 726 self.ssm['selection', i:i+2, j:j+2] = np.eye(2) 727 self.parameters_state_cov['cycle_var'] = 1 728 j += 2 729 self._idx_cycle_transition = np.s_['transition', i:i+2, i:i+2] 730 i += 2 731 if self.autoregressive: 732 self.ssm['design', 0, i] = 1. 733 self.parameters_transition['ar_coeff'] = self.ar_order 734 self.parameters_state_cov['ar_var'] = 1 735 self.ssm['selection', i, j] = 1 736 self.ssm['transition', i:i+self.ar_order, i:i+self.ar_order] = ( 737 companion_matrix(self.ar_order).T 738 ) 739 self._idx_ar_transition = ( 740 np.s_['transition', i, i:i+self.ar_order] 741 ) 742 j += 1 743 i += self.ar_order 744 if self.regression: 745 if self.mle_regression: 746 self.parameters_obs_intercept['reg_coeff'] = self.k_exog 747 else: 748 design = np.repeat(self.ssm['design', :, :, 0], self.nobs, 749 axis=0) 750 self.ssm['design'] = design.transpose()[np.newaxis, :, :] 751 self.ssm['design', 0, i:i+self.k_exog, :] = ( 752 self.exog.transpose()) 753 self.ssm['transition', i:i+self.k_exog, i:i+self.k_exog] = ( 754 np.eye(self.k_exog) 755 ) 756 757 i += self.k_exog 758 759 # Update to get the actual parameter set 760 self.parameters.update(self.parameters_obs_cov) 761 self.parameters.update(self.parameters_state_cov) 762 self.parameters.update(self.parameters_transition) # ordered last 763 self.parameters.update(self.parameters_obs_intercept) 764 765 self.k_obs_intercept = sum(self.parameters_obs_intercept.values()) 766 self.k_obs_cov = sum(self.parameters_obs_cov.values()) 767 self.k_transition = sum(self.parameters_transition.values()) 768 self.k_state_cov = sum(self.parameters_state_cov.values()) 769 self.k_params = sum(self.parameters.values()) 770 771 # Other indices 772 idx = np.diag_indices(self.ssm.k_posdef) 773 self._idx_state_cov = ('state_cov', idx[0], idx[1]) 774 775 # Some of the variances may be tied together (repeated parameter usage) 776 # Use list() for compatibility with python 3.5 777 param_keys = list(self.parameters_state_cov.keys()) 778 self._var_repetitions = np.ones(self.k_state_cov, dtype=int) 779 if self.freq_seasonal: 780 for ix, is_stochastic in enumerate(self.stochastic_freq_seasonal): 781 if is_stochastic: 782 num_harmonics = self.freq_seasonal_harmonics[ix] 783 repeat_times = 2 * num_harmonics 784 cov_key = 'freq_seasonal_var_{!r}'.format(ix) 785 cov_ix = param_keys.index(cov_key) 786 self._var_repetitions[cov_ix] = repeat_times 787 788 if self.stochastic_cycle and self.cycle: 789 cov_ix = param_keys.index('cycle_var') 790 self._var_repetitions[cov_ix] = 2 791 self._repeat_any_var = any(self._var_repetitions > 1) 792 793 def initialize_default(self, approximate_diffuse_variance=None): 794 if approximate_diffuse_variance is None: 795 approximate_diffuse_variance = self.ssm.initial_variance 796 if self.use_exact_diffuse: 797 diffuse_type = 'diffuse' 798 else: 799 diffuse_type = 'approximate_diffuse' 800 801 # Set the loglikelihood burn parameter, if not given in constructor 802 if self._loglikelihood_burn is None: 803 k_diffuse_states = ( 804 self.k_states - int(self._unused_state) - self.ar_order) 805 self.loglikelihood_burn = k_diffuse_states 806 807 init = Initialization( 808 self.k_states, 809 approximate_diffuse_variance=approximate_diffuse_variance) 810 811 if self._unused_state: 812 # If this flag is set, it means we have a model with just an 813 # irregular component and nothing else. The state is then 814 # irrelevant and we can't put it as diffuse, since then the filter 815 # will never leave the diffuse state. 816 init.set(0, 'known', constant=[0]) 817 elif self.autoregressive: 818 offset = (self.level + self.trend + 819 self._k_seasonal_states + 820 self._k_freq_seas_states + 821 self._k_cycle_states) 822 length = self.ar_order 823 init.set((0, offset), diffuse_type) 824 init.set((offset, offset + length), 'stationary') 825 init.set((offset + length, self.k_states), diffuse_type) 826 # If we do not have an autoregressive component, then everything has 827 # a diffuse initialization 828 else: 829 init.set(None, diffuse_type) 830 831 self.ssm.initialization = init 832 833 def clone(self, endog, exog=None, **kwargs): 834 return self._clone_from_init_kwds(endog, exog=exog, **kwargs) 835 836 @property 837 def _res_classes(self): 838 return {'fit': (UnobservedComponentsResults, 839 UnobservedComponentsResultsWrapper)} 840 841 @property 842 def start_params(self): 843 if not hasattr(self, 'parameters'): 844 return [] 845 846 # Eliminate missing data to estimate starting parameters 847 endog = self.endog 848 exog = self.exog 849 if np.any(np.isnan(endog)): 850 mask = ~np.isnan(endog).squeeze() 851 endog = endog[mask] 852 if exog is not None: 853 exog = exog[mask] 854 855 # Level / trend variances 856 # (Use the HP filter to get initial estimates of variances) 857 _start_params = {} 858 if self.level: 859 resid, trend1 = hpfilter(endog) 860 861 if self.stochastic_trend: 862 cycle2, trend2 = hpfilter(trend1) 863 _start_params['trend_var'] = np.std(trend2)**2 864 if self.stochastic_level: 865 _start_params['level_var'] = np.std(cycle2)**2 866 elif self.stochastic_level: 867 _start_params['level_var'] = np.std(trend1)**2 868 else: 869 resid = self.ssm.endog[0] 870 871 # Regression 872 if self.regression and self.mle_regression: 873 _start_params['reg_coeff'] = ( 874 np.linalg.pinv(exog).dot(resid).tolist() 875 ) 876 resid = np.squeeze( 877 resid - np.dot(exog, _start_params['reg_coeff']) 878 ) 879 880 # Autoregressive 881 if self.autoregressive: 882 Y = resid[self.ar_order:] 883 X = lagmat(resid, self.ar_order, trim='both') 884 _start_params['ar_coeff'] = np.linalg.pinv(X).dot(Y).tolist() 885 resid = np.squeeze(Y - np.dot(X, _start_params['ar_coeff'])) 886 _start_params['ar_var'] = np.var(resid) 887 888 # The variance of the residual term can be used for all variances, 889 # just to get something in the right order of magnitude. 890 var_resid = np.var(resid) 891 892 # Seasonal 893 if self.stochastic_seasonal: 894 _start_params['seasonal_var'] = var_resid 895 896 # Frequency domain seasonal 897 for ix, is_stochastic in enumerate(self.stochastic_freq_seasonal): 898 cov_key = 'freq_seasonal_var_{!r}'.format(ix) 899 _start_params[cov_key] = var_resid 900 901 # Cyclical 902 if self.cycle: 903 _start_params['cycle_var'] = var_resid 904 # Clip this to make sure it is positive and strictly stationary 905 # (i.e. do not want negative or 1) 906 _start_params['cycle_damp'] = np.clip( 907 np.linalg.pinv(resid[:-1, None]).dot(resid[1:])[0], 0, 0.99 908 ) 909 910 # Set initial period estimate to 3 year, if we know the frequency 911 # of the data observations 912 freq = self.data.freq[0] if self.data.freq is not None else '' 913 if freq == 'A': 914 _start_params['cycle_freq'] = 2 * np.pi / 3 915 elif freq == 'Q': 916 _start_params['cycle_freq'] = 2 * np.pi / 12 917 elif freq == 'M': 918 _start_params['cycle_freq'] = 2 * np.pi / 36 919 else: 920 if not np.any(np.isinf(self.cycle_frequency_bound)): 921 _start_params['cycle_freq'] = ( 922 np.mean(self.cycle_frequency_bound)) 923 elif np.isinf(self.cycle_frequency_bound[1]): 924 _start_params['cycle_freq'] = self.cycle_frequency_bound[0] 925 else: 926 _start_params['cycle_freq'] = self.cycle_frequency_bound[1] 927 928 # Irregular 929 if self.irregular: 930 _start_params['irregular_var'] = var_resid 931 932 # Create the starting parameter list 933 start_params = [] 934 for key in self.parameters.keys(): 935 if np.isscalar(_start_params[key]): 936 start_params.append(_start_params[key]) 937 else: 938 start_params += _start_params[key] 939 return start_params 940 941 @property 942 def param_names(self): 943 if not hasattr(self, 'parameters'): 944 return [] 945 param_names = [] 946 for key in self.parameters.keys(): 947 if key == 'irregular_var': 948 param_names.append('sigma2.irregular') 949 elif key == 'level_var': 950 param_names.append('sigma2.level') 951 elif key == 'trend_var': 952 param_names.append('sigma2.trend') 953 elif key == 'seasonal_var': 954 param_names.append('sigma2.seasonal') 955 elif key.startswith('freq_seasonal_var_'): 956 # There are potentially multiple frequency domain 957 # seasonal terms 958 idx_fseas_comp = int(key[-1]) 959 periodicity = self.freq_seasonal_periods[idx_fseas_comp] 960 harmonics = self.freq_seasonal_harmonics[idx_fseas_comp] 961 freq_seasonal_name = "{p}({h})".format( 962 p=repr(periodicity), 963 h=repr(harmonics)) 964 param_names.append( 965 'sigma2.' + 'freq_seasonal_' + freq_seasonal_name) 966 elif key == 'cycle_var': 967 param_names.append('sigma2.cycle') 968 elif key == 'cycle_freq': 969 param_names.append('frequency.cycle') 970 elif key == 'cycle_damp': 971 param_names.append('damping.cycle') 972 elif key == 'ar_coeff': 973 for i in range(self.ar_order): 974 param_names.append('ar.L%d' % (i+1)) 975 elif key == 'ar_var': 976 param_names.append('sigma2.ar') 977 elif key == 'reg_coeff': 978 param_names += [ 979 'beta.%s' % self.exog_names[i] 980 for i in range(self.k_exog) 981 ] 982 else: 983 param_names.append(key) 984 return param_names 985 986 @property 987 def state_names(self): 988 names = [] 989 if self.level: 990 names.append('level') 991 if self.trend: 992 names.append('trend') 993 if self.seasonal: 994 names.append('seasonal') 995 names += ['seasonal.L%d' % i 996 for i in range(1, self._k_seasonal_states)] 997 if self.freq_seasonal: 998 names += ['freq_seasonal.%d' % i 999 for i in range(self._k_freq_seas_states)] 1000 if self.cycle: 1001 names += ['cycle', 'cycle.auxilliary'] 1002 if self.ar_order > 0: 1003 names += ['ar.L%d' % i 1004 for i in range(1, self.ar_order + 1)] 1005 if self.k_exog > 0 and not self.mle_regression: 1006 names += ['beta.%s' % self.exog_names[i] 1007 for i in range(self.k_exog)] 1008 if self._unused_state: 1009 names += ['dummy'] 1010 1011 return names 1012 1013 def transform_params(self, unconstrained): 1014 """ 1015 Transform unconstrained parameters used by the optimizer to constrained 1016 parameters used in likelihood evaluation 1017 """ 1018 unconstrained = np.array(unconstrained, ndmin=1) 1019 constrained = np.zeros(unconstrained.shape, dtype=unconstrained.dtype) 1020 1021 # Positive parameters: obs_cov, state_cov 1022 offset = self.k_obs_cov + self.k_state_cov 1023 constrained[:offset] = unconstrained[:offset]**2 1024 1025 # Cycle parameters 1026 if self.cycle: 1027 # Cycle frequency must be between between our bounds 1028 low, high = self.cycle_frequency_bound 1029 constrained[offset] = ( 1030 1 / (1 + np.exp(-unconstrained[offset])) 1031 ) * (high - low) + low 1032 offset += 1 1033 1034 # Cycle damping (if present) must be between 0 and 1 1035 if self.damped_cycle: 1036 constrained[offset] = ( 1037 1 / (1 + np.exp(-unconstrained[offset])) 1038 ) 1039 offset += 1 1040 1041 # Autoregressive coefficients must be stationary 1042 if self.autoregressive: 1043 constrained[offset:offset + self.ar_order] = ( 1044 constrain_stationary_univariate( 1045 unconstrained[offset:offset + self.ar_order] 1046 ) 1047 ) 1048 offset += self.ar_order 1049 1050 # Nothing to do with betas 1051 constrained[offset:offset + self.k_exog] = ( 1052 unconstrained[offset:offset + self.k_exog] 1053 ) 1054 1055 return constrained 1056 1057 def untransform_params(self, constrained): 1058 """ 1059 Reverse the transformation 1060 """ 1061 constrained = np.array(constrained, ndmin=1) 1062 unconstrained = np.zeros(constrained.shape, dtype=constrained.dtype) 1063 1064 # Positive parameters: obs_cov, state_cov 1065 offset = self.k_obs_cov + self.k_state_cov 1066 unconstrained[:offset] = constrained[:offset]**0.5 1067 1068 # Cycle parameters 1069 if self.cycle: 1070 # Cycle frequency must be between between our bounds 1071 low, high = self.cycle_frequency_bound 1072 x = (constrained[offset] - low) / (high - low) 1073 unconstrained[offset] = np.log( 1074 x / (1 - x) 1075 ) 1076 offset += 1 1077 1078 # Cycle damping (if present) must be between 0 and 1 1079 if self.damped_cycle: 1080 unconstrained[offset] = np.log( 1081 constrained[offset] / (1 - constrained[offset]) 1082 ) 1083 offset += 1 1084 1085 # Autoregressive coefficients must be stationary 1086 if self.autoregressive: 1087 unconstrained[offset:offset + self.ar_order] = ( 1088 unconstrain_stationary_univariate( 1089 constrained[offset:offset + self.ar_order] 1090 ) 1091 ) 1092 offset += self.ar_order 1093 1094 # Nothing to do with betas 1095 unconstrained[offset:offset + self.k_exog] = ( 1096 constrained[offset:offset + self.k_exog] 1097 ) 1098 1099 return unconstrained 1100 1101 def _validate_can_fix_params(self, param_names): 1102 super(UnobservedComponents, self)._validate_can_fix_params(param_names) 1103 1104 if 'ar_coeff' in self.parameters: 1105 ar_names = ['ar.L%d' % (i+1) for i in range(self.ar_order)] 1106 fix_all_ar = param_names.issuperset(ar_names) 1107 fix_any_ar = len(param_names.intersection(ar_names)) > 0 1108 if fix_any_ar and not fix_all_ar: 1109 raise ValueError('Cannot fix individual autoregressive.' 1110 ' parameters. Must either fix all' 1111 ' autoregressive parameters or none.') 1112 1113 def update(self, params, transformed=True, includes_fixed=False, 1114 complex_step=False): 1115 params = self.handle_params(params, transformed=transformed, 1116 includes_fixed=includes_fixed) 1117 1118 offset = 0 1119 1120 # Observation covariance 1121 if self.irregular: 1122 self.ssm['obs_cov', 0, 0] = params[offset] 1123 offset += 1 1124 1125 # State covariance 1126 if self.k_state_cov > 0: 1127 variances = params[offset:offset+self.k_state_cov] 1128 if self._repeat_any_var: 1129 variances = np.repeat(variances, self._var_repetitions) 1130 self.ssm[self._idx_state_cov] = variances 1131 offset += self.k_state_cov 1132 1133 # Cycle transition 1134 if self.cycle: 1135 cos_freq = np.cos(params[offset]) 1136 sin_freq = np.sin(params[offset]) 1137 cycle_transition = np.array( 1138 [[cos_freq, sin_freq], 1139 [-sin_freq, cos_freq]] 1140 ) 1141 if self.damped_cycle: 1142 offset += 1 1143 cycle_transition *= params[offset] 1144 self.ssm[self._idx_cycle_transition] = cycle_transition 1145 offset += 1 1146 1147 # AR transition 1148 if self.autoregressive: 1149 self.ssm[self._idx_ar_transition] = ( 1150 params[offset:offset+self.ar_order] 1151 ) 1152 offset += self.ar_order 1153 1154 # Beta observation intercept 1155 if self.regression: 1156 if self.mle_regression: 1157 self.ssm['obs_intercept'] = np.dot( 1158 self.exog, 1159 params[offset:offset+self.k_exog] 1160 )[None, :] 1161 offset += self.k_exog 1162 1163 1164class UnobservedComponentsResults(MLEResults): 1165 """ 1166 Class to hold results from fitting an unobserved components model. 1167 1168 Parameters 1169 ---------- 1170 model : UnobservedComponents instance 1171 The fitted model instance 1172 1173 Attributes 1174 ---------- 1175 specification : dictionary 1176 Dictionary including all attributes from the unobserved components 1177 model instance. 1178 1179 See Also 1180 -------- 1181 statsmodels.tsa.statespace.kalman_filter.FilterResults 1182 statsmodels.tsa.statespace.mlemodel.MLEResults 1183 """ 1184 1185 def __init__(self, model, params, filter_results, cov_type=None, 1186 **kwargs): 1187 super(UnobservedComponentsResults, self).__init__( 1188 model, params, filter_results, cov_type, **kwargs) 1189 1190 self.df_resid = np.inf # attribute required for wald tests 1191 1192 # Save _init_kwds 1193 self._init_kwds = self.model._get_init_kwds() 1194 1195 # Save number of states by type 1196 self._k_states_by_type = { 1197 'seasonal': self.model._k_seasonal_states, 1198 'freq_seasonal': self.model._k_freq_seas_states, 1199 'cycle': self.model._k_cycle_states} 1200 1201 # Save the model specification 1202 self.specification = Bunch(**{ 1203 # Model options 1204 'level': self.model.level, 1205 'trend': self.model.trend, 1206 'seasonal_periods': self.model.seasonal_periods, 1207 'seasonal': self.model.seasonal, 1208 'freq_seasonal': self.model.freq_seasonal, 1209 'freq_seasonal_periods': self.model.freq_seasonal_periods, 1210 'freq_seasonal_harmonics': self.model.freq_seasonal_harmonics, 1211 'cycle': self.model.cycle, 1212 'ar_order': self.model.ar_order, 1213 'autoregressive': self.model.autoregressive, 1214 'irregular': self.model.irregular, 1215 'stochastic_level': self.model.stochastic_level, 1216 'stochastic_trend': self.model.stochastic_trend, 1217 'stochastic_seasonal': self.model.stochastic_seasonal, 1218 'stochastic_freq_seasonal': self.model.stochastic_freq_seasonal, 1219 'stochastic_cycle': self.model.stochastic_cycle, 1220 1221 'damped_cycle': self.model.damped_cycle, 1222 'regression': self.model.regression, 1223 'mle_regression': self.model.mle_regression, 1224 'k_exog': self.model.k_exog, 1225 1226 # Check for string trend/level specification 1227 'trend_specification': self.model.trend_specification 1228 }) 1229 1230 @property 1231 def level(self): 1232 """ 1233 Estimates of unobserved level component 1234 1235 Returns 1236 ------- 1237 out: Bunch 1238 Has the following attributes: 1239 1240 - `filtered`: a time series array with the filtered estimate of 1241 the component 1242 - `filtered_cov`: a time series array with the filtered estimate of 1243 the variance/covariance of the component 1244 - `smoothed`: a time series array with the smoothed estimate of 1245 the component 1246 - `smoothed_cov`: a time series array with the smoothed estimate of 1247 the variance/covariance of the component 1248 - `offset`: an integer giving the offset in the state vector where 1249 this component begins 1250 """ 1251 # If present, level is always the first component of the state vector 1252 out = None 1253 spec = self.specification 1254 if spec.level: 1255 offset = 0 1256 out = Bunch(filtered=self.filtered_state[offset], 1257 filtered_cov=self.filtered_state_cov[offset, offset], 1258 smoothed=None, smoothed_cov=None, 1259 offset=offset) 1260 if self.smoothed_state is not None: 1261 out.smoothed = self.smoothed_state[offset] 1262 if self.smoothed_state_cov is not None: 1263 out.smoothed_cov = self.smoothed_state_cov[offset, offset] 1264 return out 1265 1266 @property 1267 def trend(self): 1268 """ 1269 Estimates of of unobserved trend component 1270 1271 Returns 1272 ------- 1273 out: Bunch 1274 Has the following attributes: 1275 1276 - `filtered`: a time series array with the filtered estimate of 1277 the component 1278 - `filtered_cov`: a time series array with the filtered estimate of 1279 the variance/covariance of the component 1280 - `smoothed`: a time series array with the smoothed estimate of 1281 the component 1282 - `smoothed_cov`: a time series array with the smoothed estimate of 1283 the variance/covariance of the component 1284 - `offset`: an integer giving the offset in the state vector where 1285 this component begins 1286 """ 1287 # If present, trend is always the second component of the state vector 1288 # (because level is always present if trend is present) 1289 out = None 1290 spec = self.specification 1291 if spec.trend: 1292 offset = int(spec.level) 1293 out = Bunch(filtered=self.filtered_state[offset], 1294 filtered_cov=self.filtered_state_cov[offset, offset], 1295 smoothed=None, smoothed_cov=None, 1296 offset=offset) 1297 if self.smoothed_state is not None: 1298 out.smoothed = self.smoothed_state[offset] 1299 if self.smoothed_state_cov is not None: 1300 out.smoothed_cov = self.smoothed_state_cov[offset, offset] 1301 return out 1302 1303 @property 1304 def seasonal(self): 1305 """ 1306 Estimates of unobserved seasonal component 1307 1308 Returns 1309 ------- 1310 out: Bunch 1311 Has the following attributes: 1312 1313 - `filtered`: a time series array with the filtered estimate of 1314 the component 1315 - `filtered_cov`: a time series array with the filtered estimate of 1316 the variance/covariance of the component 1317 - `smoothed`: a time series array with the smoothed estimate of 1318 the component 1319 - `smoothed_cov`: a time series array with the smoothed estimate of 1320 the variance/covariance of the component 1321 - `offset`: an integer giving the offset in the state vector where 1322 this component begins 1323 """ 1324 # If present, seasonal always follows level/trend (if they are present) 1325 # Note that we return only the first seasonal state, but there are 1326 # in fact seasonal_periods-1 seasonal states, however latter states 1327 # are just lagged versions of the first seasonal state. 1328 out = None 1329 spec = self.specification 1330 if spec.seasonal: 1331 offset = int(spec.trend + spec.level) 1332 out = Bunch(filtered=self.filtered_state[offset], 1333 filtered_cov=self.filtered_state_cov[offset, offset], 1334 smoothed=None, smoothed_cov=None, 1335 offset=offset) 1336 if self.smoothed_state is not None: 1337 out.smoothed = self.smoothed_state[offset] 1338 if self.smoothed_state_cov is not None: 1339 out.smoothed_cov = self.smoothed_state_cov[offset, offset] 1340 return out 1341 1342 @property 1343 def freq_seasonal(self): 1344 """ 1345 Estimates of unobserved frequency domain seasonal component(s) 1346 1347 Returns 1348 ------- 1349 out: list of Bunch instances 1350 Each item has the following attributes: 1351 1352 - `filtered`: a time series array with the filtered estimate of 1353 the component 1354 - `filtered_cov`: a time series array with the filtered estimate of 1355 the variance/covariance of the component 1356 - `smoothed`: a time series array with the smoothed estimate of 1357 the component 1358 - `smoothed_cov`: a time series array with the smoothed estimate of 1359 the variance/covariance of the component 1360 - `offset`: an integer giving the offset in the state vector where 1361 this component begins 1362 """ 1363 # If present, freq_seasonal components always follows level/trend 1364 # and seasonal. 1365 1366 # There are 2 * (harmonics) seasonal states per freq_seasonal 1367 # component. 1368 # The sum of every other state enters the measurement equation. 1369 # Additionally, there can be multiple components of this type. 1370 # These facts make this property messier in implementation than the 1371 # others. 1372 # Fortunately, the states are conditionally mutually independent 1373 # (conditional on previous timestep's states), so that the calculations 1374 # of the variances are simple summations of individual variances and 1375 # the calculation of the returned state is likewise a summation. 1376 out = [] 1377 spec = self.specification 1378 if spec.freq_seasonal: 1379 previous_states_offset = int(spec.trend + spec.level 1380 + self._k_states_by_type['seasonal']) 1381 previous_f_seas_offset = 0 1382 for ix, h in enumerate(spec.freq_seasonal_harmonics): 1383 offset = previous_states_offset + previous_f_seas_offset 1384 1385 period = spec.freq_seasonal_periods[ix] 1386 1387 # Only the gamma_jt terms enter the measurement equation (cf. 1388 # D&K 2012 (3.7)) 1389 states_in_sum = np.arange(0, 2 * h, 2) 1390 1391 filtered_state = np.sum( 1392 [self.filtered_state[offset + j] for j in states_in_sum], 1393 axis=0) 1394 filtered_cov = np.sum( 1395 [self.filtered_state_cov[offset + j, offset + j] for j in 1396 states_in_sum], axis=0) 1397 1398 item = Bunch( 1399 filtered=filtered_state, 1400 filtered_cov=filtered_cov, 1401 smoothed=None, smoothed_cov=None, 1402 offset=offset, 1403 pretty_name='seasonal {p}({h})'.format(p=repr(period), 1404 h=repr(h))) 1405 if self.smoothed_state is not None: 1406 item.smoothed = np.sum( 1407 [self.smoothed_state[offset+j] for j in states_in_sum], 1408 axis=0) 1409 if self.smoothed_state_cov is not None: 1410 item.smoothed_cov = np.sum( 1411 [self.smoothed_state_cov[offset+j, offset+j] 1412 for j in states_in_sum], axis=0) 1413 out.append(item) 1414 previous_f_seas_offset += 2 * h 1415 return out 1416 1417 @property 1418 def cycle(self): 1419 """ 1420 Estimates of unobserved cycle component 1421 1422 Returns 1423 ------- 1424 out: Bunch 1425 Has the following attributes: 1426 1427 - `filtered`: a time series array with the filtered estimate of 1428 the component 1429 - `filtered_cov`: a time series array with the filtered estimate of 1430 the variance/covariance of the component 1431 - `smoothed`: a time series array with the smoothed estimate of 1432 the component 1433 - `smoothed_cov`: a time series array with the smoothed estimate of 1434 the variance/covariance of the component 1435 - `offset`: an integer giving the offset in the state vector where 1436 this component begins 1437 """ 1438 # If present, cycle always follows level/trend, seasonal, and freq 1439 # seasonal. 1440 # Note that we return only the first cyclical state, but there are 1441 # in fact 2 cyclical states. The second cyclical state is not simply 1442 # a lag of the first cyclical state, but the first cyclical state is 1443 # the one that enters the measurement equation. 1444 out = None 1445 spec = self.specification 1446 if spec.cycle: 1447 offset = int(spec.trend + spec.level 1448 + self._k_states_by_type['seasonal'] 1449 + self._k_states_by_type['freq_seasonal']) 1450 out = Bunch(filtered=self.filtered_state[offset], 1451 filtered_cov=self.filtered_state_cov[offset, offset], 1452 smoothed=None, smoothed_cov=None, 1453 offset=offset) 1454 if self.smoothed_state is not None: 1455 out.smoothed = self.smoothed_state[offset] 1456 if self.smoothed_state_cov is not None: 1457 out.smoothed_cov = self.smoothed_state_cov[offset, offset] 1458 return out 1459 1460 @property 1461 def autoregressive(self): 1462 """ 1463 Estimates of unobserved autoregressive component 1464 1465 Returns 1466 ------- 1467 out: Bunch 1468 Has the following attributes: 1469 1470 - `filtered`: a time series array with the filtered estimate of 1471 the component 1472 - `filtered_cov`: a time series array with the filtered estimate of 1473 the variance/covariance of the component 1474 - `smoothed`: a time series array with the smoothed estimate of 1475 the component 1476 - `smoothed_cov`: a time series array with the smoothed estimate of 1477 the variance/covariance of the component 1478 - `offset`: an integer giving the offset in the state vector where 1479 this component begins 1480 """ 1481 # If present, autoregressive always follows level/trend, seasonal, 1482 # freq seasonal, and cyclical. 1483 # If it is an AR(p) model, then there are p associated 1484 # states, but the second - pth states are just lags of the first state. 1485 out = None 1486 spec = self.specification 1487 if spec.autoregressive: 1488 offset = int(spec.trend + spec.level 1489 + self._k_states_by_type['seasonal'] 1490 + self._k_states_by_type['freq_seasonal'] 1491 + self._k_states_by_type['cycle']) 1492 out = Bunch(filtered=self.filtered_state[offset], 1493 filtered_cov=self.filtered_state_cov[offset, offset], 1494 smoothed=None, smoothed_cov=None, 1495 offset=offset) 1496 if self.smoothed_state is not None: 1497 out.smoothed = self.smoothed_state[offset] 1498 if self.smoothed_state_cov is not None: 1499 out.smoothed_cov = self.smoothed_state_cov[offset, offset] 1500 return out 1501 1502 @property 1503 def regression_coefficients(self): 1504 """ 1505 Estimates of unobserved regression coefficients 1506 1507 Returns 1508 ------- 1509 out: Bunch 1510 Has the following attributes: 1511 1512 - `filtered`: a time series array with the filtered estimate of 1513 the component 1514 - `filtered_cov`: a time series array with the filtered estimate of 1515 the variance/covariance of the component 1516 - `smoothed`: a time series array with the smoothed estimate of 1517 the component 1518 - `smoothed_cov`: a time series array with the smoothed estimate of 1519 the variance/covariance of the component 1520 - `offset`: an integer giving the offset in the state vector where 1521 this component begins 1522 """ 1523 # If present, state-vector regression coefficients always are last 1524 # (i.e. they follow level/trend, seasonal, freq seasonal, cyclical, and 1525 # autoregressive states). There is one state associated with each 1526 # regressor, and all are returned here. 1527 out = None 1528 spec = self.specification 1529 if spec.regression: 1530 if spec.mle_regression: 1531 import warnings 1532 warnings.warn('Regression coefficients estimated via maximum' 1533 ' likelihood. Estimated coefficients are' 1534 ' available in the parameters list, not as part' 1535 ' of the state vector.', OutputWarning) 1536 else: 1537 offset = int(spec.trend + spec.level 1538 + self._k_states_by_type['seasonal'] 1539 + self._k_states_by_type['freq_seasonal'] 1540 + self._k_states_by_type['cycle'] 1541 + spec.ar_order) 1542 start = offset 1543 end = offset + spec.k_exog 1544 out = Bunch( 1545 filtered=self.filtered_state[start:end], 1546 filtered_cov=self.filtered_state_cov[start:end, start:end], 1547 smoothed=None, smoothed_cov=None, 1548 offset=offset 1549 ) 1550 if self.smoothed_state is not None: 1551 out.smoothed = self.smoothed_state[start:end] 1552 if self.smoothed_state_cov is not None: 1553 out.smoothed_cov = ( 1554 self.smoothed_state_cov[start:end, start:end]) 1555 return out 1556 1557 def plot_components(self, which=None, alpha=0.05, 1558 observed=True, level=True, trend=True, 1559 seasonal=True, freq_seasonal=True, 1560 cycle=True, autoregressive=True, 1561 legend_loc='upper right', fig=None, figsize=None): 1562 """ 1563 Plot the estimated components of the model. 1564 1565 Parameters 1566 ---------- 1567 which : {'filtered', 'smoothed'}, or None, optional 1568 Type of state estimate to plot. Default is 'smoothed' if smoothed 1569 results are available otherwise 'filtered'. 1570 alpha : float, optional 1571 The confidence intervals for the components are (1 - alpha) % 1572 observed : bool, optional 1573 Whether or not to plot the observed series against 1574 one-step-ahead predictions. 1575 Default is True. 1576 level : bool, optional 1577 Whether or not to plot the level component, if applicable. 1578 Default is True. 1579 trend : bool, optional 1580 Whether or not to plot the trend component, if applicable. 1581 Default is True. 1582 seasonal : bool, optional 1583 Whether or not to plot the seasonal component, if applicable. 1584 Default is True. 1585 freq_seasonal : bool, optional 1586 Whether or not to plot the frequency domain seasonal component(s), 1587 if applicable. Default is True. 1588 cycle : bool, optional 1589 Whether or not to plot the cyclical component, if applicable. 1590 Default is True. 1591 autoregressive : bool, optional 1592 Whether or not to plot the autoregressive state, if applicable. 1593 Default is True. 1594 fig : Figure, optional 1595 If given, subplots are created in this figure instead of in a new 1596 figure. Note that the grid will be created in the provided 1597 figure using `fig.add_subplot()`. 1598 figsize : tuple, optional 1599 If a figure is created, this argument allows specifying a size. 1600 The tuple is (width, height). 1601 1602 Notes 1603 ----- 1604 If all options are included in the model and selected, this produces 1605 a 6x1 plot grid with the following plots (ordered top-to-bottom): 1606 1607 0. Observed series against predicted series 1608 1. Level 1609 2. Trend 1610 3. Seasonal 1611 4. Freq Seasonal 1612 5. Cycle 1613 6. Autoregressive 1614 1615 Specific subplots will be removed if the component is not present in 1616 the estimated model or if the corresponding keyword argument is set to 1617 False. 1618 1619 All plots contain (1 - `alpha`) % confidence intervals. 1620 """ 1621 from scipy.stats import norm 1622 from statsmodels.graphics.utils import _import_mpl, create_mpl_fig 1623 plt = _import_mpl() 1624 fig = create_mpl_fig(fig, figsize) 1625 1626 # Determine which results we have 1627 if which is None: 1628 which = 'filtered' if self.smoothed_state is None else 'smoothed' 1629 1630 # Determine which plots we have 1631 spec = self.specification 1632 1633 comp = [ 1634 ('level', level and spec.level), 1635 ('trend', trend and spec.trend), 1636 ('seasonal', seasonal and spec.seasonal), 1637 ] 1638 1639 if freq_seasonal and spec.freq_seasonal: 1640 for ix, _ in enumerate(spec.freq_seasonal_periods): 1641 key = 'freq_seasonal_{!r}'.format(ix) 1642 comp.append((key, True)) 1643 1644 comp.extend( 1645 [('cycle', cycle and spec.cycle), 1646 ('autoregressive', autoregressive and spec.autoregressive)]) 1647 1648 components = dict(comp) 1649 1650 llb = self.filter_results.loglikelihood_burn 1651 1652 # Number of plots 1653 k_plots = observed + np.sum(list(components.values())) 1654 1655 # Get dates, if applicable 1656 if hasattr(self.data, 'dates') and self.data.dates is not None: 1657 dates = self.data.dates._mpl_repr() 1658 else: 1659 dates = np.arange(len(self.data.endog)) 1660 1661 # Get the critical value for confidence intervals 1662 critical_value = norm.ppf(1 - alpha / 2.) 1663 1664 plot_idx = 1 1665 1666 # Observed, predicted, confidence intervals 1667 if observed: 1668 ax = fig.add_subplot(k_plots, 1, plot_idx) 1669 plot_idx += 1 1670 1671 # Plot the observed dataset 1672 ax.plot(dates[llb:], self.model.endog[llb:], color='k', 1673 label='Observed') 1674 1675 # Get the predicted values and confidence intervals 1676 predict = self.filter_results.forecasts[0] 1677 std_errors = np.sqrt(self.filter_results.forecasts_error_cov[0, 0]) 1678 ci_lower = predict - critical_value * std_errors 1679 ci_upper = predict + critical_value * std_errors 1680 1681 # Plot 1682 ax.plot(dates[llb:], predict[llb:], 1683 label='One-step-ahead predictions') 1684 ci_poly = ax.fill_between( 1685 dates[llb:], ci_lower[llb:], ci_upper[llb:], alpha=0.2 1686 ) 1687 ci_label = '$%.3g \\%%$ confidence interval' % ((1 - alpha) * 100) 1688 1689 # Proxy artist for fill_between legend entry 1690 # See e.g. https://matplotlib.org/1.3.1/users/legend_guide.html 1691 p = plt.Rectangle((0, 0), 1, 1, fc=ci_poly.get_facecolor()[0]) 1692 1693 # Legend 1694 handles, labels = ax.get_legend_handles_labels() 1695 handles.append(p) 1696 labels.append(ci_label) 1697 ax.legend(handles, labels, loc=legend_loc) 1698 1699 ax.set_title('Predicted vs observed') 1700 1701 # Plot each component 1702 for component, is_plotted in components.items(): 1703 if not is_plotted: 1704 continue 1705 1706 ax = fig.add_subplot(k_plots, 1, plot_idx) 1707 plot_idx += 1 1708 1709 try: 1710 component_bunch = getattr(self, component) 1711 title = component.title() 1712 except AttributeError: 1713 # This might be a freq_seasonal component, of which there are 1714 # possibly multiple bagged up in property freq_seasonal 1715 if component.startswith('freq_seasonal_'): 1716 ix = int(component.replace('freq_seasonal_', '')) 1717 big_bunch = getattr(self, 'freq_seasonal') 1718 component_bunch = big_bunch[ix] 1719 title = component_bunch.pretty_name 1720 else: 1721 raise 1722 1723 # Check for a valid estimation type 1724 if which not in component_bunch: 1725 raise ValueError('Invalid type of state estimate.') 1726 1727 which_cov = '%s_cov' % which 1728 1729 # Get the predicted values 1730 value = component_bunch[which] 1731 1732 # Plot 1733 state_label = '%s (%s)' % (title, which) 1734 ax.plot(dates[llb:], value[llb:], label=state_label) 1735 1736 # Get confidence intervals 1737 if which_cov in component_bunch: 1738 std_errors = np.sqrt(component_bunch['%s_cov' % which]) 1739 ci_lower = value - critical_value * std_errors 1740 ci_upper = value + critical_value * std_errors 1741 ci_poly = ax.fill_between( 1742 dates[llb:], ci_lower[llb:], ci_upper[llb:], alpha=0.2 1743 ) 1744 ci_label = ('$%.3g \\%%$ confidence interval' 1745 % ((1 - alpha) * 100)) 1746 1747 # Legend 1748 ax.legend(loc=legend_loc) 1749 1750 ax.set_title('%s component' % title) 1751 1752 # Add a note if first observations excluded 1753 if llb > 0: 1754 text = ('Note: The first %d observations are not shown, due to' 1755 ' approximate diffuse initialization.') 1756 fig.text(0.1, 0.01, text % llb, fontsize='large') 1757 1758 return fig 1759 1760 @Appender(MLEResults.summary.__doc__) 1761 def summary(self, alpha=.05, start=None): 1762 # Create the model name 1763 1764 model_name = [self.specification.trend_specification] 1765 1766 if self.specification.seasonal: 1767 seasonal_name = ('seasonal(%d)' 1768 % self.specification.seasonal_periods) 1769 if self.specification.stochastic_seasonal: 1770 seasonal_name = 'stochastic ' + seasonal_name 1771 model_name.append(seasonal_name) 1772 1773 if self.specification.freq_seasonal: 1774 for ix, is_stochastic in enumerate( 1775 self.specification.stochastic_freq_seasonal): 1776 periodicity = self.specification.freq_seasonal_periods[ix] 1777 harmonics = self.specification.freq_seasonal_harmonics[ix] 1778 freq_seasonal_name = "freq_seasonal({p}({h}))".format( 1779 p=repr(periodicity), 1780 h=repr(harmonics)) 1781 if is_stochastic: 1782 freq_seasonal_name = 'stochastic ' + freq_seasonal_name 1783 model_name.append(freq_seasonal_name) 1784 1785 if self.specification.cycle: 1786 cycle_name = 'cycle' 1787 if self.specification.stochastic_cycle: 1788 cycle_name = 'stochastic ' + cycle_name 1789 if self.specification.damped_cycle: 1790 cycle_name = 'damped ' + cycle_name 1791 model_name.append(cycle_name) 1792 1793 if self.specification.autoregressive: 1794 autoregressive_name = 'AR(%d)' % self.specification.ar_order 1795 model_name.append(autoregressive_name) 1796 1797 return super(UnobservedComponentsResults, self).summary( 1798 alpha=alpha, start=start, title='Unobserved Components Results', 1799 model_name=model_name 1800 ) 1801 1802 1803class UnobservedComponentsResultsWrapper(MLEResultsWrapper): 1804 _attrs = {} 1805 _wrap_attrs = wrap.union_dicts(MLEResultsWrapper._wrap_attrs, 1806 _attrs) 1807 _methods = {} 1808 _wrap_methods = wrap.union_dicts(MLEResultsWrapper._wrap_methods, 1809 _methods) 1810wrap.populate_wrapper(UnobservedComponentsResultsWrapper, # noqa:E305 1811 UnobservedComponentsResults) 1812