1# -*- coding: utf-8 -*-
2"""
3State Space Model
4
5Author: Chad Fulton
6License: Simplified-BSD
7"""
8from statsmodels.compat.pandas import NumericIndex, is_int_index
9
10import contextlib
11import warnings
12
13import datetime as dt
14from types import SimpleNamespace
15import numpy as np
16import pandas as pd
17from scipy.stats import norm
18
19from statsmodels.tools.tools import pinv_extended, Bunch
20from statsmodels.tools.sm_exceptions import PrecisionWarning, ValueWarning
21from statsmodels.tools.numdiff import (_get_epsilon, approx_hess_cs,
22                                       approx_fprime_cs, approx_fprime)
23from statsmodels.tools.decorators import cache_readonly
24from statsmodels.tools.eval_measures import aic, aicc, bic, hqic
25
26import statsmodels.base.wrapper as wrap
27
28import statsmodels.tsa.base.prediction as pred
29
30from statsmodels.base.data import PandasData
31import statsmodels.tsa.base.tsa_model as tsbase
32
33from .news import NewsResults
34from .simulation_smoother import SimulationSmoother
35from .kalman_smoother import SmootherResults
36from .kalman_filter import INVERT_UNIVARIATE, SOLVE_LU, MEMORY_CONSERVE
37from .initialization import Initialization
38from .tools import prepare_exog, concat, _safe_cond
39
40
41def _handle_args(names, defaults, *args, **kwargs):
42    output_args = []
43    # We need to handle positional arguments in two ways, in case this was
44    # called by a Scipy optimization routine
45    if len(args) > 0:
46        # the fit() method will pass a dictionary
47        if isinstance(args[0], dict):
48            flags = args[0]
49        # otherwise, a user may have just used positional arguments...
50        else:
51            flags = dict(zip(names, args))
52        for i in range(len(names)):
53            output_args.append(flags.get(names[i], defaults[i]))
54
55        for name, value in flags.items():
56            if name in kwargs:
57                raise TypeError("loglike() got multiple values for keyword"
58                                " argument '%s'" % name)
59    else:
60        for i in range(len(names)):
61            output_args.append(kwargs.pop(names[i], defaults[i]))
62
63    return tuple(output_args) + (kwargs,)
64
65
66def _check_index(desired_index, dta, title='data'):
67    given_index = None
68    if isinstance(dta, (pd.Series, pd.DataFrame)):
69        given_index = dta.index
70    if given_index is not None and not desired_index.equals(given_index):
71        desired_freq = getattr(desired_index, 'freq', None)
72        given_freq = getattr(given_index, 'freq', None)
73        if ((desired_freq is not None or given_freq is not None) and
74                desired_freq != given_freq):
75            raise ValueError('Given %s does not have an index'
76                             ' that extends the index of the'
77                             ' model. Expected index frequency is'
78                             ' "%s", but got "%s".'
79                             % (title, desired_freq, given_freq))
80        else:
81            raise ValueError('Given %s does not have an index'
82                             ' that extends the index of the'
83                             ' model.' % title)
84
85
86class MLEModel(tsbase.TimeSeriesModel):
87    r"""
88    State space model for maximum likelihood estimation
89
90    Parameters
91    ----------
92    endog : array_like
93        The observed time-series process :math:`y`
94    k_states : int
95        The dimension of the unobserved state process.
96    exog : array_like, optional
97        Array of exogenous regressors, shaped nobs x k. Default is no
98        exogenous regressors.
99    dates : array_like of datetime, optional
100        An array-like object of datetime objects. If a Pandas object is given
101        for endog, it is assumed to have a DateIndex.
102    freq : str, optional
103        The frequency of the time-series. A Pandas offset or 'B', 'D', 'W',
104        'M', 'A', or 'Q'. This is optional if dates are given.
105    **kwargs
106        Keyword arguments may be used to provide default values for state space
107        matrices or for Kalman filtering options. See `Representation`, and
108        `KalmanFilter` for more details.
109
110    Attributes
111    ----------
112    ssm : statsmodels.tsa.statespace.kalman_filter.KalmanFilter
113        Underlying state space representation.
114
115    See Also
116    --------
117    statsmodels.tsa.statespace.mlemodel.MLEResults
118    statsmodels.tsa.statespace.kalman_filter.KalmanFilter
119    statsmodels.tsa.statespace.representation.Representation
120
121    Notes
122    -----
123    This class wraps the state space model with Kalman filtering to add in
124    functionality for maximum likelihood estimation. In particular, it adds
125    the concept of updating the state space representation based on a defined
126    set of parameters, through the `update` method or `updater` attribute (see
127    below for more details on which to use when), and it adds a `fit` method
128    which uses a numerical optimizer to select the parameters that maximize
129    the likelihood of the model.
130
131    The `start_params` `update` method must be overridden in the
132    child class (and the `transform` and `untransform` methods, if needed).
133    """
134
135    def __init__(self, endog, k_states, exog=None, dates=None, freq=None,
136                 **kwargs):
137        # Initialize the model base
138        super(MLEModel, self).__init__(endog=endog, exog=exog,
139                                       dates=dates, freq=freq,
140                                       missing='none')
141
142        # Store kwargs to recreate model
143        self._init_kwargs = kwargs
144
145        # Prepared the endog array: C-ordered, shape=(nobs x k_endog)
146        self.endog, self.exog = self.prepare_data()
147
148        # Dimensions
149        self.nobs = self.endog.shape[0]
150        self.k_states = k_states
151
152        # Initialize the state-space representation
153        self.initialize_statespace(**kwargs)
154
155        # Setup holder for fixed parameters
156        self._has_fixed_params = False
157        self._fixed_params = None
158        self._params_index = None
159        self._fixed_params_index = None
160        self._free_params_index = None
161
162    def prepare_data(self):
163        """
164        Prepare data for use in the state space representation
165        """
166        endog = np.array(self.data.orig_endog, order='C')
167        exog = self.data.orig_exog
168        if exog is not None:
169            exog = np.array(exog)
170
171        # Base class may allow 1-dim data, whereas we need 2-dim
172        if endog.ndim == 1:
173            endog.shape = (endog.shape[0], 1)  # this will be C-contiguous
174
175        return endog, exog
176
177    def initialize_statespace(self, **kwargs):
178        """
179        Initialize the state space representation
180
181        Parameters
182        ----------
183        **kwargs
184            Additional keyword arguments to pass to the state space class
185            constructor.
186        """
187        # (Now self.endog is C-ordered and in long format (nobs x k_endog). To
188        # get F-ordered and in wide format just need to transpose)
189        endog = self.endog.T
190
191        # Instantiate the state space object
192        self.ssm = SimulationSmoother(endog.shape[0], self.k_states,
193                                      nobs=endog.shape[1], **kwargs)
194        # Bind the data to the model
195        self.ssm.bind(endog)
196
197        # Other dimensions, now that `ssm` is available
198        self.k_endog = self.ssm.k_endog
199
200    def _get_index_with_final_state(self):
201        # The index we inherit from `TimeSeriesModel` will only cover the
202        # data sample itself, but we will also need an index value for the
203        # final state which is the next time step to the last datapoint.
204        # This method figures out an appropriate value for the three types of
205        # supported indexes: date-based, Int64Index, or RangeIndex
206        if self._index_dates:
207            if isinstance(self._index, pd.DatetimeIndex):
208                index = pd.date_range(
209                    start=self._index[0], periods=len(self._index) + 1,
210                    freq=self._index.freq)
211            elif isinstance(self._index, pd.PeriodIndex):
212                index = pd.period_range(
213                    start=self._index[0], periods=len(self._index) + 1,
214                    freq=self._index.freq)
215            else:
216                raise NotImplementedError
217        elif isinstance(self._index, pd.RangeIndex):
218            # COMPAT: pd.RangeIndex does not have start, stop, step prior to
219            #         pandas 0.25
220            try:
221                start = self._index.start
222                stop = self._index.stop
223                step = self._index.step
224            except AttributeError:
225                start = self._index._start
226                stop = self._index._stop
227                step = self._index._step
228            index = pd.RangeIndex(start, stop + step, step)
229        elif is_int_index(self._index):
230            # The only valid Int64Index is a full, incrementing index, so this
231            # is general
232            value = self._index[-1] + 1
233            index = NumericIndex(self._index.tolist() + [value])
234        else:
235            raise NotImplementedError
236        return index
237
238    def __setitem__(self, key, value):
239        return self.ssm.__setitem__(key, value)
240
241    def __getitem__(self, key):
242        return self.ssm.__getitem__(key)
243
244    def _get_init_kwds(self):
245        # Get keywords based on model attributes
246        kwds = super(MLEModel, self)._get_init_kwds()
247
248        for key, value in kwds.items():
249            if value is None and hasattr(self.ssm, key):
250                kwds[key] = getattr(self.ssm, key)
251
252        return kwds
253
254    def clone(self, endog, exog=None, **kwargs):
255        """
256        Clone state space model with new data and optionally new specification
257
258        Parameters
259        ----------
260        endog : array_like
261            The observed time-series process :math:`y`
262        k_states : int
263            The dimension of the unobserved state process.
264        exog : array_like, optional
265            Array of exogenous regressors, shaped nobs x k. Default is no
266            exogenous regressors.
267        kwargs
268            Keyword arguments to pass to the new model class to change the
269            model specification.
270
271        Returns
272        -------
273        model : MLEModel subclass
274
275        Notes
276        -----
277        This method must be implemented
278        """
279        raise NotImplementedError('This method is not implemented in the base'
280                                  ' class and must be set up by each specific'
281                                  ' model.')
282
283    def _clone_from_init_kwds(self, endog, **kwargs):
284        # Cannot make this the default, because there is extra work required
285        # for subclasses to make _get_init_kwds useful.
286        use_kwargs = self._get_init_kwds()
287        use_kwargs.update(kwargs)
288
289        # Check for `exog`
290        if getattr(self, 'k_exog', 0) > 0 and kwargs.get('exog', None) is None:
291            raise ValueError('Cloning a model with an exogenous component'
292                             ' requires specifying a new exogenous array using'
293                             ' the `exog` argument.')
294
295        mod = self.__class__(endog, **use_kwargs)
296        return mod
297
298    def set_filter_method(self, filter_method=None, **kwargs):
299        """
300        Set the filtering method
301
302        The filtering method controls aspects of which Kalman filtering
303        approach will be used.
304
305        Parameters
306        ----------
307        filter_method : int, optional
308            Bitmask value to set the filter method to. See notes for details.
309        **kwargs
310            Keyword arguments may be used to influence the filter method by
311            setting individual boolean flags. See notes for details.
312
313        Notes
314        -----
315        This method is rarely used. See the corresponding function in the
316        `KalmanFilter` class for details.
317        """
318        self.ssm.set_filter_method(filter_method, **kwargs)
319
320    def set_inversion_method(self, inversion_method=None, **kwargs):
321        """
322        Set the inversion method
323
324        The Kalman filter may contain one matrix inversion: that of the
325        forecast error covariance matrix. The inversion method controls how and
326        if that inverse is performed.
327
328        Parameters
329        ----------
330        inversion_method : int, optional
331            Bitmask value to set the inversion method to. See notes for
332            details.
333        **kwargs
334            Keyword arguments may be used to influence the inversion method by
335            setting individual boolean flags. See notes for details.
336
337        Notes
338        -----
339        This method is rarely used. See the corresponding function in the
340        `KalmanFilter` class for details.
341        """
342        self.ssm.set_inversion_method(inversion_method, **kwargs)
343
344    def set_stability_method(self, stability_method=None, **kwargs):
345        """
346        Set the numerical stability method
347
348        The Kalman filter is a recursive algorithm that may in some cases
349        suffer issues with numerical stability. The stability method controls
350        what, if any, measures are taken to promote stability.
351
352        Parameters
353        ----------
354        stability_method : int, optional
355            Bitmask value to set the stability method to. See notes for
356            details.
357        **kwargs
358            Keyword arguments may be used to influence the stability method by
359            setting individual boolean flags. See notes for details.
360
361        Notes
362        -----
363        This method is rarely used. See the corresponding function in the
364        `KalmanFilter` class for details.
365        """
366        self.ssm.set_stability_method(stability_method, **kwargs)
367
368    def set_conserve_memory(self, conserve_memory=None, **kwargs):
369        """
370        Set the memory conservation method
371
372        By default, the Kalman filter computes a number of intermediate
373        matrices at each iteration. The memory conservation options control
374        which of those matrices are stored.
375
376        Parameters
377        ----------
378        conserve_memory : int, optional
379            Bitmask value to set the memory conservation method to. See notes
380            for details.
381        **kwargs
382            Keyword arguments may be used to influence the memory conservation
383            method by setting individual boolean flags.
384
385        Notes
386        -----
387        This method is rarely used. See the corresponding function in the
388        `KalmanFilter` class for details.
389        """
390        self.ssm.set_conserve_memory(conserve_memory, **kwargs)
391
392    def set_smoother_output(self, smoother_output=None, **kwargs):
393        """
394        Set the smoother output
395
396        The smoother can produce several types of results. The smoother output
397        variable controls which are calculated and returned.
398
399        Parameters
400        ----------
401        smoother_output : int, optional
402            Bitmask value to set the smoother output to. See notes for details.
403        **kwargs
404            Keyword arguments may be used to influence the smoother output by
405            setting individual boolean flags.
406
407        Notes
408        -----
409        This method is rarely used. See the corresponding function in the
410        `KalmanSmoother` class for details.
411        """
412        self.ssm.set_smoother_output(smoother_output, **kwargs)
413
414    def initialize_known(self, initial_state, initial_state_cov):
415        """Initialize known"""
416        self.ssm.initialize_known(initial_state, initial_state_cov)
417
418    def initialize_approximate_diffuse(self, variance=None):
419        """Initialize approximate diffuse"""
420        self.ssm.initialize_approximate_diffuse(variance)
421
422    def initialize_stationary(self):
423        """Initialize stationary"""
424        self.ssm.initialize_stationary()
425
426    @property
427    def initialization(self):
428        return self.ssm.initialization
429
430    @initialization.setter
431    def initialization(self, value):
432        self.ssm.initialization = value
433
434    @property
435    def initial_variance(self):
436        return self.ssm.initial_variance
437
438    @initial_variance.setter
439    def initial_variance(self, value):
440        self.ssm.initial_variance = value
441
442    @property
443    def loglikelihood_burn(self):
444        return self.ssm.loglikelihood_burn
445
446    @loglikelihood_burn.setter
447    def loglikelihood_burn(self, value):
448        self.ssm.loglikelihood_burn = value
449
450    @property
451    def tolerance(self):
452        return self.ssm.tolerance
453
454    @tolerance.setter
455    def tolerance(self, value):
456        self.ssm.tolerance = value
457
458    def _validate_can_fix_params(self, param_names):
459        for param_name in param_names:
460            if param_name not in self.param_names:
461                raise ValueError('Invalid parameter name passed: "%s".'
462                                 % param_name)
463
464    @contextlib.contextmanager
465    def fix_params(self, params):
466        """
467        Fix parameters to specific values (context manager)
468
469        Parameters
470        ----------
471        params : dict
472            Dictionary describing the fixed parameter values, of the form
473            `param_name: fixed_value`. See the `param_names` property for valid
474            parameter names.
475
476        Examples
477        --------
478        >>> mod = sm.tsa.SARIMAX(endog, order=(1, 0, 1))
479        >>> with mod.fix_params({'ar.L1': 0.5}):
480                res = mod.fit()
481        """
482        k_params = len(self.param_names)
483        # Initialization (this is done here rather than in the constructor
484        # because param_names may not be available at that point)
485        if self._fixed_params is None:
486            self._fixed_params = {}
487            self._params_index = dict(
488                zip(self.param_names, np.arange(k_params)))
489
490        # Cache the current fixed parameters
491        cache_fixed_params = self._fixed_params.copy()
492        cache_has_fixed_params = self._has_fixed_params
493        cache_fixed_params_index = self._fixed_params_index
494        cache_free_params_index = self._free_params_index
495
496        # Validate parameter names and values
497        all_fixed_param_names = (
498            set(params.keys()) | set(self._fixed_params.keys())
499        )
500        self._validate_can_fix_params(all_fixed_param_names)
501
502        # Set the new fixed parameters, keeping the order as given by
503        # param_names
504        self._fixed_params.update(params)
505        self._fixed_params = dict([
506            (name, self._fixed_params[name]) for name in self.param_names
507            if name in self._fixed_params])
508
509        # Update associated values
510        self._has_fixed_params = True
511        self._fixed_params_index = [self._params_index[key]
512                                    for key in self._fixed_params.keys()]
513        self._free_params_index = list(
514            set(np.arange(k_params)).difference(self._fixed_params_index))
515
516        try:
517            yield
518        finally:
519            # Reset the fixed parameters
520            self._has_fixed_params = cache_has_fixed_params
521            self._fixed_params = cache_fixed_params
522            self._fixed_params_index = cache_fixed_params_index
523            self._free_params_index = cache_free_params_index
524
525    def fit(self, start_params=None, transformed=True, includes_fixed=False,
526            cov_type=None, cov_kwds=None, method='lbfgs', maxiter=50,
527            full_output=1, disp=5, callback=None, return_params=False,
528            optim_score=None, optim_complex_step=None, optim_hessian=None,
529            flags=None, low_memory=False, **kwargs):
530        """
531        Fits the model by maximum likelihood via Kalman filter.
532
533        Parameters
534        ----------
535        start_params : array_like, optional
536            Initial guess of the solution for the loglikelihood maximization.
537            If None, the default is given by Model.start_params.
538        transformed : bool, optional
539            Whether or not `start_params` is already transformed. Default is
540            True.
541        includes_fixed : bool, optional
542            If parameters were previously fixed with the `fix_params` method,
543            this argument describes whether or not `start_params` also includes
544            the fixed parameters, in addition to the free parameters. Default
545            is False.
546        cov_type : str, optional
547            The `cov_type` keyword governs the method for calculating the
548            covariance matrix of parameter estimates. Can be one of:
549
550            - 'opg' for the outer product of gradient estimator
551            - 'oim' for the observed information matrix estimator, calculated
552              using the method of Harvey (1989)
553            - 'approx' for the observed information matrix estimator,
554              calculated using a numerical approximation of the Hessian matrix.
555            - 'robust' for an approximate (quasi-maximum likelihood) covariance
556              matrix that may be valid even in the presence of some
557              misspecifications. Intermediate calculations use the 'oim'
558              method.
559            - 'robust_approx' is the same as 'robust' except that the
560              intermediate calculations use the 'approx' method.
561            - 'none' for no covariance matrix calculation.
562
563            Default is 'opg' unless memory conservation is used to avoid
564            computing the loglikelihood values for each observation, in which
565            case the default is 'approx'.
566        cov_kwds : dict or None, optional
567            A dictionary of arguments affecting covariance matrix computation.
568
569            **opg, oim, approx, robust, robust_approx**
570
571            - 'approx_complex_step' : bool, optional - If True, numerical
572              approximations are computed using complex-step methods. If False,
573              numerical approximations are computed using finite difference
574              methods. Default is True.
575            - 'approx_centered' : bool, optional - If True, numerical
576              approximations computed using finite difference methods use a
577              centered approximation. Default is False.
578        method : str, optional
579            The `method` determines which solver from `scipy.optimize`
580            is used, and it can be chosen from among the following strings:
581
582            - 'newton' for Newton-Raphson
583            - 'nm' for Nelder-Mead
584            - 'bfgs' for Broyden-Fletcher-Goldfarb-Shanno (BFGS)
585            - 'lbfgs' for limited-memory BFGS with optional box constraints
586            - 'powell' for modified Powell's method
587            - 'cg' for conjugate gradient
588            - 'ncg' for Newton-conjugate gradient
589            - 'basinhopping' for global basin-hopping solver
590
591            The explicit arguments in `fit` are passed to the solver,
592            with the exception of the basin-hopping solver. Each
593            solver has several optional arguments that are not the same across
594            solvers. See the notes section below (or scipy.optimize) for the
595            available arguments and for the list of explicit arguments that the
596            basin-hopping solver supports.
597        maxiter : int, optional
598            The maximum number of iterations to perform.
599        full_output : bool, optional
600            Set to True to have all available output in the Results object's
601            mle_retvals attribute. The output is dependent on the solver.
602            See LikelihoodModelResults notes section for more information.
603        disp : bool, optional
604            Set to True to print convergence messages.
605        callback : callable callback(xk), optional
606            Called after each iteration, as callback(xk), where xk is the
607            current parameter vector.
608        return_params : bool, optional
609            Whether or not to return only the array of maximizing parameters.
610            Default is False.
611        optim_score : {'harvey', 'approx'} or None, optional
612            The method by which the score vector is calculated. 'harvey' uses
613            the method from Harvey (1989), 'approx' uses either finite
614            difference or complex step differentiation depending upon the
615            value of `optim_complex_step`, and None uses the built-in gradient
616            approximation of the optimizer. Default is None. This keyword is
617            only relevant if the optimization method uses the score.
618        optim_complex_step : bool, optional
619            Whether or not to use complex step differentiation when
620            approximating the score; if False, finite difference approximation
621            is used. Default is True. This keyword is only relevant if
622            `optim_score` is set to 'harvey' or 'approx'.
623        optim_hessian : {'opg','oim','approx'}, optional
624            The method by which the Hessian is numerically approximated. 'opg'
625            uses outer product of gradients, 'oim' uses the information
626            matrix formula from Harvey (1989), and 'approx' uses numerical
627            approximation. This keyword is only relevant if the
628            optimization method uses the Hessian matrix.
629        low_memory : bool, optional
630            If set to True, techniques are applied to substantially reduce
631            memory usage. If used, some features of the results object will
632            not be available (including smoothed results and in-sample
633            prediction), although out-of-sample forecasting is possible.
634            Default is False.
635        **kwargs
636            Additional keyword arguments to pass to the optimizer.
637
638        Returns
639        -------
640        results
641            Results object holding results from fitting a state space model.
642
643        See Also
644        --------
645        statsmodels.base.model.LikelihoodModel.fit
646        statsmodels.tsa.statespace.mlemodel.MLEResults
647        statsmodels.tsa.statespace.structural.UnobservedComponentsResults
648        """
649        if start_params is None:
650            start_params = self.start_params
651            transformed = True
652            includes_fixed = True
653
654        # Update the score method
655        if optim_score is None and method == 'lbfgs':
656            kwargs.setdefault('approx_grad', True)
657            kwargs.setdefault('epsilon', 1e-5)
658        elif optim_score is None:
659            optim_score = 'approx'
660
661        # Check for complex step differentiation
662        if optim_complex_step is None:
663            optim_complex_step = not self.ssm._complex_endog
664        elif optim_complex_step and self.ssm._complex_endog:
665            raise ValueError('Cannot use complex step derivatives when data'
666                             ' or parameters are complex.')
667
668        # Standardize starting parameters
669        start_params = self.handle_params(start_params, transformed=True,
670                                          includes_fixed=includes_fixed)
671
672        # Unconstrain the starting parameters
673        if transformed:
674            start_params = self.untransform_params(start_params)
675
676        # Remove any fixed parameters
677        if self._has_fixed_params:
678            start_params = start_params[self._free_params_index]
679
680        # If all parameters are fixed, we are done
681        if self._has_fixed_params and len(start_params) == 0:
682            mlefit = Bunch(params=[], mle_retvals=None,
683                           mle_settings=None)
684        else:
685            # Remove disallowed kwargs
686            disallow = (
687                "concentrate_scale",
688                "enforce_stationarity",
689                "enforce_invertibility"
690            )
691            kwargs = {k: v for k, v in kwargs.items() if k not in disallow}
692            # Maximum likelihood estimation
693            if flags is None:
694                flags = {}
695            flags.update({
696                'transformed': False,
697                'includes_fixed': False,
698                'score_method': optim_score,
699                'approx_complex_step': optim_complex_step
700            })
701            if optim_hessian is not None:
702                flags['hessian_method'] = optim_hessian
703            fargs = (flags,)
704            mlefit = super(MLEModel, self).fit(start_params, method=method,
705                                               fargs=fargs,
706                                               maxiter=maxiter,
707                                               full_output=full_output,
708                                               disp=disp, callback=callback,
709                                               skip_hessian=True, **kwargs)
710
711        # Just return the fitted parameters if requested
712        if return_params:
713            return self.handle_params(mlefit.params, transformed=False,
714                                      includes_fixed=False)
715        # Otherwise construct the results class if desired
716        else:
717            # Handle memory conservation option
718            if low_memory:
719                conserve_memory = self.ssm.conserve_memory
720                self.ssm.set_conserve_memory(MEMORY_CONSERVE)
721
722            # Perform filtering / smoothing
723            if (self.ssm.memory_no_predicted or self.ssm.memory_no_gain
724                    or self.ssm.memory_no_smoothing):
725                func = self.filter
726            else:
727                func = self.smooth
728            res = func(mlefit.params, transformed=False, includes_fixed=False,
729                       cov_type=cov_type, cov_kwds=cov_kwds)
730
731            res.mlefit = mlefit
732            res.mle_retvals = mlefit.mle_retvals
733            res.mle_settings = mlefit.mle_settings
734
735            # Reset memory conservation
736            if low_memory:
737                self.ssm.set_conserve_memory(conserve_memory)
738
739            return res
740
741    def fit_constrained(self, constraints, start_params=None, **fit_kwds):
742        """
743        Fit the model with some parameters subject to equality constraints.
744
745        Parameters
746        ----------
747        constraints : dict
748            Dictionary of constraints, of the form `param_name: fixed_value`.
749            See the `param_names` property for valid parameter names.
750        start_params : array_like, optional
751            Initial guess of the solution for the loglikelihood maximization.
752            If None, the default is given by Model.start_params.
753        **fit_kwds : keyword arguments
754            fit_kwds are used in the optimization of the remaining parameters.
755
756        Returns
757        -------
758        results : Results instance
759
760        Examples
761        --------
762        >>> mod = sm.tsa.SARIMAX(endog, order=(1, 0, 1))
763        >>> res = mod.fit_constrained({'ar.L1': 0.5})
764        """
765        with self.fix_params(constraints):
766            res = self.fit(start_params, **fit_kwds)
767        return res
768
769    @property
770    def _res_classes(self):
771        return {'fit': (MLEResults, MLEResultsWrapper)}
772
773    def _wrap_results(self, params, result, return_raw, cov_type=None,
774                      cov_kwds=None, results_class=None, wrapper_class=None):
775        if not return_raw:
776            # Wrap in a results object
777            result_kwargs = {}
778            if cov_type is not None:
779                result_kwargs['cov_type'] = cov_type
780            if cov_kwds is not None:
781                result_kwargs['cov_kwds'] = cov_kwds
782
783            if results_class is None:
784                results_class = self._res_classes['fit'][0]
785            if wrapper_class is None:
786                wrapper_class = self._res_classes['fit'][1]
787
788            res = results_class(self, params, result, **result_kwargs)
789            result = wrapper_class(res)
790        return result
791
792    def filter(self, params, transformed=True, includes_fixed=False,
793               complex_step=False, cov_type=None, cov_kwds=None,
794               return_ssm=False, results_class=None,
795               results_wrapper_class=None, low_memory=False, **kwargs):
796        """
797        Kalman filtering
798
799        Parameters
800        ----------
801        params : array_like
802            Array of parameters at which to evaluate the loglikelihood
803            function.
804        transformed : bool, optional
805            Whether or not `params` is already transformed. Default is True.
806        return_ssm : bool,optional
807            Whether or not to return only the state space output or a full
808            results object. Default is to return a full results object.
809        cov_type : str, optional
810            See `MLEResults.fit` for a description of covariance matrix types
811            for results object.
812        cov_kwds : dict or None, optional
813            See `MLEResults.get_robustcov_results` for a description required
814            keywords for alternative covariance estimators
815        low_memory : bool, optional
816            If set to True, techniques are applied to substantially reduce
817            memory usage. If used, some features of the results object will
818            not be available (including in-sample prediction), although
819            out-of-sample forecasting is possible. Default is False.
820        **kwargs
821            Additional keyword arguments to pass to the Kalman filter. See
822            `KalmanFilter.filter` for more details.
823        """
824        params = self.handle_params(params, transformed=transformed,
825                                    includes_fixed=includes_fixed)
826        self.update(params, transformed=True, includes_fixed=True,
827                    complex_step=complex_step)
828
829        # Save the parameter names
830        self.data.param_names = self.param_names
831
832        if complex_step:
833            kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU
834
835        # Handle memory conservation
836        if low_memory:
837            kwargs['conserve_memory'] = MEMORY_CONSERVE
838
839        # Get the state space output
840        result = self.ssm.filter(complex_step=complex_step, **kwargs)
841
842        # Wrap in a results object
843        return self._wrap_results(params, result, return_ssm, cov_type,
844                                  cov_kwds, results_class,
845                                  results_wrapper_class)
846
847    def smooth(self, params, transformed=True, includes_fixed=False,
848               complex_step=False, cov_type=None, cov_kwds=None,
849               return_ssm=False, results_class=None,
850               results_wrapper_class=None, **kwargs):
851        """
852        Kalman smoothing
853
854        Parameters
855        ----------
856        params : array_like
857            Array of parameters at which to evaluate the loglikelihood
858            function.
859        transformed : bool, optional
860            Whether or not `params` is already transformed. Default is True.
861        return_ssm : bool,optional
862            Whether or not to return only the state space output or a full
863            results object. Default is to return a full results object.
864        cov_type : str, optional
865            See `MLEResults.fit` for a description of covariance matrix types
866            for results object.
867        cov_kwds : dict or None, optional
868            See `MLEResults.get_robustcov_results` for a description required
869            keywords for alternative covariance estimators
870        **kwargs
871            Additional keyword arguments to pass to the Kalman filter. See
872            `KalmanFilter.filter` for more details.
873        """
874        params = self.handle_params(params, transformed=transformed,
875                                    includes_fixed=includes_fixed)
876        self.update(params, transformed=True, includes_fixed=True,
877                    complex_step=complex_step)
878
879        # Save the parameter names
880        self.data.param_names = self.param_names
881
882        if complex_step:
883            kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU
884
885        # Get the state space output
886        result = self.ssm.smooth(complex_step=complex_step, **kwargs)
887
888        # Wrap in a results object
889        return self._wrap_results(params, result, return_ssm, cov_type,
890                                  cov_kwds, results_class,
891                                  results_wrapper_class)
892
893    _loglike_param_names = ['transformed', 'includes_fixed', 'complex_step']
894    _loglike_param_defaults = [True, False, False]
895
896    def loglike(self, params, *args, **kwargs):
897        """
898        Loglikelihood evaluation
899
900        Parameters
901        ----------
902        params : array_like
903            Array of parameters at which to evaluate the loglikelihood
904            function.
905        transformed : bool, optional
906            Whether or not `params` is already transformed. Default is True.
907        **kwargs
908            Additional keyword arguments to pass to the Kalman filter. See
909            `KalmanFilter.filter` for more details.
910
911        See Also
912        --------
913        update : modifies the internal state of the state space model to
914                 reflect new params
915
916        Notes
917        -----
918        [1]_ recommend maximizing the average likelihood to avoid scale issues;
919        this is done automatically by the base Model fit method.
920
921        References
922        ----------
923        .. [1] Koopman, Siem Jan, Neil Shephard, and Jurgen A. Doornik. 1999.
924           Statistical Algorithms for Models in State Space Using SsfPack 2.2.
925           Econometrics Journal 2 (1): 107-60. doi:10.1111/1368-423X.00023.
926        """
927        transformed, includes_fixed, complex_step, kwargs = _handle_args(
928            MLEModel._loglike_param_names, MLEModel._loglike_param_defaults,
929            *args, **kwargs)
930
931        params = self.handle_params(params, transformed=transformed,
932                                    includes_fixed=includes_fixed)
933        self.update(params, transformed=True, includes_fixed=True,
934                    complex_step=complex_step)
935
936        if complex_step:
937            kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU
938
939        loglike = self.ssm.loglike(complex_step=complex_step, **kwargs)
940
941        # Koopman, Shephard, and Doornik recommend maximizing the average
942        # likelihood to avoid scale issues, but the averaging is done
943        # automatically in the base model `fit` method
944        return loglike
945
946    def loglikeobs(self, params, transformed=True, includes_fixed=False,
947                   complex_step=False, **kwargs):
948        """
949        Loglikelihood evaluation
950
951        Parameters
952        ----------
953        params : array_like
954            Array of parameters at which to evaluate the loglikelihood
955            function.
956        transformed : bool, optional
957            Whether or not `params` is already transformed. Default is True.
958        **kwargs
959            Additional keyword arguments to pass to the Kalman filter. See
960            `KalmanFilter.filter` for more details.
961
962        See Also
963        --------
964        update : modifies the internal state of the Model to reflect new params
965
966        Notes
967        -----
968        [1]_ recommend maximizing the average likelihood to avoid scale issues;
969        this is done automatically by the base Model fit method.
970
971        References
972        ----------
973        .. [1] Koopman, Siem Jan, Neil Shephard, and Jurgen A. Doornik. 1999.
974           Statistical Algorithms for Models in State Space Using SsfPack 2.2.
975           Econometrics Journal 2 (1): 107-60. doi:10.1111/1368-423X.00023.
976        """
977        params = self.handle_params(params, transformed=transformed,
978                                    includes_fixed=includes_fixed)
979
980        # If we're using complex-step differentiation, then we cannot use
981        # Cholesky factorization
982        if complex_step:
983            kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU
984
985        self.update(params, transformed=True, includes_fixed=True,
986                    complex_step=complex_step)
987
988        return self.ssm.loglikeobs(complex_step=complex_step, **kwargs)
989
990    def simulation_smoother(self, simulation_output=None, **kwargs):
991        r"""
992        Retrieve a simulation smoother for the state space model.
993
994        Parameters
995        ----------
996        simulation_output : int, optional
997            Determines which simulation smoother output is calculated.
998            Default is all (including state and disturbances).
999        **kwargs
1000            Additional keyword arguments, used to set the simulation output.
1001            See `set_simulation_output` for more details.
1002
1003        Returns
1004        -------
1005        SimulationSmoothResults
1006        """
1007        return self.ssm.simulation_smoother(
1008            simulation_output=simulation_output, **kwargs)
1009
1010    def _forecasts_error_partial_derivatives(self, params, transformed=True,
1011                                             includes_fixed=False,
1012                                             approx_complex_step=None,
1013                                             approx_centered=False,
1014                                             res=None, **kwargs):
1015        params = np.array(params, ndmin=1)
1016
1017        # We cannot use complex-step differentiation with non-transformed
1018        # parameters
1019        if approx_complex_step is None:
1020            approx_complex_step = transformed
1021        if not transformed and approx_complex_step:
1022            raise ValueError("Cannot use complex-step approximations to"
1023                             " calculate the observed_information_matrix"
1024                             " with untransformed parameters.")
1025
1026        # If we're using complex-step differentiation, then we cannot use
1027        # Cholesky factorization
1028        if approx_complex_step:
1029            kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU
1030
1031        # Get values at the params themselves
1032        if res is None:
1033            self.update(params, transformed=transformed,
1034                        includes_fixed=includes_fixed,
1035                        complex_step=approx_complex_step)
1036            res = self.ssm.filter(complex_step=approx_complex_step, **kwargs)
1037
1038        # Setup
1039        n = len(params)
1040
1041        # Compute partial derivatives w.r.t. forecast error and forecast
1042        # error covariance
1043        partials_forecasts_error = (
1044            np.zeros((self.k_endog, self.nobs, n))
1045        )
1046        partials_forecasts_error_cov = (
1047            np.zeros((self.k_endog, self.k_endog, self.nobs, n))
1048        )
1049        if approx_complex_step:
1050            epsilon = _get_epsilon(params, 2, None, n)
1051            increments = np.identity(n) * 1j * epsilon
1052
1053            for i, ih in enumerate(increments):
1054                self.update(params + ih, transformed=transformed,
1055                            includes_fixed=includes_fixed,
1056                            complex_step=True)
1057                _res = self.ssm.filter(complex_step=True, **kwargs)
1058
1059                partials_forecasts_error[:, :, i] = (
1060                    _res.forecasts_error.imag / epsilon[i]
1061                )
1062
1063                partials_forecasts_error_cov[:, :, :, i] = (
1064                    _res.forecasts_error_cov.imag / epsilon[i]
1065                )
1066        elif not approx_centered:
1067            epsilon = _get_epsilon(params, 2, None, n)
1068            ei = np.zeros((n,), float)
1069            for i in range(n):
1070                ei[i] = epsilon[i]
1071                self.update(params + ei, transformed=transformed,
1072                            includes_fixed=includes_fixed, complex_step=False)
1073                _res = self.ssm.filter(complex_step=False, **kwargs)
1074
1075                partials_forecasts_error[:, :, i] = (
1076                    _res.forecasts_error - res.forecasts_error) / epsilon[i]
1077
1078                partials_forecasts_error_cov[:, :, :, i] = (
1079                    _res.forecasts_error_cov -
1080                    res.forecasts_error_cov) / epsilon[i]
1081                ei[i] = 0.0
1082        else:
1083            epsilon = _get_epsilon(params, 3, None, n) / 2.
1084            ei = np.zeros((n,), float)
1085            for i in range(n):
1086                ei[i] = epsilon[i]
1087
1088                self.update(params + ei, transformed=transformed,
1089                            includes_fixed=includes_fixed, complex_step=False)
1090                _res1 = self.ssm.filter(complex_step=False, **kwargs)
1091
1092                self.update(params - ei, transformed=transformed,
1093                            includes_fixed=includes_fixed, complex_step=False)
1094                _res2 = self.ssm.filter(complex_step=False, **kwargs)
1095
1096                partials_forecasts_error[:, :, i] = (
1097                    (_res1.forecasts_error - _res2.forecasts_error) /
1098                    (2 * epsilon[i]))
1099
1100                partials_forecasts_error_cov[:, :, :, i] = (
1101                    (_res1.forecasts_error_cov - _res2.forecasts_error_cov) /
1102                    (2 * epsilon[i]))
1103
1104                ei[i] = 0.0
1105
1106        return partials_forecasts_error, partials_forecasts_error_cov
1107
1108    def observed_information_matrix(self, params, transformed=True,
1109                                    includes_fixed=False,
1110                                    approx_complex_step=None,
1111                                    approx_centered=False, **kwargs):
1112        """
1113        Observed information matrix
1114
1115        Parameters
1116        ----------
1117        params : array_like, optional
1118            Array of parameters at which to evaluate the loglikelihood
1119            function.
1120        **kwargs
1121            Additional keyword arguments to pass to the Kalman filter. See
1122            `KalmanFilter.filter` for more details.
1123
1124        Notes
1125        -----
1126        This method is from Harvey (1989), which shows that the information
1127        matrix only depends on terms from the gradient. This implementation is
1128        partially analytic and partially numeric approximation, therefore,
1129        because it uses the analytic formula for the information matrix, with
1130        numerically computed elements of the gradient.
1131
1132        References
1133        ----------
1134        Harvey, Andrew C. 1990.
1135        Forecasting, Structural Time Series Models and the Kalman Filter.
1136        Cambridge University Press.
1137        """
1138        params = np.array(params, ndmin=1)
1139
1140        # Setup
1141        n = len(params)
1142
1143        # We cannot use complex-step differentiation with non-transformed
1144        # parameters
1145        if approx_complex_step is None:
1146            approx_complex_step = transformed
1147        if not transformed and approx_complex_step:
1148            raise ValueError("Cannot use complex-step approximations to"
1149                             " calculate the observed_information_matrix"
1150                             " with untransformed parameters.")
1151
1152        # Get values at the params themselves
1153        params = self.handle_params(params, transformed=transformed,
1154                                    includes_fixed=includes_fixed)
1155        self.update(params, transformed=True, includes_fixed=True,
1156                    complex_step=approx_complex_step)
1157        # If we're using complex-step differentiation, then we cannot use
1158        # Cholesky factorization
1159        if approx_complex_step:
1160            kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU
1161        res = self.ssm.filter(complex_step=approx_complex_step, **kwargs)
1162        dtype = self.ssm.dtype
1163
1164        # Save this for inversion later
1165        inv_forecasts_error_cov = res.forecasts_error_cov.copy()
1166
1167        partials_forecasts_error, partials_forecasts_error_cov = (
1168            self._forecasts_error_partial_derivatives(
1169                params, transformed=transformed, includes_fixed=includes_fixed,
1170                approx_complex_step=approx_complex_step,
1171                approx_centered=approx_centered, res=res, **kwargs))
1172
1173        # Compute the information matrix
1174        tmp = np.zeros((self.k_endog, self.k_endog, self.nobs, n), dtype=dtype)
1175
1176        information_matrix = np.zeros((n, n), dtype=dtype)
1177        d = np.maximum(self.ssm.loglikelihood_burn, res.nobs_diffuse)
1178        for t in range(d, self.nobs):
1179            inv_forecasts_error_cov[:, :, t] = (
1180                np.linalg.inv(res.forecasts_error_cov[:, :, t])
1181            )
1182            for i in range(n):
1183                tmp[:, :, t, i] = np.dot(
1184                    inv_forecasts_error_cov[:, :, t],
1185                    partials_forecasts_error_cov[:, :, t, i]
1186                )
1187            for i in range(n):
1188                for j in range(n):
1189                    information_matrix[i, j] += (
1190                        0.5 * np.trace(np.dot(tmp[:, :, t, i],
1191                                              tmp[:, :, t, j]))
1192                    )
1193                    information_matrix[i, j] += np.inner(
1194                        partials_forecasts_error[:, t, i],
1195                        np.dot(inv_forecasts_error_cov[:, :, t],
1196                               partials_forecasts_error[:, t, j])
1197                    )
1198        return information_matrix / (self.nobs - self.ssm.loglikelihood_burn)
1199
1200    def opg_information_matrix(self, params, transformed=True,
1201                               includes_fixed=False, approx_complex_step=None,
1202                               **kwargs):
1203        """
1204        Outer product of gradients information matrix
1205
1206        Parameters
1207        ----------
1208        params : array_like, optional
1209            Array of parameters at which to evaluate the loglikelihood
1210            function.
1211        **kwargs
1212            Additional arguments to the `loglikeobs` method.
1213
1214        References
1215        ----------
1216        Berndt, Ernst R., Bronwyn Hall, Robert Hall, and Jerry Hausman. 1974.
1217        Estimation and Inference in Nonlinear Structural Models.
1218        NBER Chapters. National Bureau of Economic Research, Inc.
1219        """
1220        # We cannot use complex-step differentiation with non-transformed
1221        # parameters
1222        if approx_complex_step is None:
1223            approx_complex_step = transformed
1224        if not transformed and approx_complex_step:
1225            raise ValueError("Cannot use complex-step approximations to"
1226                             " calculate the observed_information_matrix"
1227                             " with untransformed parameters.")
1228
1229        score_obs = self.score_obs(params, transformed=transformed,
1230                                   includes_fixed=includes_fixed,
1231                                   approx_complex_step=approx_complex_step,
1232                                   **kwargs).transpose()
1233        return (
1234            np.inner(score_obs, score_obs) /
1235            (self.nobs - self.ssm.loglikelihood_burn)
1236        )
1237
1238    def _score_complex_step(self, params, **kwargs):
1239        # the default epsilon can be too small
1240        # inversion_method = INVERT_UNIVARIATE | SOLVE_LU
1241        epsilon = _get_epsilon(params, 2., None, len(params))
1242        kwargs['transformed'] = True
1243        kwargs['complex_step'] = True
1244        return approx_fprime_cs(params, self.loglike, epsilon=epsilon,
1245                                kwargs=kwargs)
1246
1247    def _score_finite_difference(self, params, approx_centered=False,
1248                                 **kwargs):
1249        kwargs['transformed'] = True
1250        return approx_fprime(params, self.loglike, kwargs=kwargs,
1251                             centered=approx_centered)
1252
1253    def _score_harvey(self, params, approx_complex_step=True, **kwargs):
1254        score_obs = self._score_obs_harvey(
1255            params, approx_complex_step=approx_complex_step, **kwargs)
1256        return np.sum(score_obs, axis=0)
1257
1258    def _score_obs_harvey(self, params, approx_complex_step=True,
1259                          approx_centered=False, includes_fixed=False,
1260                          **kwargs):
1261        """
1262        Score
1263
1264        Parameters
1265        ----------
1266        params : array_like, optional
1267            Array of parameters at which to evaluate the loglikelihood
1268            function.
1269        **kwargs
1270            Additional keyword arguments to pass to the Kalman filter. See
1271            `KalmanFilter.filter` for more details.
1272
1273        Notes
1274        -----
1275        This method is from Harvey (1989), section 3.4.5
1276
1277        References
1278        ----------
1279        Harvey, Andrew C. 1990.
1280        Forecasting, Structural Time Series Models and the Kalman Filter.
1281        Cambridge University Press.
1282        """
1283        params = np.array(params, ndmin=1)
1284        n = len(params)
1285
1286        # Get values at the params themselves
1287        self.update(params, transformed=True, includes_fixed=includes_fixed,
1288                    complex_step=approx_complex_step)
1289        if approx_complex_step:
1290            kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU
1291        if 'transformed' in kwargs:
1292            del kwargs['transformed']
1293        res = self.ssm.filter(complex_step=approx_complex_step, **kwargs)
1294
1295        # Get forecasts error partials
1296        partials_forecasts_error, partials_forecasts_error_cov = (
1297            self._forecasts_error_partial_derivatives(
1298                params, transformed=True, includes_fixed=includes_fixed,
1299                approx_complex_step=approx_complex_step,
1300                approx_centered=approx_centered, res=res, **kwargs))
1301
1302        # Compute partial derivatives w.r.t. likelihood function
1303        partials = np.zeros((self.nobs, n))
1304        k_endog = self.k_endog
1305        for t in range(self.nobs):
1306            inv_forecasts_error_cov = np.linalg.inv(
1307                    res.forecasts_error_cov[:, :, t])
1308
1309            for i in range(n):
1310                partials[t, i] += np.trace(np.dot(
1311                    np.dot(inv_forecasts_error_cov,
1312                           partials_forecasts_error_cov[:, :, t, i]),
1313                    (np.eye(k_endog) -
1314                     np.dot(inv_forecasts_error_cov,
1315                            np.outer(res.forecasts_error[:, t],
1316                                     res.forecasts_error[:, t])))))
1317                # 2 * dv / di * F^{-1} v_t
1318                # where x = F^{-1} v_t or F x = v
1319                partials[t, i] += 2 * np.dot(
1320                    partials_forecasts_error[:, t, i],
1321                    np.dot(inv_forecasts_error_cov, res.forecasts_error[:, t]))
1322
1323        return -partials / 2.
1324
1325    _score_param_names = ['transformed', 'includes_fixed', 'score_method',
1326                          'approx_complex_step', 'approx_centered']
1327    _score_param_defaults = [True, False, 'approx', None, False]
1328
1329    def score(self, params, *args, **kwargs):
1330        """
1331        Compute the score function at params.
1332
1333        Parameters
1334        ----------
1335        params : array_like
1336            Array of parameters at which to evaluate the score.
1337        *args
1338            Additional positional arguments to the `loglike` method.
1339        **kwargs
1340            Additional keyword arguments to the `loglike` method.
1341
1342        Returns
1343        -------
1344        score : ndarray
1345            Score, evaluated at `params`.
1346
1347        Notes
1348        -----
1349        This is a numerical approximation, calculated using first-order complex
1350        step differentiation on the `loglike` method.
1351
1352        Both args and kwargs are necessary because the optimizer from
1353        `fit` must call this function and only supports passing arguments via
1354        args (for example `scipy.optimize.fmin_l_bfgs`).
1355        """
1356        (transformed, includes_fixed, method, approx_complex_step,
1357         approx_centered, kwargs) = (
1358            _handle_args(MLEModel._score_param_names,
1359                         MLEModel._score_param_defaults, *args, **kwargs))
1360        # For fit() calls, the method is called 'score_method' (to distinguish
1361        # it from the method used for fit) but generally in kwargs the method
1362        # will just be called 'method'
1363        if 'method' in kwargs:
1364            method = kwargs.pop('method')
1365
1366        if approx_complex_step is None:
1367            approx_complex_step = not self.ssm._complex_endog
1368        if approx_complex_step and self.ssm._complex_endog:
1369            raise ValueError('Cannot use complex step derivatives when data'
1370                             ' or parameters are complex.')
1371
1372        out = self.handle_params(
1373            params, transformed=transformed, includes_fixed=includes_fixed,
1374            return_jacobian=not transformed)
1375        if transformed:
1376            params = out
1377        else:
1378            params, transform_score = out
1379
1380        if method == 'harvey':
1381            kwargs['includes_fixed'] = True
1382            score = self._score_harvey(
1383                params, approx_complex_step=approx_complex_step, **kwargs)
1384        elif method == 'approx' and approx_complex_step:
1385            kwargs['includes_fixed'] = True
1386            score = self._score_complex_step(params, **kwargs)
1387        elif method == 'approx':
1388            kwargs['includes_fixed'] = True
1389            score = self._score_finite_difference(
1390                params, approx_centered=approx_centered, **kwargs)
1391        else:
1392            raise NotImplementedError('Invalid score method.')
1393
1394        if not transformed:
1395            score = np.dot(transform_score, score)
1396
1397        if self._has_fixed_params and not includes_fixed:
1398            score = score[self._free_params_index]
1399
1400        return score
1401
1402    def score_obs(self, params, method='approx', transformed=True,
1403                  includes_fixed=False, approx_complex_step=None,
1404                  approx_centered=False, **kwargs):
1405        """
1406        Compute the score per observation, evaluated at params
1407
1408        Parameters
1409        ----------
1410        params : array_like
1411            Array of parameters at which to evaluate the score.
1412        **kwargs
1413            Additional arguments to the `loglike` method.
1414
1415        Returns
1416        -------
1417        score : ndarray
1418            Score per observation, evaluated at `params`.
1419
1420        Notes
1421        -----
1422        This is a numerical approximation, calculated using first-order complex
1423        step differentiation on the `loglikeobs` method.
1424        """
1425        if not transformed and approx_complex_step:
1426            raise ValueError("Cannot use complex-step approximations to"
1427                             " calculate the score at each observation"
1428                             " with untransformed parameters.")
1429
1430        if approx_complex_step is None:
1431            approx_complex_step = not self.ssm._complex_endog
1432        if approx_complex_step and self.ssm._complex_endog:
1433            raise ValueError('Cannot use complex step derivatives when data'
1434                             ' or parameters are complex.')
1435
1436        params = self.handle_params(params, transformed=True,
1437                                    includes_fixed=includes_fixed)
1438        kwargs['transformed'] = transformed
1439        kwargs['includes_fixed'] = True
1440
1441        if method == 'harvey':
1442            score = self._score_obs_harvey(
1443                params, approx_complex_step=approx_complex_step, **kwargs)
1444        elif method == 'approx' and approx_complex_step:
1445            # the default epsilon can be too small
1446            epsilon = _get_epsilon(params, 2., None, len(params))
1447            kwargs['complex_step'] = True
1448            score = approx_fprime_cs(params, self.loglikeobs, epsilon=epsilon,
1449                                     kwargs=kwargs)
1450        elif method == 'approx':
1451            score = approx_fprime(params, self.loglikeobs, kwargs=kwargs,
1452                                  centered=approx_centered)
1453        else:
1454            raise NotImplementedError('Invalid scoreobs method.')
1455
1456        return score
1457
1458    _hessian_param_names = ['transformed', 'hessian_method',
1459                            'approx_complex_step', 'approx_centered']
1460    _hessian_param_defaults = [True, 'approx', None, False]
1461
1462    def hessian(self, params, *args, **kwargs):
1463        r"""
1464        Hessian matrix of the likelihood function, evaluated at the given
1465        parameters
1466
1467        Parameters
1468        ----------
1469        params : array_like
1470            Array of parameters at which to evaluate the hessian.
1471        *args
1472            Additional positional arguments to the `loglike` method.
1473        **kwargs
1474            Additional keyword arguments to the `loglike` method.
1475
1476        Returns
1477        -------
1478        hessian : ndarray
1479            Hessian matrix evaluated at `params`
1480
1481        Notes
1482        -----
1483        This is a numerical approximation.
1484
1485        Both args and kwargs are necessary because the optimizer from
1486        `fit` must call this function and only supports passing arguments via
1487        args (for example `scipy.optimize.fmin_l_bfgs`).
1488        """
1489        transformed, method, approx_complex_step, approx_centered, kwargs = (
1490            _handle_args(MLEModel._hessian_param_names,
1491                         MLEModel._hessian_param_defaults,
1492                         *args, **kwargs))
1493        # For fit() calls, the method is called 'hessian_method' (to
1494        # distinguish it from the method used for fit) but generally in kwargs
1495        # the method will just be called 'method'
1496        if 'method' in kwargs:
1497            method = kwargs.pop('method')
1498
1499        if not transformed and approx_complex_step:
1500            raise ValueError("Cannot use complex-step approximations to"
1501                             " calculate the hessian with untransformed"
1502                             " parameters.")
1503
1504        if approx_complex_step is None:
1505            approx_complex_step = not self.ssm._complex_endog
1506        if approx_complex_step and self.ssm._complex_endog:
1507            raise ValueError('Cannot use complex step derivatives when data'
1508                             ' or parameters are complex.')
1509
1510        if method == 'oim':
1511            hessian = self._hessian_oim(
1512                params, transformed=transformed,
1513                approx_complex_step=approx_complex_step,
1514                approx_centered=approx_centered, **kwargs)
1515        elif method == 'opg':
1516            hessian = self._hessian_opg(
1517                params, transformed=transformed,
1518                approx_complex_step=approx_complex_step,
1519                approx_centered=approx_centered, **kwargs)
1520        elif method == 'approx' and approx_complex_step:
1521            hessian = self._hessian_complex_step(
1522                params, transformed=transformed, **kwargs)
1523        elif method == 'approx':
1524            hessian = self._hessian_finite_difference(
1525                params, transformed=transformed,
1526                approx_centered=approx_centered, **kwargs)
1527        else:
1528            raise NotImplementedError('Invalid Hessian calculation method.')
1529        return hessian
1530
1531    def _hessian_oim(self, params, **kwargs):
1532        """
1533        Hessian matrix computed using the Harvey (1989) information matrix
1534        """
1535        return -self.observed_information_matrix(params, **kwargs)
1536
1537    def _hessian_opg(self, params, **kwargs):
1538        """
1539        Hessian matrix computed using the outer product of gradients
1540        information matrix
1541        """
1542        return -self.opg_information_matrix(params, **kwargs)
1543
1544    def _hessian_finite_difference(self, params, approx_centered=False,
1545                                   **kwargs):
1546        params = np.array(params, ndmin=1)
1547
1548        warnings.warn('Calculation of the Hessian using finite differences'
1549                      ' is usually subject to substantial approximation'
1550                      ' errors.', PrecisionWarning)
1551
1552        if not approx_centered:
1553            epsilon = _get_epsilon(params, 3, None, len(params))
1554        else:
1555            epsilon = _get_epsilon(params, 4, None, len(params)) / 2
1556        hessian = approx_fprime(params, self._score_finite_difference,
1557                                epsilon=epsilon, kwargs=kwargs,
1558                                centered=approx_centered)
1559
1560        return hessian / (self.nobs - self.ssm.loglikelihood_burn)
1561
1562    def _hessian_complex_step(self, params, **kwargs):
1563        """
1564        Hessian matrix computed by second-order complex-step differentiation
1565        on the `loglike` function.
1566        """
1567        # the default epsilon can be too small
1568        epsilon = _get_epsilon(params, 3., None, len(params))
1569        kwargs['transformed'] = True
1570        kwargs['complex_step'] = True
1571        hessian = approx_hess_cs(
1572            params, self.loglike, epsilon=epsilon, kwargs=kwargs)
1573
1574        return hessian / (self.nobs - self.ssm.loglikelihood_burn)
1575
1576    @property
1577    def start_params(self):
1578        """
1579        (array) Starting parameters for maximum likelihood estimation.
1580        """
1581        if hasattr(self, '_start_params'):
1582            return self._start_params
1583        else:
1584            raise NotImplementedError
1585
1586    @property
1587    def param_names(self):
1588        """
1589        (list of str) List of human readable parameter names (for parameters
1590        actually included in the model).
1591        """
1592        if hasattr(self, '_param_names'):
1593            return self._param_names
1594        else:
1595            try:
1596                names = ['param.%d' % i for i in range(len(self.start_params))]
1597            except NotImplementedError:
1598                names = []
1599            return names
1600
1601    @property
1602    def state_names(self):
1603        """
1604        (list of str) List of human readable names for unobserved states.
1605        """
1606        if hasattr(self, '_state_names'):
1607            return self._state_names
1608        else:
1609            names = ['state.%d' % i for i in range(self.k_states)]
1610        return names
1611
1612    def transform_jacobian(self, unconstrained, approx_centered=False):
1613        """
1614        Jacobian matrix for the parameter transformation function
1615
1616        Parameters
1617        ----------
1618        unconstrained : array_like
1619            Array of unconstrained parameters used by the optimizer.
1620
1621        Returns
1622        -------
1623        jacobian : ndarray
1624            Jacobian matrix of the transformation, evaluated at `unconstrained`
1625
1626        See Also
1627        --------
1628        transform_params
1629
1630        Notes
1631        -----
1632        This is a numerical approximation using finite differences. Note that
1633        in general complex step methods cannot be used because it is not
1634        guaranteed that the `transform_params` method is a real function (e.g.
1635        if Cholesky decomposition is used).
1636        """
1637        return approx_fprime(unconstrained, self.transform_params,
1638                             centered=approx_centered)
1639
1640    def transform_params(self, unconstrained):
1641        """
1642        Transform unconstrained parameters used by the optimizer to constrained
1643        parameters used in likelihood evaluation
1644
1645        Parameters
1646        ----------
1647        unconstrained : array_like
1648            Array of unconstrained parameters used by the optimizer, to be
1649            transformed.
1650
1651        Returns
1652        -------
1653        constrained : array_like
1654            Array of constrained parameters which may be used in likelihood
1655            evaluation.
1656
1657        Notes
1658        -----
1659        This is a noop in the base class, subclasses should override where
1660        appropriate.
1661        """
1662        return np.array(unconstrained, ndmin=1)
1663
1664    def untransform_params(self, constrained):
1665        """
1666        Transform constrained parameters used in likelihood evaluation
1667        to unconstrained parameters used by the optimizer
1668
1669        Parameters
1670        ----------
1671        constrained : array_like
1672            Array of constrained parameters used in likelihood evaluation, to
1673            be transformed.
1674
1675        Returns
1676        -------
1677        unconstrained : array_like
1678            Array of unconstrained parameters used by the optimizer.
1679
1680        Notes
1681        -----
1682        This is a noop in the base class, subclasses should override where
1683        appropriate.
1684        """
1685        return np.array(constrained, ndmin=1)
1686
1687    def handle_params(self, params, transformed=True, includes_fixed=False,
1688                      return_jacobian=False):
1689        """
1690        Ensure model parameters satisfy shape and other requirements
1691        """
1692        params = np.array(params, ndmin=1)
1693
1694        # Never want integer dtype, so convert to floats
1695        if np.issubdtype(params.dtype, np.integer):
1696            params = params.astype(np.float64)
1697
1698        if not includes_fixed and self._has_fixed_params:
1699            k_params = len(self.param_names)
1700            new_params = np.zeros(k_params, dtype=params.dtype) * np.nan
1701            new_params[self._free_params_index] = params
1702            params = new_params
1703
1704        if not transformed:
1705            # It may be the case that the transformation relies on having
1706            # "some" (non-NaN) values for the fixed parameters, even if we will
1707            # not actually be transforming the fixed parameters (as they will)
1708            # be set below regardless
1709            if not includes_fixed and self._has_fixed_params:
1710                params[self._fixed_params_index] = (
1711                    list(self._fixed_params.values()))
1712
1713            if return_jacobian:
1714                transform_score = self.transform_jacobian(params)
1715            params = self.transform_params(params)
1716
1717        if not includes_fixed and self._has_fixed_params:
1718            params[self._fixed_params_index] = (
1719                list(self._fixed_params.values()))
1720
1721        return (params, transform_score) if return_jacobian else params
1722
1723    def update(self, params, transformed=True, includes_fixed=False,
1724               complex_step=False):
1725        """
1726        Update the parameters of the model
1727
1728        Parameters
1729        ----------
1730        params : array_like
1731            Array of new parameters.
1732        transformed : bool, optional
1733            Whether or not `params` is already transformed. If set to False,
1734            `transform_params` is called. Default is True.
1735
1736        Returns
1737        -------
1738        params : array_like
1739            Array of parameters.
1740
1741        Notes
1742        -----
1743        Since Model is a base class, this method should be overridden by
1744        subclasses to perform actual updating steps.
1745        """
1746        return self.handle_params(params=params, transformed=transformed,
1747                                  includes_fixed=includes_fixed)
1748
1749    def _validate_out_of_sample_exog(self, exog, out_of_sample):
1750        """
1751        Validate given `exog` as satisfactory for out-of-sample operations
1752
1753        Parameters
1754        ----------
1755        exog : array_like or None
1756            New observations of exogenous regressors, if applicable.
1757        out_of_sample : int
1758            Number of new observations required.
1759
1760        Returns
1761        -------
1762        exog : array or None
1763            A numpy array of shape (out_of_sample, k_exog) if the model
1764            contains an `exog` component, or None if it does not.
1765        """
1766        if out_of_sample and self.k_exog > 0:
1767            if exog is None:
1768                raise ValueError('Out-of-sample operations in a model'
1769                                 ' with a regression component require'
1770                                 ' additional exogenous values via the'
1771                                 ' `exog` argument.')
1772            exog = np.array(exog)
1773            required_exog_shape = (out_of_sample, self.k_exog)
1774            try:
1775                exog = exog.reshape(required_exog_shape)
1776            except ValueError:
1777                raise ValueError('Provided exogenous values are not of the'
1778                                 ' appropriate shape. Required %s, got %s.'
1779                                 % (str(required_exog_shape),
1780                                    str(exog.shape)))
1781        elif self.k_exog > 0 and exog is not None:
1782            exog = None
1783            warnings.warn('Exogenous array provided, but additional data'
1784                          ' is not required. `exog` argument ignored.',
1785                          ValueWarning)
1786
1787        return exog
1788
1789    def _get_extension_time_varying_matrices(
1790            self, params, exog, out_of_sample, extend_kwargs=None,
1791            transformed=True, includes_fixed=False, **kwargs):
1792        """
1793        Get updated time-varying state space system matrices
1794
1795        Parameters
1796        ----------
1797        params : array_like
1798            Array of parameters used to construct the time-varying system
1799            matrices.
1800        exog : array_like or None
1801            New observations of exogenous regressors, if applicable.
1802        out_of_sample : int
1803            Number of new observations required.
1804        extend_kwargs : dict, optional
1805            Dictionary of keyword arguments to pass to the state space model
1806            constructor. For example, for an SARIMAX state space model, this
1807            could be used to pass the `concentrate_scale=True` keyword
1808            argument. Any arguments that are not explicitly set in this
1809            dictionary will be copied from the current model instance.
1810        transformed : bool, optional
1811            Whether or not `start_params` is already transformed. Default is
1812            True.
1813        includes_fixed : bool, optional
1814            If parameters were previously fixed with the `fix_params` method,
1815            this argument describes whether or not `start_params` also includes
1816            the fixed parameters, in addition to the free parameters. Default
1817            is False.
1818        """
1819        # Get the appropriate exog for the extended sample
1820        exog = self._validate_out_of_sample_exog(exog, out_of_sample)
1821
1822        # Create extended model
1823        if extend_kwargs is None:
1824            extend_kwargs = {}
1825
1826        # Handle trend offset for extended model
1827        if getattr(self, 'k_trend', 0) > 0 and hasattr(self, 'trend_offset'):
1828            extend_kwargs.setdefault(
1829                'trend_offset', self.trend_offset + self.nobs)
1830
1831        mod_extend = self.clone(
1832            endog=np.zeros((out_of_sample, self.k_endog)), exog=exog,
1833            **extend_kwargs)
1834        mod_extend.update(params, transformed=transformed,
1835                          includes_fixed=includes_fixed)
1836
1837        # Retrieve the extensions to the time-varying system matrices and
1838        # put them in kwargs
1839        for name in self.ssm.shapes.keys():
1840            if name == 'obs' or name in kwargs:
1841                continue
1842            original = getattr(self.ssm, name)
1843            extended = getattr(mod_extend.ssm, name)
1844            so = original.shape[-1]
1845            se = extended.shape[-1]
1846            if ((so > 1 or se > 1) or (
1847                    so == 1 and self.nobs == 1 and
1848                    np.any(original[..., 0] != extended[..., 0]))):
1849                kwargs[name] = extended[..., -out_of_sample:]
1850
1851        return kwargs
1852
1853    def simulate(self, params, nsimulations, measurement_shocks=None,
1854                 state_shocks=None, initial_state=None, anchor=None,
1855                 repetitions=None, exog=None, extend_model=None,
1856                 extend_kwargs=None, transformed=True, includes_fixed=False,
1857                 **kwargs):
1858        r"""
1859        Simulate a new time series following the state space model
1860
1861        Parameters
1862        ----------
1863        params : array_like
1864            Array of parameters to use in constructing the state space
1865            representation to use when simulating.
1866        nsimulations : int
1867            The number of observations to simulate. If the model is
1868            time-invariant this can be any number. If the model is
1869            time-varying, then this number must be less than or equal to the
1870            number of observations.
1871        measurement_shocks : array_like, optional
1872            If specified, these are the shocks to the measurement equation,
1873            :math:`\varepsilon_t`. If unspecified, these are automatically
1874            generated using a pseudo-random number generator. If specified,
1875            must be shaped `nsimulations` x `k_endog`, where `k_endog` is the
1876            same as in the state space model.
1877        state_shocks : array_like, optional
1878            If specified, these are the shocks to the state equation,
1879            :math:`\eta_t`. If unspecified, these are automatically
1880            generated using a pseudo-random number generator. If specified,
1881            must be shaped `nsimulations` x `k_posdef` where `k_posdef` is the
1882            same as in the state space model.
1883        initial_state : array_like, optional
1884            If specified, this is the initial state vector to use in
1885            simulation, which should be shaped (`k_states` x 1), where
1886            `k_states` is the same as in the state space model. If unspecified,
1887            but the model has been initialized, then that initialization is
1888            used. This must be specified if `anchor` is anything other than
1889            "start" or 0 (or else you can use the `simulate` method on a
1890            results object rather than on the model object).
1891        anchor : int, str, or datetime, optional
1892            First period for simulation. The simulation will be conditional on
1893            all existing datapoints prior to the `anchor`.  Type depends on the
1894            index of the given `endog` in the model. Two special cases are the
1895            strings 'start' and 'end'. `start` refers to beginning the
1896            simulation at the first period of the sample, and `end` refers to
1897            beginning the simulation at the first period after the sample.
1898            Integer values can run from 0 to `nobs`, or can be negative to
1899            apply negative indexing. Finally, if a date/time index was provided
1900            to the model, then this argument can be a date string to parse or a
1901            datetime type. Default is 'start'.
1902        repetitions : int, optional
1903            Number of simulated paths to generate. Default is 1 simulated path.
1904        exog : array_like, optional
1905            New observations of exogenous regressors, if applicable.
1906        transformed : bool, optional
1907            Whether or not `params` is already transformed. Default is
1908            True.
1909        includes_fixed : bool, optional
1910            If parameters were previously fixed with the `fix_params` method,
1911            this argument describes whether or not `params` also includes
1912            the fixed parameters, in addition to the free parameters. Default
1913            is False.
1914
1915        Returns
1916        -------
1917        simulated_obs : ndarray
1918            An array of simulated observations. If `repetitions=None`, then it
1919            will be shaped (nsimulations x k_endog) or (nsimulations,) if
1920            `k_endog=1`. Otherwise it will be shaped
1921            (nsimulations x k_endog x repetitions). If the model was given
1922            Pandas input then the output will be a Pandas object. If
1923            `k_endog > 1` and `repetitions` is not None, then the output will
1924            be a Pandas DataFrame that has a MultiIndex for the columns, with
1925            the first level containing the names of the `endog` variables and
1926            the second level containing the repetition number.
1927        """
1928        # Make sure the model class has the current parameters
1929        self.update(params, transformed=transformed,
1930                    includes_fixed=includes_fixed)
1931
1932        # Get the starting location
1933        if anchor is None or anchor == 'start':
1934            iloc = 0
1935        elif anchor == 'end':
1936            iloc = self.nobs
1937        else:
1938            iloc, _, _ = self._get_index_loc(anchor)
1939            if isinstance(iloc, slice):
1940                iloc = iloc.start
1941
1942        if iloc < 0:
1943            iloc = self.nobs + iloc
1944        if iloc > self.nobs:
1945            raise ValueError('Cannot anchor simulation outside of the sample.')
1946
1947        if iloc > 0 and initial_state is None:
1948            raise ValueError('If `anchor` is after the start of the sample,'
1949                             ' must provide a value for `initial_state`.')
1950
1951        # Get updated time-varying system matrices in **kwargs, if necessary
1952        out_of_sample = max(iloc + nsimulations - self.nobs, 0)
1953        if extend_model is None:
1954            extend_model = self.exog is not None or not self.ssm.time_invariant
1955        if out_of_sample and extend_model:
1956            kwargs = self._get_extension_time_varying_matrices(
1957                params, exog, out_of_sample, extend_kwargs,
1958                transformed=transformed, includes_fixed=includes_fixed,
1959                **kwargs)
1960
1961        # Standardize the dimensions of the initial state
1962        if initial_state is not None:
1963            initial_state = np.array(initial_state)
1964            if initial_state.ndim < 2:
1965                initial_state = np.atleast_2d(initial_state).T
1966
1967        # Construct a model that represents the simulation period
1968        end = min(self.nobs, iloc + nsimulations)
1969        nextend = iloc + nsimulations - end
1970        sim_model = self.ssm.extend(np.empty((nextend, self.k_endog)),
1971                                    start=iloc, end=end, **kwargs)
1972
1973        # Simulate the data
1974        _repetitions = 1 if repetitions is None else repetitions
1975        sim = np.zeros((nsimulations, self.k_endog, _repetitions))
1976
1977        for i in range(_repetitions):
1978            initial_state_variates = None
1979            if initial_state is not None:
1980                if initial_state.shape[1] == 1:
1981                    initial_state_variates = initial_state[:, 0]
1982                else:
1983                    initial_state_variates = initial_state[:, i]
1984
1985            # TODO: allow specifying measurement / state shocks for each
1986            # repetition?
1987
1988            out, _ = sim_model.simulate(
1989                nsimulations, measurement_shocks, state_shocks,
1990                initial_state_variates)
1991
1992            sim[:, :, i] = out
1993
1994        # Wrap data / squeeze where appropriate
1995        use_pandas = isinstance(self.data, PandasData)
1996        index = None
1997        if use_pandas:
1998            _, _, _, index = self._get_prediction_index(
1999                iloc, iloc + nsimulations - 1)
2000        # If `repetitions` isn't set, we squeeze the last dimension(s)
2001        if repetitions is None:
2002            if self.k_endog == 1:
2003                sim = sim[:, 0, 0]
2004                if use_pandas:
2005                    sim = pd.Series(sim, index=index, name=self.endog_names)
2006            else:
2007                sim = sim[:, :, 0]
2008                if use_pandas:
2009                    sim = pd.DataFrame(sim, index=index,
2010                                       columns=self.endog_names)
2011        elif use_pandas:
2012            shape = sim.shape
2013            endog_names = self.endog_names
2014            if not isinstance(endog_names, list):
2015                endog_names = [endog_names]
2016            columns = pd.MultiIndex.from_product([endog_names,
2017                                                  np.arange(shape[2])])
2018            sim = pd.DataFrame(sim.reshape(shape[0], shape[1] * shape[2]),
2019                               index=index, columns=columns)
2020
2021        return sim
2022
2023    def impulse_responses(self, params, steps=1, impulse=0,
2024                          orthogonalized=False, cumulative=False, anchor=None,
2025                          exog=None, extend_model=None, extend_kwargs=None,
2026                          transformed=True, includes_fixed=False, **kwargs):
2027        """
2028        Impulse response function
2029
2030        Parameters
2031        ----------
2032        params : array_like
2033            Array of model parameters.
2034        steps : int, optional
2035            The number of steps for which impulse responses are calculated.
2036            Default is 1. Note that for time-invariant models, the initial
2037            impulse is not counted as a step, so if `steps=1`, the output will
2038            have 2 entries.
2039        impulse : int, str or array_like
2040            If an integer, the state innovation to pulse; must be between 0
2041            and `k_posdef-1`. If a str, it indicates which column of df
2042            the unit (1) impulse is given.
2043            Alternatively, a custom impulse vector may be provided; must be
2044            shaped `k_posdef x 1`.
2045        orthogonalized : bool, optional
2046            Whether or not to perform impulse using orthogonalized innovations.
2047            Note that this will also affect custum `impulse` vectors. Default
2048            is False.
2049        cumulative : bool, optional
2050            Whether or not to return cumulative impulse responses. Default is
2051            False.
2052        anchor : int, str, or datetime, optional
2053            Time point within the sample for the state innovation impulse. Type
2054            depends on the index of the given `endog` in the model. Two special
2055            cases are the strings 'start' and 'end', which refer to setting the
2056            impulse at the first and last points of the sample, respectively.
2057            Integer values can run from 0 to `nobs - 1`, or can be negative to
2058            apply negative indexing. Finally, if a date/time index was provided
2059            to the model, then this argument can be a date string to parse or a
2060            datetime type. Default is 'start'.
2061        exog : array_like, optional
2062            New observations of exogenous regressors for our-of-sample periods,
2063            if applicable.
2064        transformed : bool, optional
2065            Whether or not `params` is already transformed. Default is
2066            True.
2067        includes_fixed : bool, optional
2068            If parameters were previously fixed with the `fix_params` method,
2069            this argument describes whether or not `params` also includes
2070            the fixed parameters, in addition to the free parameters. Default
2071            is False.
2072        **kwargs
2073            If the model has time-varying design or transition matrices and the
2074            combination of `anchor` and `steps` implies creating impulse
2075            responses for the out-of-sample period, then these matrices must
2076            have updated values provided for the out-of-sample steps. For
2077            example, if `design` is a time-varying component, `nobs` is 10,
2078            `anchor=1`, and `steps` is 15, a (`k_endog` x `k_states` x 7)
2079            matrix must be provided with the new design matrix values.
2080
2081        Returns
2082        -------
2083        impulse_responses : ndarray
2084            Responses for each endogenous variable due to the impulse
2085            given by the `impulse` argument. For a time-invariant model, the
2086            impulse responses are given for `steps + 1` elements (this gives
2087            the "initial impulse" followed by `steps` responses for the
2088            important cases of VAR and SARIMAX models), while for time-varying
2089            models the impulse responses are only given for `steps` elements
2090            (to avoid having to unexpectedly provide updated time-varying
2091            matrices).
2092
2093        Notes
2094        -----
2095        Intercepts in the measurement and state equation are ignored when
2096        calculating impulse responses.
2097
2098        TODO: add an option to allow changing the ordering for the
2099              orthogonalized option. Will require permuting matrices when
2100              constructing the extended model.
2101        """
2102        # Make sure the model class has the current parameters
2103        self.update(params, transformed=transformed,
2104                    includes_fixed=includes_fixed)
2105
2106        # For time-invariant models, add an additional `step`. This is the
2107        # default for time-invariant models based on the expected behavior for
2108        # ARIMA and VAR models: we want to record the initial impulse and also
2109        # `steps` values of the responses afterwards.
2110        # Note: we don't modify `steps` itself, because
2111        # `KalmanFilter.impulse_responses` also adds an additional step in this
2112        # case (this is so that there isn't different behavior when calling
2113        # this method versus that method). We just need to also keep track of
2114        # this here because we need to generate the correct extended model.
2115        additional_steps = 0
2116        if (self.ssm._design.shape[2] == 1 and
2117                self.ssm._transition.shape[2] == 1 and
2118                self.ssm._selection.shape[2] == 1):
2119            additional_steps = 1
2120
2121        # Get the starting location
2122        if anchor is None or anchor == 'start':
2123            iloc = 0
2124        elif anchor == 'end':
2125            iloc = self.nobs - 1
2126        else:
2127            iloc, _, _ = self._get_index_loc(anchor)
2128            if isinstance(iloc, slice):
2129                iloc = iloc.start
2130
2131        if iloc < 0:
2132            iloc = self.nobs + iloc
2133        if iloc >= self.nobs:
2134            raise ValueError('Cannot anchor impulse responses outside of the'
2135                             ' sample.')
2136
2137        time_invariant = (
2138            self.ssm._design.shape[2] == self.ssm._obs_cov.shape[2] ==
2139            self.ssm._transition.shape[2] == self.ssm._selection.shape[2] ==
2140            self.ssm._state_cov.shape[2] == 1)
2141
2142        # Get updated time-varying system matrices in **kwargs, if necessary
2143        # (Note: KalmanFilter adds 1 to steps to account for the first impulse)
2144        out_of_sample = max(
2145            iloc + (steps + additional_steps + 1) - self.nobs, 0)
2146        if extend_model is None:
2147            extend_model = self.exog is not None and not time_invariant
2148        if out_of_sample and extend_model:
2149            kwargs = self._get_extension_time_varying_matrices(
2150                params, exog, out_of_sample, extend_kwargs,
2151                transformed=transformed, includes_fixed=includes_fixed,
2152                **kwargs)
2153
2154        # Special handling for matrix terms that are time-varying but
2155        # irrelevant for impulse response functions. Must be set since
2156        # ssm.extend() requires that we pass new matrices for these, but they
2157        # are ignored for IRF purposes.
2158        end = min(self.nobs, iloc + steps + additional_steps)
2159        nextend = iloc + (steps + additional_steps + 1) - end
2160        if ('obs_intercept' not in kwargs and
2161                self.ssm._obs_intercept.shape[1] > 1):
2162            kwargs['obs_intercept'] = np.zeros((self.k_endog, nextend))
2163        if ('state_intercept' not in kwargs and
2164                self.ssm._state_intercept.shape[1] > 1):
2165            kwargs['state_intercept'] = np.zeros((self.k_states, nextend))
2166        if 'obs_cov' not in kwargs and self.ssm._obs_cov.shape[2] > 1:
2167            kwargs['obs_cov'] = np.zeros((self.k_endog, self.k_endog, nextend))
2168        # Special handling for matrix terms that are time-varying but
2169        # only the value at the anchor matters for IRF purposes.
2170        if 'state_cov' not in kwargs and self.ssm._state_cov.shape[2] > 1:
2171            tmp = np.zeros((self.ssm.k_posdef, self.ssm.k_posdef, nextend))
2172            tmp[:] = self['state_cov', :, :, iloc:iloc + 1]
2173            kwargs['state_cov'] = tmp
2174        if 'selection' not in kwargs and self.ssm._selection.shape[2] > 1:
2175            tmp = np.zeros((self.k_states, self.ssm.k_posdef, nextend))
2176            tmp[:] = self['selection', :, :, iloc:iloc + 1]
2177            kwargs['selection'] = tmp
2178
2179        # Construct a model that represents the simulation period
2180        sim_model = self.ssm.extend(np.empty((nextend, self.k_endog)),
2181                                    start=iloc, end=end, **kwargs)
2182
2183        # Compute the impulse responses
2184
2185        # Convert endog name to index
2186        use_pandas = isinstance(self.data, PandasData)
2187        if type(impulse) == str:
2188            if not use_pandas:
2189                raise ValueError('Endog must be pd.DataFrame.')
2190            impulse = self.endog_names.index(impulse)
2191
2192        irfs = sim_model.impulse_responses(
2193            steps, impulse, orthogonalized, cumulative)
2194
2195        # IRF is (nobs x k_endog); do not want to squeeze in case of steps = 1
2196        if irfs.shape[1] == 1:
2197            irfs = irfs[:, 0]
2198
2199        # Wrap data / squeeze where appropriate
2200        if use_pandas:
2201            if self.k_endog == 1:
2202                irfs = pd.Series(irfs, name=self.endog_names)
2203            else:
2204                irfs = pd.DataFrame(irfs, columns=self.endog_names)
2205        return irfs
2206
2207    @classmethod
2208    def from_formula(cls, formula, data, subset=None):
2209        """
2210        Not implemented for state space models
2211        """
2212        raise NotImplementedError
2213
2214
2215class MLEResults(tsbase.TimeSeriesModelResults):
2216    r"""
2217    Class to hold results from fitting a state space model.
2218
2219    Parameters
2220    ----------
2221    model : MLEModel instance
2222        The fitted model instance
2223    params : ndarray
2224        Fitted parameters
2225    filter_results : KalmanFilter instance
2226        The underlying state space model and Kalman filter output
2227
2228    Attributes
2229    ----------
2230    model : Model instance
2231        A reference to the model that was fit.
2232    filter_results : KalmanFilter instance
2233        The underlying state space model and Kalman filter output
2234    nobs : float
2235        The number of observations used to fit the model.
2236    params : ndarray
2237        The parameters of the model.
2238    scale : float
2239        This is currently set to 1.0 unless the model uses concentrated
2240        filtering.
2241
2242    See Also
2243    --------
2244    MLEModel
2245    statsmodels.tsa.statespace.kalman_filter.FilterResults
2246    statsmodels.tsa.statespace.representation.FrozenRepresentation
2247    """
2248    def __init__(self, model, params, results, cov_type=None, cov_kwds=None,
2249                 **kwargs):
2250        self.data = model.data
2251        scale = results.scale
2252
2253        tsbase.TimeSeriesModelResults.__init__(self, model, params,
2254                                               normalized_cov_params=None,
2255                                               scale=scale)
2256
2257        # Save the fixed parameters
2258        self._has_fixed_params = self.model._has_fixed_params
2259        self._fixed_params_index = self.model._fixed_params_index
2260        self._free_params_index = self.model._free_params_index
2261        # TODO: seems like maybe self.fixed_params should be the dictionary
2262        # itself, not just the keys?
2263        if self._has_fixed_params:
2264            self._fixed_params = self.model._fixed_params.copy()
2265            self.fixed_params = list(self._fixed_params.keys())
2266        else:
2267            self._fixed_params = None
2268            self.fixed_params = []
2269        self.param_names = [
2270            '%s (fixed)' % name if name in self.fixed_params else name
2271            for name in (self.data.param_names or [])]
2272
2273        # Save the state space representation output
2274        self.filter_results = results
2275        if isinstance(results, SmootherResults):
2276            self.smoother_results = results
2277        else:
2278            self.smoother_results = None
2279
2280        # Dimensions
2281        self.nobs = self.filter_results.nobs
2282        self.nobs_diffuse = self.filter_results.nobs_diffuse
2283        if self.nobs_diffuse > 0 and self.loglikelihood_burn > 0:
2284            warnings.warn('Care should be used when applying a loglikelihood'
2285                          ' burn to a model with exact diffuse initialization.'
2286                          ' Some results objects, e.g. degrees of freedom,'
2287                          ' expect only one of the two to be set.')
2288        # This only excludes explicitly burned (usually approximate diffuse)
2289        # periods but does not exclude exact diffuse periods. This is
2290        # because the loglikelihood remains valid for the initial periods in
2291        # the exact diffuse case (see DK, 2012, section 7.2) and so also do
2292        # e.g. information criteria (see DK, 2012, section 7.4) and the score
2293        # vector (see DK, 2012, section 7.3.3, equation 7.15).
2294        # However, other objects should be excluded in the diffuse periods
2295        # (e.g. the diffuse forecast errors, so in some cases a different
2296        # nobs_effective will have to be computed and used)
2297        self.nobs_effective = self.nobs - self.loglikelihood_burn
2298
2299        P = self.filter_results.initial_diffuse_state_cov
2300        self.k_diffuse_states = 0 if P is None else np.sum(np.diagonal(P) == 1)
2301
2302        # Degrees of freedom (see DK 2012, section 7.4)
2303        k_free_params = self.params.size - len(self.fixed_params)
2304        self.df_model = (k_free_params + self.k_diffuse_states
2305                         + self.filter_results.filter_concentrated)
2306        self.df_resid = self.nobs_effective - self.df_model
2307
2308        # Setup covariance matrix notes dictionary
2309        if not hasattr(self, 'cov_kwds'):
2310            self.cov_kwds = {}
2311        if cov_type is None:
2312            cov_type = 'approx' if results.memory_no_likelihood else 'opg'
2313        self.cov_type = cov_type
2314
2315        # Setup the cache
2316        self._cache = {}
2317
2318        # Handle covariance matrix calculation
2319        if cov_kwds is None:
2320            cov_kwds = {}
2321        self._cov_approx_complex_step = (
2322            cov_kwds.pop('approx_complex_step', True))
2323        self._cov_approx_centered = cov_kwds.pop('approx_centered', False)
2324        try:
2325            self._rank = None
2326            self._get_robustcov_results(cov_type=cov_type, use_self=True,
2327                                        **cov_kwds)
2328        except np.linalg.LinAlgError:
2329            self._rank = 0
2330            k_params = len(self.params)
2331            self.cov_params_default = np.zeros((k_params, k_params)) * np.nan
2332            self.cov_kwds['cov_type'] = (
2333                'Covariance matrix could not be calculated: singular.'
2334                ' information matrix.')
2335        self.model.update(self.params, transformed=True, includes_fixed=True)
2336
2337        # References of filter and smoother output
2338        extra_arrays = [
2339            'filtered_state', 'filtered_state_cov', 'predicted_state',
2340            'predicted_state_cov', 'forecasts', 'forecasts_error',
2341            'forecasts_error_cov', 'standardized_forecasts_error',
2342            'forecasts_error_diffuse_cov', 'predicted_diffuse_state_cov',
2343            'scaled_smoothed_estimator',
2344            'scaled_smoothed_estimator_cov', 'smoothing_error',
2345            'smoothed_state',
2346            'smoothed_state_cov', 'smoothed_state_autocov',
2347            'smoothed_measurement_disturbance',
2348            'smoothed_state_disturbance',
2349            'smoothed_measurement_disturbance_cov',
2350            'smoothed_state_disturbance_cov']
2351        for name in extra_arrays:
2352            setattr(self, name, getattr(self.filter_results, name, None))
2353
2354        # Remove too-short results when memory conservation was used
2355        if self.filter_results.memory_no_forecast_mean:
2356            self.forecasts = None
2357            self.forecasts_error = None
2358        if self.filter_results.memory_no_forecast_cov:
2359            self.forecasts_error_cov = None
2360        if self.filter_results.memory_no_predicted_mean:
2361            self.predicted_state = None
2362        if self.filter_results.memory_no_predicted_cov:
2363            self.predicted_state_cov = None
2364        if self.filter_results.memory_no_filtered_mean:
2365            self.filtered_state = None
2366        if self.filter_results.memory_no_filtered_cov:
2367            self.filtered_state_cov = None
2368        if self.filter_results.memory_no_gain:
2369            pass
2370        if self.filter_results.memory_no_smoothing:
2371            pass
2372        if self.filter_results.memory_no_std_forecast:
2373            self.standardized_forecasts_error = None
2374
2375        # Save more convenient access to states
2376        # (will create a private attribute _states here and provide actual
2377        # access via a getter, so that we can e.g. issue a warning in the case
2378        # that a useless Pandas index was given in the model specification)
2379        self._states = SimpleNamespace()
2380
2381        use_pandas = isinstance(self.data, PandasData)
2382        index = self.model._index
2383        columns = self.model.state_names
2384
2385        # Predicted states
2386        # Note: a complication here is that we also include the initial values
2387        # here, so that we need an extended index in the Pandas case
2388        if (self.predicted_state is None or
2389                self.filter_results.memory_no_predicted_mean):
2390            self._states.predicted = None
2391        elif use_pandas:
2392            extended_index = self.model._get_index_with_final_state()
2393            self._states.predicted = pd.DataFrame(
2394                self.predicted_state.T, index=extended_index, columns=columns)
2395        else:
2396            self._states.predicted = self.predicted_state.T
2397        if (self.predicted_state_cov is None or
2398                self.filter_results.memory_no_predicted_cov):
2399            self._states.predicted_cov = None
2400        elif use_pandas:
2401            extended_index = self.model._get_index_with_final_state()
2402            tmp = np.transpose(self.predicted_state_cov, (2, 0, 1))
2403            self._states.predicted_cov = pd.DataFrame(
2404                np.reshape(tmp, (tmp.shape[0] * tmp.shape[1], tmp.shape[2])),
2405                index=pd.MultiIndex.from_product(
2406                    [extended_index, columns]).swaplevel(),
2407                columns=columns)
2408        else:
2409            self._states.predicted_cov = np.transpose(
2410                self.predicted_state_cov, (2, 0, 1))
2411
2412        # Filtered states
2413        if (self.filtered_state is None or
2414                self.filter_results.memory_no_filtered_mean):
2415            self._states.filtered = None
2416        elif use_pandas:
2417            self._states.filtered = pd.DataFrame(
2418                self.filtered_state.T, index=index, columns=columns)
2419        else:
2420            self._states.filtered = self.filtered_state.T
2421        if (self.filtered_state_cov is None or
2422                self.filter_results.memory_no_filtered_cov):
2423            self._states.filtered_cov = None
2424        elif use_pandas:
2425            tmp = np.transpose(self.filtered_state_cov, (2, 0, 1))
2426            self._states.filtered_cov = pd.DataFrame(
2427                np.reshape(tmp, (tmp.shape[0] * tmp.shape[1], tmp.shape[2])),
2428                index=pd.MultiIndex.from_product([index, columns]).swaplevel(),
2429                columns=columns)
2430        else:
2431            self._states.filtered_cov = np.transpose(
2432                self.filtered_state_cov, (2, 0, 1))
2433
2434        # Smoothed states
2435        if self.smoothed_state is None:
2436            self._states.smoothed = None
2437        elif use_pandas:
2438            self._states.smoothed = pd.DataFrame(
2439                self.smoothed_state.T, index=index, columns=columns)
2440        else:
2441            self._states.smoothed = self.smoothed_state.T
2442        if self.smoothed_state_cov is None:
2443            self._states.smoothed_cov = None
2444        elif use_pandas:
2445            tmp = np.transpose(self.smoothed_state_cov, (2, 0, 1))
2446            self._states.smoothed_cov = pd.DataFrame(
2447                np.reshape(tmp, (tmp.shape[0] * tmp.shape[1], tmp.shape[2])),
2448                index=pd.MultiIndex.from_product([index, columns]).swaplevel(),
2449                columns=columns)
2450        else:
2451            self._states.smoothed_cov = np.transpose(
2452                self.smoothed_state_cov, (2, 0, 1))
2453
2454        # Handle removing data
2455        self._data_attr_model = getattr(self, '_data_attr_model', [])
2456        self._data_attr_model.extend(['ssm'])
2457        self._data_attr.extend(extra_arrays)
2458        self._data_attr.extend(['filter_results', 'smoother_results'])
2459
2460    def _get_robustcov_results(self, cov_type='opg', **kwargs):
2461        """
2462        Create new results instance with specified covariance estimator as
2463        default
2464
2465        Note: creating new results instance currently not supported.
2466
2467        Parameters
2468        ----------
2469        cov_type : str
2470            the type of covariance matrix estimator to use. See Notes below
2471        kwargs : depends on cov_type
2472            Required or optional arguments for covariance calculation.
2473            See Notes below.
2474
2475        Returns
2476        -------
2477        results : results instance
2478            This method creates a new results instance with the requested
2479            covariance as the default covariance of the parameters.
2480            Inferential statistics like p-values and hypothesis tests will be
2481            based on this covariance matrix.
2482
2483        Notes
2484        -----
2485        The following covariance types and required or optional arguments are
2486        currently available:
2487
2488        - 'opg' for the outer product of gradient estimator
2489        - 'oim' for the observed information matrix estimator, calculated
2490          using the method of Harvey (1989)
2491        - 'approx' for the observed information matrix estimator,
2492          calculated using a numerical approximation of the Hessian matrix.
2493          Uses complex step approximation by default, or uses finite
2494          differences if `approx_complex_step=False` in the `cov_kwds`
2495          dictionary.
2496        - 'robust' for an approximate (quasi-maximum likelihood) covariance
2497          matrix that may be valid even in the presence of some
2498          misspecifications. Intermediate calculations use the 'oim'
2499          method.
2500        - 'robust_approx' is the same as 'robust' except that the
2501          intermediate calculations use the 'approx' method.
2502        - 'none' for no covariance matrix calculation.
2503        """
2504        from statsmodels.base.covtype import descriptions
2505
2506        use_self = kwargs.pop('use_self', False)
2507        if use_self:
2508            res = self
2509        else:
2510            raise NotImplementedError
2511            res = self.__class__(
2512                self.model, self.params,
2513                normalized_cov_params=self.normalized_cov_params,
2514                scale=self.scale)
2515
2516        # Set the new covariance type
2517        res.cov_type = cov_type
2518        res.cov_kwds = {}
2519
2520        # Calculate the new covariance matrix
2521        approx_complex_step = self._cov_approx_complex_step
2522        if approx_complex_step:
2523            approx_type_str = 'complex-step'
2524        elif self._cov_approx_centered:
2525            approx_type_str = 'centered finite differences'
2526        else:
2527            approx_type_str = 'finite differences'
2528
2529        k_params = len(self.params)
2530        if k_params == 0:
2531            res.cov_params_default = np.zeros((0, 0))
2532            res._rank = 0
2533            res.cov_kwds['description'] = 'No parameters estimated.'
2534        elif cov_type == 'custom':
2535            res.cov_type = kwargs['custom_cov_type']
2536            res.cov_params_default = kwargs['custom_cov_params']
2537            res.cov_kwds['description'] = kwargs['custom_description']
2538            if len(self.fixed_params) > 0:
2539                mask = np.ix_(self._free_params_index, self._free_params_index)
2540            else:
2541                mask = np.s_[...]
2542            res._rank = np.linalg.matrix_rank(res.cov_params_default[mask])
2543        elif cov_type == 'none':
2544            res.cov_params_default = np.zeros((k_params, k_params)) * np.nan
2545            res._rank = np.nan
2546            res.cov_kwds['description'] = descriptions['none']
2547        elif self.cov_type == 'approx':
2548            res.cov_params_default = res.cov_params_approx
2549            res.cov_kwds['description'] = descriptions['approx'].format(
2550                                                approx_type=approx_type_str)
2551        elif self.cov_type == 'oim':
2552            res.cov_params_default = res.cov_params_oim
2553            res.cov_kwds['description'] = descriptions['OIM'].format(
2554                                                approx_type=approx_type_str)
2555        elif self.cov_type == 'opg':
2556            res.cov_params_default = res.cov_params_opg
2557            res.cov_kwds['description'] = descriptions['OPG'].format(
2558                                                approx_type=approx_type_str)
2559        elif self.cov_type == 'robust' or self.cov_type == 'robust_oim':
2560            res.cov_params_default = res.cov_params_robust_oim
2561            res.cov_kwds['description'] = descriptions['robust-OIM'].format(
2562                                                approx_type=approx_type_str)
2563        elif self.cov_type == 'robust_approx':
2564            res.cov_params_default = res.cov_params_robust_approx
2565            res.cov_kwds['description'] = descriptions['robust-approx'].format(
2566                                                approx_type=approx_type_str)
2567        else:
2568            raise NotImplementedError('Invalid covariance matrix type.')
2569
2570        return res
2571
2572    @cache_readonly
2573    def aic(self):
2574        """
2575        (float) Akaike Information Criterion
2576        """
2577        return aic(self.llf, self.nobs_effective, self.df_model)
2578
2579    @cache_readonly
2580    def aicc(self):
2581        """
2582        (float) Akaike Information Criterion with small sample correction
2583        """
2584        return aicc(self.llf, self.nobs_effective, self.df_model)
2585
2586    @cache_readonly
2587    def bic(self):
2588        """
2589        (float) Bayes Information Criterion
2590        """
2591        return bic(self.llf, self.nobs_effective, self.df_model)
2592
2593    def _cov_params_approx(self, approx_complex_step=True,
2594                           approx_centered=False):
2595        evaluated_hessian = self.nobs_effective * self.model.hessian(
2596            params=self.params, transformed=True, includes_fixed=True,
2597            method='approx', approx_complex_step=approx_complex_step,
2598            approx_centered=approx_centered)
2599        # TODO: Case with "not approx_complex_step" is not hit in
2600        # tests as of 2017-05-19
2601
2602        if len(self.fixed_params) > 0:
2603            mask = np.ix_(self._free_params_index, self._free_params_index)
2604            (tmp, singular_values) = pinv_extended(evaluated_hessian[mask])
2605            neg_cov = np.zeros_like(evaluated_hessian) * np.nan
2606            neg_cov[mask] = tmp
2607        else:
2608            (neg_cov, singular_values) = pinv_extended(evaluated_hessian)
2609
2610        self.model.update(self.params, transformed=True, includes_fixed=True)
2611        if self._rank is None:
2612            self._rank = np.linalg.matrix_rank(np.diag(singular_values))
2613        return -neg_cov
2614
2615    @cache_readonly
2616    def cov_params_approx(self):
2617        """
2618        (array) The variance / covariance matrix. Computed using the numerical
2619        Hessian approximated by complex step or finite differences methods.
2620        """
2621        return self._cov_params_approx(self._cov_approx_complex_step,
2622                                       self._cov_approx_centered)
2623
2624    def _cov_params_oim(self, approx_complex_step=True, approx_centered=False):
2625        evaluated_hessian = self.nobs_effective * self.model.hessian(
2626            self.params, hessian_method='oim', transformed=True,
2627            includes_fixed=True, approx_complex_step=approx_complex_step,
2628            approx_centered=approx_centered)
2629
2630        if len(self.fixed_params) > 0:
2631            mask = np.ix_(self._free_params_index, self._free_params_index)
2632            (tmp, singular_values) = pinv_extended(evaluated_hessian[mask])
2633            neg_cov = np.zeros_like(evaluated_hessian) * np.nan
2634            neg_cov[mask] = tmp
2635        else:
2636            (neg_cov, singular_values) = pinv_extended(evaluated_hessian)
2637
2638        self.model.update(self.params, transformed=True, includes_fixed=True)
2639        if self._rank is None:
2640            self._rank = np.linalg.matrix_rank(np.diag(singular_values))
2641        return -neg_cov
2642
2643    @cache_readonly
2644    def cov_params_oim(self):
2645        """
2646        (array) The variance / covariance matrix. Computed using the method
2647        from Harvey (1989).
2648        """
2649        return self._cov_params_oim(self._cov_approx_complex_step,
2650                                    self._cov_approx_centered)
2651
2652    def _cov_params_opg(self, approx_complex_step=True, approx_centered=False):
2653        evaluated_hessian = self.nobs_effective * self.model._hessian_opg(
2654            self.params, transformed=True, includes_fixed=True,
2655            approx_complex_step=approx_complex_step,
2656            approx_centered=approx_centered)
2657
2658        no_free_params = (self._free_params_index is not None and
2659                          len(self._free_params_index) == 0)
2660
2661        if no_free_params:
2662            neg_cov = np.zeros_like(evaluated_hessian) * np.nan
2663            singular_values = np.empty(0)
2664        elif len(self.fixed_params) > 0:
2665            mask = np.ix_(self._free_params_index, self._free_params_index)
2666            (tmp, singular_values) = pinv_extended(evaluated_hessian[mask])
2667            neg_cov = np.zeros_like(evaluated_hessian) * np.nan
2668            neg_cov[mask] = tmp
2669        else:
2670            (neg_cov, singular_values) = pinv_extended(evaluated_hessian)
2671
2672        self.model.update(self.params, transformed=True, includes_fixed=True)
2673        if self._rank is None:
2674            if no_free_params:
2675                self._rank = 0
2676            else:
2677                self._rank = np.linalg.matrix_rank(np.diag(singular_values))
2678        return -neg_cov
2679
2680    @cache_readonly
2681    def cov_params_opg(self):
2682        """
2683        (array) The variance / covariance matrix. Computed using the outer
2684        product of gradients method.
2685        """
2686        return self._cov_params_opg(self._cov_approx_complex_step,
2687                                    self._cov_approx_centered)
2688
2689    @cache_readonly
2690    def cov_params_robust(self):
2691        """
2692        (array) The QMLE variance / covariance matrix. Alias for
2693        `cov_params_robust_oim`
2694        """
2695        return self.cov_params_robust_oim
2696
2697    def _cov_params_robust_oim(self, approx_complex_step=True,
2698                               approx_centered=False):
2699        cov_opg = self._cov_params_opg(approx_complex_step=approx_complex_step,
2700                                       approx_centered=approx_centered)
2701
2702        evaluated_hessian = self.nobs_effective * self.model.hessian(
2703            self.params, hessian_method='oim', transformed=True,
2704            includes_fixed=True, approx_complex_step=approx_complex_step,
2705            approx_centered=approx_centered)
2706
2707        if len(self.fixed_params) > 0:
2708            mask = np.ix_(self._free_params_index, self._free_params_index)
2709            cov_params = np.zeros_like(evaluated_hessian) * np.nan
2710
2711            cov_opg = cov_opg[mask]
2712            evaluated_hessian = evaluated_hessian[mask]
2713
2714            tmp, singular_values = pinv_extended(
2715                np.dot(np.dot(evaluated_hessian, cov_opg), evaluated_hessian))
2716
2717            cov_params[mask] = tmp
2718        else:
2719            (cov_params, singular_values) = pinv_extended(
2720                np.dot(np.dot(evaluated_hessian, cov_opg), evaluated_hessian))
2721
2722        self.model.update(self.params, transformed=True, includes_fixed=True)
2723        if self._rank is None:
2724            self._rank = np.linalg.matrix_rank(np.diag(singular_values))
2725        return cov_params
2726
2727    @cache_readonly
2728    def cov_params_robust_oim(self):
2729        """
2730        (array) The QMLE variance / covariance matrix. Computed using the
2731        method from Harvey (1989) as the evaluated hessian.
2732        """
2733        return self._cov_params_robust_oim(self._cov_approx_complex_step,
2734                                           self._cov_approx_centered)
2735
2736    def _cov_params_robust_approx(self, approx_complex_step=True,
2737                                  approx_centered=False):
2738        cov_opg = self._cov_params_opg(approx_complex_step=approx_complex_step,
2739                                       approx_centered=approx_centered)
2740
2741        evaluated_hessian = self.nobs_effective * self.model.hessian(
2742            self.params, transformed=True, includes_fixed=True,
2743            method='approx', approx_complex_step=approx_complex_step)
2744        # TODO: Case with "not approx_complex_step" is not
2745        # hit in tests as of 2017-05-19
2746
2747        if len(self.fixed_params) > 0:
2748            mask = np.ix_(self._free_params_index, self._free_params_index)
2749            cov_params = np.zeros_like(evaluated_hessian) * np.nan
2750
2751            cov_opg = cov_opg[mask]
2752            evaluated_hessian = evaluated_hessian[mask]
2753
2754            tmp, singular_values = pinv_extended(
2755                np.dot(np.dot(evaluated_hessian, cov_opg), evaluated_hessian))
2756
2757            cov_params[mask] = tmp
2758        else:
2759            (cov_params, singular_values) = pinv_extended(
2760                np.dot(np.dot(evaluated_hessian, cov_opg), evaluated_hessian))
2761
2762        self.model.update(self.params, transformed=True, includes_fixed=True)
2763        if self._rank is None:
2764            self._rank = np.linalg.matrix_rank(np.diag(singular_values))
2765        return cov_params
2766
2767    @cache_readonly
2768    def cov_params_robust_approx(self):
2769        """
2770        (array) The QMLE variance / covariance matrix. Computed using the
2771        numerical Hessian as the evaluated hessian.
2772        """
2773        return self._cov_params_robust_approx(self._cov_approx_complex_step,
2774                                              self._cov_approx_centered)
2775
2776    def info_criteria(self, criteria, method='standard'):
2777        r"""
2778        Information criteria
2779
2780        Parameters
2781        ----------
2782        criteria : {'aic', 'bic', 'hqic'}
2783            The information criteria to compute.
2784        method : {'standard', 'lutkepohl'}
2785            The method for information criteria computation. Default is
2786            'standard' method; 'lutkepohl' computes the information criteria
2787            as in Lütkepohl (2007). See Notes for formulas.
2788
2789        Notes
2790        -----
2791        The `'standard'` formulas are:
2792
2793        .. math::
2794
2795            AIC & = -2 \log L(Y_n | \hat \psi) + 2 k \\
2796            BIC & = -2 \log L(Y_n | \hat \psi) + k \log n \\
2797            HQIC & = -2 \log L(Y_n | \hat \psi) + 2 k \log \log n \\
2798
2799        where :math:`\hat \psi` are the maximum likelihood estimates of the
2800        parameters, :math:`n` is the number of observations, and `k` is the
2801        number of estimated parameters.
2802
2803        Note that the `'standard'` formulas are returned from the `aic`, `bic`,
2804        and `hqic` results attributes.
2805
2806        The `'lutkepohl'` formulas are (Lütkepohl, 2010):
2807
2808        .. math::
2809
2810            AIC_L & = \log | Q | + \frac{2 k}{n} \\
2811            BIC_L & = \log | Q | + \frac{k \log n}{n} \\
2812            HQIC_L & = \log | Q | + \frac{2 k \log \log n}{n} \\
2813
2814        where :math:`Q` is the state covariance matrix. Note that the Lütkepohl
2815        definitions do not apply to all state space models, and should be used
2816        with care outside of SARIMAX and VARMAX models.
2817
2818        References
2819        ----------
2820        .. [*] Lütkepohl, Helmut. 2007. *New Introduction to Multiple Time*
2821           *Series Analysis.* Berlin: Springer.
2822        """
2823        criteria = criteria.lower()
2824        method = method.lower()
2825
2826        if method == 'standard':
2827            out = getattr(self, criteria)
2828        elif method == 'lutkepohl':
2829            if self.filter_results.state_cov.shape[-1] > 1:
2830                raise ValueError('Cannot compute Lütkepohl statistics for'
2831                                 ' models with time-varying state covariance'
2832                                 ' matrix.')
2833
2834            cov = self.filter_results.state_cov[:, :, 0]
2835            if criteria == 'aic':
2836                out = np.squeeze(np.linalg.slogdet(cov)[1] +
2837                                 2 * self.df_model / self.nobs_effective)
2838            elif criteria == 'bic':
2839                out = np.squeeze(np.linalg.slogdet(cov)[1] +
2840                                 self.df_model * np.log(self.nobs_effective) /
2841                                 self.nobs_effective)
2842            elif criteria == 'hqic':
2843                out = np.squeeze(np.linalg.slogdet(cov)[1] +
2844                                 2 * self.df_model *
2845                                 np.log(np.log(self.nobs_effective)) /
2846                                 self.nobs_effective)
2847            else:
2848                raise ValueError('Invalid information criteria')
2849
2850        else:
2851            raise ValueError('Invalid information criteria computation method')
2852
2853        return out
2854
2855    @cache_readonly
2856    def fittedvalues(self):
2857        """
2858        (array) The predicted values of the model. An (nobs x k_endog) array.
2859        """
2860        # This is a (k_endog x nobs array; do not want to squeeze in case of
2861        # the corner case where nobs = 1 (mostly a concern in the predict or
2862        # forecast functions, but here also to maintain consistency)
2863        fittedvalues = self.forecasts
2864        if fittedvalues is None:
2865            pass
2866        elif fittedvalues.shape[0] == 1:
2867            fittedvalues = fittedvalues[0, :]
2868        else:
2869            fittedvalues = fittedvalues.T
2870        return fittedvalues
2871
2872    @cache_readonly
2873    def hqic(self):
2874        """
2875        (float) Hannan-Quinn Information Criterion
2876        """
2877        # return (-2 * self.llf +
2878        #         2 * np.log(np.log(self.nobs_effective)) * self.df_model)
2879        return hqic(self.llf, self.nobs_effective, self.df_model)
2880
2881    @cache_readonly
2882    def llf_obs(self):
2883        """
2884        (float) The value of the log-likelihood function evaluated at `params`.
2885        """
2886        return self.filter_results.llf_obs
2887
2888    @cache_readonly
2889    def llf(self):
2890        """
2891        (float) The value of the log-likelihood function evaluated at `params`.
2892        """
2893        return self.filter_results.llf
2894
2895    @cache_readonly
2896    def loglikelihood_burn(self):
2897        """
2898        (float) The number of observations during which the likelihood is not
2899        evaluated.
2900        """
2901        return self.filter_results.loglikelihood_burn
2902
2903    @cache_readonly
2904    def mae(self):
2905        """
2906        (float) Mean absolute error
2907        """
2908        return np.mean(np.abs(self.resid))
2909
2910    @cache_readonly
2911    def mse(self):
2912        """
2913        (float) Mean squared error
2914        """
2915        return self.sse / self.nobs
2916
2917    @cache_readonly
2918    def pvalues(self):
2919        """
2920        (array) The p-values associated with the z-statistics of the
2921        coefficients. Note that the coefficients are assumed to have a Normal
2922        distribution.
2923        """
2924        pvalues = np.zeros_like(self.zvalues) * np.nan
2925        mask = np.ones_like(pvalues, dtype=bool)
2926        mask[self._free_params_index] = True
2927        mask &= ~np.isnan(self.zvalues)
2928        pvalues[mask] = norm.sf(np.abs(self.zvalues[mask])) * 2
2929        return pvalues
2930
2931    @cache_readonly
2932    def resid(self):
2933        """
2934        (array) The model residuals. An (nobs x k_endog) array.
2935        """
2936        # This is a (k_endog x nobs array; do not want to squeeze in case of
2937        # the corner case where nobs = 1 (mostly a concern in the predict or
2938        # forecast functions, but here also to maintain consistency)
2939        resid = self.forecasts_error
2940        if resid is None:
2941            pass
2942        elif resid.shape[0] == 1:
2943            resid = resid[0, :]
2944        else:
2945            resid = resid.T
2946        return resid
2947
2948    @property
2949    def states(self):
2950        if self.model._index_generated and not self.model._index_none:
2951            warnings.warn('No supported index is available. The `states`'
2952                          ' DataFrame uses a generated integer index',
2953                          ValueWarning)
2954        return self._states
2955
2956    @cache_readonly
2957    def sse(self):
2958        """
2959        (float) Sum of squared errors
2960        """
2961        return np.sum(self.resid**2)
2962
2963    @cache_readonly
2964    def zvalues(self):
2965        """
2966        (array) The z-statistics for the coefficients.
2967        """
2968        return self.params / self.bse
2969
2970    def test_normality(self, method):
2971        """
2972        Test for normality of standardized residuals.
2973
2974        Null hypothesis is normality.
2975
2976        Parameters
2977        ----------
2978        method : {'jarquebera', None}
2979            The statistical test for normality. Must be 'jarquebera' for
2980            Jarque-Bera normality test. If None, an attempt is made to select
2981            an appropriate test.
2982
2983        See Also
2984        --------
2985        statsmodels.stats.stattools.jarque_bera
2986            The Jarque-Bera test of normality.
2987
2988        Notes
2989        -----
2990        Let `d` = max(loglikelihood_burn, nobs_diffuse); this test is
2991        calculated ignoring the first `d` residuals.
2992
2993        In the case of missing data, the maintained hypothesis is that the
2994        data are missing completely at random. This test is then run on the
2995        standardized residuals excluding those corresponding to missing
2996        observations.
2997        """
2998        if method is None:
2999            method = 'jarquebera'
3000
3001        if self.standardized_forecasts_error is None:
3002            raise ValueError('Cannot compute test statistic when standardized'
3003                             ' forecast errors have not been computed.')
3004
3005        if method == 'jarquebera':
3006            from statsmodels.stats.stattools import jarque_bera
3007            d = np.maximum(self.loglikelihood_burn, self.nobs_diffuse)
3008            output = []
3009            for i in range(self.model.k_endog):
3010                resid = self.filter_results.standardized_forecasts_error[i, d:]
3011                mask = ~np.isnan(resid)
3012                output.append(jarque_bera(resid[mask]))
3013        else:
3014            raise NotImplementedError('Invalid normality test method.')
3015
3016        return np.array(output)
3017
3018    def test_heteroskedasticity(self, method, alternative='two-sided',
3019                                use_f=True):
3020        r"""
3021        Test for heteroskedasticity of standardized residuals
3022
3023        Tests whether the sum-of-squares in the first third of the sample is
3024        significantly different than the sum-of-squares in the last third
3025        of the sample. Analogous to a Goldfeld-Quandt test. The null hypothesis
3026        is of no heteroskedasticity.
3027
3028        Parameters
3029        ----------
3030        method : {'breakvar', None}
3031            The statistical test for heteroskedasticity. Must be 'breakvar'
3032            for test of a break in the variance. If None, an attempt is
3033            made to select an appropriate test.
3034        alternative : str, 'increasing', 'decreasing' or 'two-sided'
3035            This specifies the alternative for the p-value calculation. Default
3036            is two-sided.
3037        use_f : bool, optional
3038            Whether or not to compare against the asymptotic distribution
3039            (chi-squared) or the approximate small-sample distribution (F).
3040            Default is True (i.e. default is to compare against an F
3041            distribution).
3042
3043        Returns
3044        -------
3045        output : ndarray
3046            An array with `(test_statistic, pvalue)` for each endogenous
3047            variable. The array is then sized `(k_endog, 2)`. If the method is
3048            called as `het = res.test_heteroskedasticity()`, then `het[0]` is
3049            an array of size 2 corresponding to the first endogenous variable,
3050            where `het[0][0]` is the test statistic, and `het[0][1]` is the
3051            p-value.
3052
3053        See Also
3054        --------
3055        statsmodels.tsa.stattools.breakvar_heteroskedasticity_test
3056
3057        Notes
3058        -----
3059        The null hypothesis is of no heteroskedasticity.
3060
3061        For :math:`h = [T/3]`, the test statistic is:
3062
3063        .. math::
3064
3065            H(h) = \sum_{t=T-h+1}^T  \tilde v_t^2
3066            \Bigg / \sum_{t=d+1}^{d+1+h} \tilde v_t^2
3067
3068        where :math:`d` = max(loglikelihood_burn, nobs_diffuse)` (usually
3069        corresponding to diffuse initialization under either the approximate
3070        or exact approach).
3071
3072        This statistic can be tested against an :math:`F(h,h)` distribution.
3073        Alternatively, :math:`h H(h)` is asymptotically distributed according
3074        to :math:`\chi_h^2`; this second test can be applied by passing
3075        `use_f=True` as an argument.
3076
3077        See section 5.4 of [1]_ for the above formula and discussion, as well
3078        as additional details.
3079
3080        TODO
3081
3082        - Allow specification of :math:`h`
3083
3084        References
3085        ----------
3086        .. [1] Harvey, Andrew C. 1990. *Forecasting, Structural Time Series*
3087               *Models and the Kalman Filter.* Cambridge University Press.
3088        """
3089        if method is None:
3090            method = 'breakvar'
3091
3092        if self.standardized_forecasts_error is None:
3093            raise ValueError('Cannot compute test statistic when standardized'
3094                             ' forecast errors have not been computed.')
3095
3096        if method == 'breakvar':
3097            from statsmodels.tsa.stattools import (
3098                breakvar_heteroskedasticity_test
3099                )
3100            # Store some values
3101            resid = self.filter_results.standardized_forecasts_error
3102            d = np.maximum(self.loglikelihood_burn, self.nobs_diffuse)
3103            # This differs from self.nobs_effective because here we want to
3104            # exclude exact diffuse periods, whereas self.nobs_effective only
3105            # excludes explicitly burned (usually approximate diffuse) periods.
3106            nobs_effective = self.nobs - d
3107            h = int(np.round(nobs_effective / 3))
3108
3109            test_statistics = []
3110            p_values = []
3111            for i in range(self.model.k_endog):
3112                test_statistic, p_value = breakvar_heteroskedasticity_test(
3113                    resid[i, d:],
3114                    subset_length=h,
3115                    alternative=alternative,
3116                    use_f=use_f
3117                    )
3118                test_statistics.append(test_statistic)
3119                p_values.append(p_value)
3120
3121            output = np.c_[test_statistics, p_values]
3122        else:
3123            raise NotImplementedError('Invalid heteroskedasticity test'
3124                                      ' method.')
3125
3126        return output
3127
3128    def test_serial_correlation(self, method, df_adjust=False, lags=None):
3129        """
3130        Ljung-Box test for no serial correlation of standardized residuals
3131
3132        Null hypothesis is no serial correlation.
3133
3134        Parameters
3135        ----------
3136        method : {'ljungbox','boxpierece', None}
3137            The statistical test for serial correlation. If None, an attempt is
3138            made to select an appropriate test.
3139        lags : None, int or array_like
3140            If lags is an integer then this is taken to be the largest lag
3141            that is included, the test result is reported for all smaller lag
3142            length.
3143            If lags is a list or array, then all lags are included up to the
3144            largest lag in the list, however only the tests for the lags in the
3145            list are reported.
3146            If lags is None, then the default maxlag is 12*(nobs/100)^{1/4}.
3147            After 0.12 the default maxlag will change to min(10, nobs // 5) for
3148            non-seasonal models and min(2*m, nobs // 5) for seasonal time
3149            series where m is the seasonal period.
3150        df_adjust : bool, optional
3151            If True, the degrees of freedom consumed by the model is subtracted
3152            from the degrees-of-freedom used in the test so that the adjusted
3153            dof for the statistics are lags - model_df. In an ARMA model, this
3154            value is usually p+q where p is the AR order and q is the MA order.
3155            When using df_adjust, it is not possible to use tests based on
3156            fewer than model_df lags.
3157        Returns
3158        -------
3159        output : ndarray
3160            An array with `(test_statistic, pvalue)` for each endogenous
3161            variable and each lag. The array is then sized
3162            `(k_endog, 2, lags)`. If the method is called as
3163            `ljungbox = res.test_serial_correlation()`, then `ljungbox[i]`
3164            holds the results of the Ljung-Box test (as would be returned by
3165            `statsmodels.stats.diagnostic.acorr_ljungbox`) for the `i` th
3166            endogenous variable.
3167
3168        See Also
3169        --------
3170        statsmodels.stats.diagnostic.acorr_ljungbox
3171            Ljung-Box test for serial correlation.
3172
3173        Notes
3174        -----
3175        Let `d` = max(loglikelihood_burn, nobs_diffuse); this test is
3176        calculated ignoring the first `d` residuals.
3177
3178        Output is nan for any endogenous variable which has missing values.
3179        """
3180        if method is None:
3181            method = 'ljungbox'
3182
3183        if self.standardized_forecasts_error is None:
3184            raise ValueError('Cannot compute test statistic when standardized'
3185                             ' forecast errors have not been computed.')
3186
3187        if method == 'ljungbox' or method == 'boxpierce':
3188            from statsmodels.stats.diagnostic import acorr_ljungbox
3189            d = np.maximum(self.loglikelihood_burn, self.nobs_diffuse)
3190            # This differs from self.nobs_effective because here we want to
3191            # exclude exact diffuse periods, whereas self.nobs_effective only
3192            # excludes explicitly burned (usually approximate diffuse) periods.
3193            nobs_effective = self.nobs - d
3194            output = []
3195
3196            # Default lags for acorr_ljungbox is 40, but may not always have
3197            # that many observations
3198            if lags is None:
3199                seasonal_periods = getattr(self.model, "seasonal_periods", 0)
3200                if seasonal_periods:
3201                    lags = min(2 * seasonal_periods, nobs_effective // 5)
3202                else:
3203                    lags = min(10, nobs_effective // 5)
3204
3205            model_df = 0
3206            if df_adjust:
3207                model_df = max(0, self.df_model - self.k_diffuse_states - 1)
3208
3209            for i in range(self.model.k_endog):
3210                results = acorr_ljungbox(
3211                    self.filter_results.standardized_forecasts_error[i][d:],
3212                    lags=lags, boxpierce=(method == 'boxpierce'),
3213                    model_df=model_df, return_df=False)
3214                if method == 'ljungbox':
3215                    output.append(results[0:2])
3216                else:
3217                    output.append(results[2:])
3218
3219            output = np.c_[output]
3220        else:
3221            raise NotImplementedError('Invalid serial correlation test'
3222                                      ' method.')
3223        return output
3224
3225    def get_prediction(self, start=None, end=None, dynamic=False,
3226                       index=None, exog=None, extend_model=None,
3227                       extend_kwargs=None, **kwargs):
3228        """
3229        In-sample prediction and out-of-sample forecasting
3230
3231        Parameters
3232        ----------
3233        start : int, str, or datetime, optional
3234            Zero-indexed observation number at which to start forecasting,
3235            i.e., the first forecast is start. Can also be a date string to
3236            parse or a datetime type. Default is the the zeroth observation.
3237        end : int, str, or datetime, optional
3238            Zero-indexed observation number at which to end forecasting, i.e.,
3239            the last forecast is end. Can also be a date string to
3240            parse or a datetime type. However, if the dates index does not
3241            have a fixed frequency, end must be an integer index if you
3242            want out of sample prediction. Default is the last observation in
3243            the sample.
3244        dynamic : bool, int, str, or datetime, optional
3245            Integer offset relative to `start` at which to begin dynamic
3246            prediction. Can also be an absolute date string to parse or a
3247            datetime type (these are not interpreted as offsets).
3248            Prior to this observation, true endogenous values will be used for
3249            prediction; starting with this observation and continuing through
3250            the end of prediction, forecasted endogenous values will be used
3251            instead.
3252        **kwargs
3253            Additional arguments may required for forecasting beyond the end
3254            of the sample. See `FilterResults.predict` for more details.
3255
3256        Returns
3257        -------
3258        predictions : PredictionResults
3259            PredictionResults instance containing in-sample predictions and
3260            out-of-sample forecasts.
3261        """
3262        if start is None:
3263            start = 0
3264
3265        # Handle start, end, dynamic
3266        start, end, out_of_sample, prediction_index = (
3267            self.model._get_prediction_index(start, end, index))
3268
3269        # Handle `dynamic`
3270        if isinstance(dynamic, (str, dt.datetime, pd.Timestamp)):
3271            dynamic, _, _ = self.model._get_index_loc(dynamic)
3272            # Convert to offset relative to start
3273            dynamic = dynamic - start
3274
3275        # If we have out-of-sample forecasting and `exog` or in general any
3276        # kind of time-varying state space model, then we need to create an
3277        # extended model to get updated state space system matrices
3278        if extend_model is None:
3279            extend_model = (self.model.exog is not None or
3280                            not self.filter_results.time_invariant)
3281        if out_of_sample and extend_model:
3282            kwargs = self.model._get_extension_time_varying_matrices(
3283                self.params, exog, out_of_sample, extend_kwargs,
3284                transformed=True, includes_fixed=True, **kwargs)
3285
3286        # Make sure the model class has the current parameters
3287        self.model.update(self.params, transformed=True, includes_fixed=True)
3288
3289        # Perform the prediction
3290        # This is a (k_endog x npredictions) array; do not want to squeeze in
3291        # case of npredictions = 1
3292        prediction_results = self.filter_results.predict(
3293            start, end + out_of_sample + 1, dynamic, **kwargs)
3294
3295        # Return a new mlemodel.PredictionResults object
3296        return PredictionResultsWrapper(PredictionResults(
3297            self, prediction_results, row_labels=prediction_index))
3298
3299    def get_forecast(self, steps=1, **kwargs):
3300        """
3301        Out-of-sample forecasts and prediction intervals
3302
3303        Parameters
3304        ----------
3305        steps : int, str, or datetime, optional
3306            If an integer, the number of steps to forecast from the end of the
3307            sample. Can also be a date string to parse or a datetime type.
3308            However, if the dates index does not have a fixed frequency, steps
3309            must be an integer. Default
3310        **kwargs
3311            Additional arguments may required for forecasting beyond the end
3312            of the sample. See `FilterResults.predict` for more details.
3313
3314        Returns
3315        -------
3316        predictions : PredictionResults
3317            PredictionResults instance containing in-sample predictions and
3318            out-of-sample forecasts.
3319        """
3320        if isinstance(steps, int):
3321            end = self.nobs + steps - 1
3322        else:
3323            end = steps
3324        return self.get_prediction(start=self.nobs, end=end, **kwargs)
3325
3326    def predict(self, start=None, end=None, dynamic=False, **kwargs):
3327        """
3328        In-sample prediction and out-of-sample forecasting
3329
3330        Parameters
3331        ----------
3332        start : int, str, or datetime, optional
3333            Zero-indexed observation number at which to start forecasting,
3334            i.e., the first forecast is start. Can also be a date string to
3335            parse or a datetime type. Default is the the zeroth observation.
3336        end : int, str, or datetime, optional
3337            Zero-indexed observation number at which to end forecasting, i.e.,
3338            the last forecast is end. Can also be a date string to
3339            parse or a datetime type. However, if the dates index does not
3340            have a fixed frequency, end must be an integer index if you
3341            want out of sample prediction. Default is the last observation in
3342            the sample.
3343        dynamic : bool, int, str, or datetime, optional
3344            Integer offset relative to `start` at which to begin dynamic
3345            prediction. Can also be an absolute date string to parse or a
3346            datetime type (these are not interpreted as offsets).
3347            Prior to this observation, true endogenous values will be used for
3348            prediction; starting with this observation and continuing through
3349            the end of prediction, forecasted endogenous values will be used
3350            instead.
3351        **kwargs
3352            Additional arguments may required for forecasting beyond the end
3353            of the sample. See `FilterResults.predict` for more details.
3354
3355        Returns
3356        -------
3357        forecast : array_like
3358            Array of out of in-sample predictions and / or out-of-sample
3359            forecasts. An (npredict x k_endog) array.
3360
3361        See Also
3362        --------
3363        forecast
3364            Out-of-sample forecasts
3365        get_prediction
3366            Prediction results and confidence intervals
3367        """
3368        # Perform the prediction
3369        prediction_results = self.get_prediction(start, end, dynamic, **kwargs)
3370        return prediction_results.predicted_mean
3371
3372    def forecast(self, steps=1, **kwargs):
3373        """
3374        Out-of-sample forecasts
3375
3376        Parameters
3377        ----------
3378        steps : int, str, or datetime, optional
3379            If an integer, the number of steps to forecast from the end of the
3380            sample. Can also be a date string to parse or a datetime type.
3381            However, if the dates index does not have a fixed frequency, steps
3382            must be an integer. Default
3383        **kwargs
3384            Additional arguments may required for forecasting beyond the end
3385            of the sample. See `FilterResults.predict` for more details.
3386
3387        Returns
3388        -------
3389        forecast : PredictionResults
3390            PredictionResults instance containing in-sample predictions and
3391            out-of-sample forecasts.
3392        """
3393        if isinstance(steps, int):
3394            end = self.nobs + steps - 1
3395        else:
3396            end = steps
3397        return self.predict(start=self.nobs, end=end, **kwargs)
3398
3399    def simulate(self, nsimulations, measurement_shocks=None,
3400                 state_shocks=None, initial_state=None, anchor=None,
3401                 repetitions=None, exog=None, extend_model=None,
3402                 extend_kwargs=None, **kwargs):
3403        r"""
3404        Simulate a new time series following the state space model
3405
3406        Parameters
3407        ----------
3408        nsimulations : int
3409            The number of observations to simulate. If the model is
3410            time-invariant this can be any number. If the model is
3411            time-varying, then this number must be less than or equal to the
3412            number
3413        measurement_shocks : array_like, optional
3414            If specified, these are the shocks to the measurement equation,
3415            :math:`\varepsilon_t`. If unspecified, these are automatically
3416            generated using a pseudo-random number generator. If specified,
3417            must be shaped `nsimulations` x `k_endog`, where `k_endog` is the
3418            same as in the state space model.
3419        state_shocks : array_like, optional
3420            If specified, these are the shocks to the state equation,
3421            :math:`\eta_t`. If unspecified, these are automatically
3422            generated using a pseudo-random number generator. If specified,
3423            must be shaped `nsimulations` x `k_posdef` where `k_posdef` is the
3424            same as in the state space model.
3425        initial_state : array_like, optional
3426            If specified, this is the initial state vector to use in
3427            simulation, which should be shaped (`k_states` x 1), where
3428            `k_states` is the same as in the state space model. If unspecified,
3429            but the model has been initialized, then that initialization is
3430            used. This must be specified if `anchor` is anything other than
3431            "start" or 0.
3432        anchor : int, str, or datetime, optional
3433            Starting point from which to begin the simulations; type depends on
3434            the index of the given `endog` model. Two special cases are the
3435            strings 'start' and 'end', which refer to starting at the beginning
3436            and end of the sample, respectively. If a date/time index was
3437            provided to the model, then this argument can be a date string to
3438            parse or a datetime type. Otherwise, an integer index should be
3439            given. Default is 'start'.
3440        repetitions : int, optional
3441            Number of simulated paths to generate. Default is 1 simulated path.
3442        exog : array_like, optional
3443            New observations of exogenous regressors, if applicable.
3444
3445        Returns
3446        -------
3447        simulated_obs : ndarray
3448            An array of simulated observations. If `repetitions=None`, then it
3449            will be shaped (nsimulations x k_endog) or (nsimulations,) if
3450            `k_endog=1`. Otherwise it will be shaped
3451            (nsimulations x k_endog x repetitions). If the model was given
3452            Pandas input then the output will be a Pandas object. If
3453            `k_endog > 1` and `repetitions` is not None, then the output will
3454            be a Pandas DataFrame that has a MultiIndex for the columns, with
3455            the first level containing the names of the `endog` variables and
3456            the second level containing the repetition number.
3457        """
3458        # Get the starting location
3459        if anchor is None or anchor == 'start':
3460            iloc = 0
3461        elif anchor == 'end':
3462            iloc = self.nobs
3463        else:
3464            iloc, _, _ = self.model._get_index_loc(anchor)
3465            if isinstance(iloc, slice):
3466                iloc = iloc.start
3467
3468        if iloc < 0:
3469            iloc = self.nobs + iloc
3470        if iloc > self.nobs:
3471            raise ValueError('Cannot anchor simulation outside of the sample.')
3472
3473        # Setup the initial state
3474        if initial_state is None:
3475            initial_state_moments = (
3476                self.predicted_state[:, iloc],
3477                self.predicted_state_cov[:, :, iloc])
3478
3479            _repetitions = 1 if repetitions is None else repetitions
3480
3481            initial_state = np.random.multivariate_normal(
3482                *initial_state_moments, size=_repetitions).T
3483
3484        scale = self.scale if self.filter_results.filter_concentrated else None
3485        with self.model.ssm.fixed_scale(scale):
3486            sim = self.model.simulate(
3487                self.params, nsimulations,
3488                measurement_shocks=measurement_shocks,
3489                state_shocks=state_shocks, initial_state=initial_state,
3490                anchor=anchor, repetitions=repetitions, exog=exog,
3491                transformed=True, includes_fixed=True,
3492                extend_model=extend_model, extend_kwargs=extend_kwargs,
3493                **kwargs)
3494
3495        return sim
3496
3497    def impulse_responses(self, steps=1, impulse=0, orthogonalized=False,
3498                          cumulative=False, **kwargs):
3499        """
3500        Impulse response function
3501
3502        Parameters
3503        ----------
3504        steps : int, optional
3505            The number of steps for which impulse responses are calculated.
3506            Default is 1. Note that for time-invariant models, the initial
3507            impulse is not counted as a step, so if `steps=1`, the output will
3508            have 2 entries.
3509        impulse : int, str or array_like
3510            If an integer, the state innovation to pulse; must be between 0
3511            and `k_posdef-1`. If a str, it indicates which column of df
3512            the unit (1) impulse is given.
3513            Alternatively, a custom impulse vector may be provided; must be
3514            shaped `k_posdef x 1`.
3515        orthogonalized : bool, optional
3516            Whether or not to perform impulse using orthogonalized innovations.
3517            Note that this will also affect custum `impulse` vectors. Default
3518            is False.
3519        cumulative : bool, optional
3520            Whether or not to return cumulative impulse responses. Default is
3521            False.
3522        anchor : int, str, or datetime, optional
3523            Time point within the sample for the state innovation impulse. Type
3524            depends on the index of the given `endog` in the model. Two special
3525            cases are the strings 'start' and 'end', which refer to setting the
3526            impulse at the first and last points of the sample, respectively.
3527            Integer values can run from 0 to `nobs - 1`, or can be negative to
3528            apply negative indexing. Finally, if a date/time index was provided
3529            to the model, then this argument can be a date string to parse or a
3530            datetime type. Default is 'start'.
3531        exog : array_like, optional
3532            New observations of exogenous regressors, if applicable.
3533        **kwargs
3534            If the model has time-varying design or transition matrices and the
3535            combination of `anchor` and `steps` implies creating impulse
3536            responses for the out-of-sample period, then these matrices must
3537            have updated values provided for the out-of-sample steps. For
3538            example, if `design` is a time-varying component, `nobs` is 10,
3539            `anchor=1`, and `steps` is 15, a (`k_endog` x `k_states` x 7)
3540            matrix must be provided with the new design matrix values.
3541
3542        Returns
3543        -------
3544        impulse_responses : ndarray
3545            Responses for each endogenous variable due to the impulse
3546            given by the `impulse` argument. For a time-invariant model, the
3547            impulse responses are given for `steps + 1` elements (this gives
3548            the "initial impulse" followed by `steps` responses for the
3549            important cases of VAR and SARIMAX models), while for time-varying
3550            models the impulse responses are only given for `steps` elements
3551            (to avoid having to unexpectedly provide updated time-varying
3552            matrices).
3553
3554        Notes
3555        -----
3556        Intercepts in the measurement and state equation are ignored when
3557        calculating impulse responses.
3558        """
3559        scale = self.scale if self.filter_results.filter_concentrated else None
3560        with self.model.ssm.fixed_scale(scale):
3561            irfs = self.model.impulse_responses(self.params, steps, impulse,
3562                                                orthogonalized, cumulative,
3563                                                **kwargs)
3564            # These are wrapped automatically, so just return the array
3565            if isinstance(irfs, (pd.Series, pd.DataFrame)):
3566                irfs = irfs.values
3567        return irfs
3568
3569    def _apply(self, mod, refit=False, fit_kwargs=None, **kwargs):
3570        if fit_kwargs is None:
3571            fit_kwargs = {}
3572
3573        if refit:
3574            fit_kwargs.setdefault('start_params', self.params)
3575            if self._has_fixed_params:
3576                fit_kwargs.setdefault('includes_fixed', True)
3577                res = mod.fit_constrained(self._fixed_params, **fit_kwargs)
3578            else:
3579                res = mod.fit(**fit_kwargs)
3580        else:
3581            if 'cov_type' in fit_kwargs:
3582                raise ValueError('Cannot specify covariance type in'
3583                                 ' `fit_kwargs` unless refitting'
3584                                 ' parameters (not available in extend).')
3585            if 'cov_kwds' in fit_kwargs:
3586                raise ValueError('Cannot specify covariance keyword arguments'
3587                                 ' in `fit_kwargs` unless refitting'
3588                                 ' parameters (not available in extend).')
3589
3590            if self.cov_type == 'none':
3591                fit_kwargs['cov_type'] = 'none'
3592            else:
3593                fit_kwargs['cov_type'] = 'custom'
3594                fit_kwargs['cov_kwds'] = {
3595                    'custom_cov_type': self.cov_type,
3596                    'custom_cov_params': self.cov_params_default,
3597                    'custom_description': ('Parameters and standard errors'
3598                                           ' were estimated using a different'
3599                                           ' dataset and were then applied to'
3600                                           ' this dataset. %s'
3601                                           % self.cov_kwds['description'])}
3602
3603            if self.smoother_results is not None:
3604                func = mod.smooth
3605            else:
3606                func = mod.filter
3607
3608            if self._has_fixed_params:
3609                with mod.fix_params(self._fixed_params):
3610                    fit_kwargs.setdefault('includes_fixed', True)
3611                    res = func(self.params, **fit_kwargs)
3612            else:
3613                res = func(self.params, **fit_kwargs)
3614
3615        return res
3616
3617    def _news_previous_results(self, previous, start, end, periods):
3618        # Compute the news
3619        out = self.smoother_results.news(previous.smoother_results,
3620                                         start=start, end=end)
3621        return out
3622
3623    def _news_updated_results(self, updated, start, end, periods):
3624        return updated._news_previous_results(self, start, end, periods)
3625
3626    def _news_previous_data(self, endog, start, end, periods, exog):
3627        previous = self.apply(endog, exog=exog, copy_initialization=True)
3628        return self._news_previous_results(previous, start, end, periods)
3629
3630    def _news_updated_data(self, endog, start, end, periods, exog):
3631        updated = self.apply(endog, exog=exog, copy_initialization=True)
3632        return self._news_updated_results(updated, start, end, periods)
3633
3634    def news(self, comparison, impact_date=None, impacted_variable=None,
3635             start=None, end=None, periods=None, exog=None,
3636             comparison_type=None, return_raw=False, tolerance=1e-10,
3637             **kwargs):
3638        """
3639        Compute impacts from updated data (news and revisions)
3640
3641        Parameters
3642        ----------
3643        comparison : array_like or MLEResults
3644            An updated dataset with updated and/or revised data from which the
3645            news can be computed, or an updated or previous results object
3646            to use in computing the news.
3647        impact_date : int, str, or datetime, optional
3648            A single specific period of impacts from news and revisions to
3649            compute. Can also be a date string to parse or a datetime type.
3650            This argument cannot be used in combination with `start`, `end`, or
3651            `periods`. Default is the first out-of-sample observation.
3652        impacted_variable : str, list, array, or slice, optional
3653            Observation variable label or slice of labels specifying that only
3654            specific impacted variables should be shown in the News output. The
3655            impacted variable(s) describe the variables that were *affected* by
3656            the news. If you do not know the labels for the variables, check
3657            the `endog_names` attribute of the model instance.
3658        start : int, str, or datetime, optional
3659            The first period of impacts from news and revisions to compute.
3660            Can also be a date string to parse or a datetime type. Default is
3661            the first out-of-sample observation.
3662        end : int, str, or datetime, optional
3663            The last period of impacts from news and revisions to compute.
3664            Can also be a date string to parse or a datetime type. Default is
3665            the first out-of-sample observation.
3666        periods : int, optional
3667            The number of periods of impacts from news and revisions to
3668            compute.
3669        exog : array_like, optional
3670            Array of exogenous regressors for the out-of-sample period, if
3671            applicable.
3672        comparison_type : {None, 'previous', 'updated'}
3673            This denotes whether the `comparison` argument represents a
3674            *previous* results object or dataset or an *updated* results object
3675            or dataset. If not specified, then an attempt is made to determine
3676            the comparison type.
3677        return_raw : bool, optional
3678            Whether or not to return only the specific output or a full
3679            results object. Default is to return a full results object.
3680        tolerance : float, optional
3681            The numerical threshold for determining zero impact. Default is
3682            that any impact less than 1e-10 is assumed to be zero.
3683
3684        Returns
3685        -------
3686        NewsResults
3687            Impacts of data revisions and news on estimates
3688
3689        References
3690        ----------
3691        .. [1] Bańbura, Marta, and Michele Modugno.
3692               "Maximum likelihood estimation of factor models on datasets with
3693               arbitrary pattern of missing data."
3694               Journal of Applied Econometrics 29, no. 1 (2014): 133-160.
3695        .. [2] Bańbura, Marta, Domenico Giannone, and Lucrezia Reichlin.
3696               "Nowcasting."
3697               The Oxford Handbook of Economic Forecasting. July 8, 2011.
3698        .. [3] Bańbura, Marta, Domenico Giannone, Michele Modugno, and Lucrezia
3699               Reichlin.
3700               "Now-casting and the real-time data flow."
3701               In Handbook of economic forecasting, vol. 2, pp. 195-237.
3702               Elsevier, 2013.
3703        """
3704        # Validate input
3705        if self.smoother_results is None:
3706            raise ValueError('Cannot compute news without Kalman smoother'
3707                             ' results.')
3708
3709        # If we were given data, create a new results object
3710        comparison_dataset = not isinstance(
3711            comparison, (MLEResults, MLEResultsWrapper))
3712        if comparison_dataset:
3713            # If `exog` is longer than `comparison`, then we extend it to match
3714            nobs_endog = len(comparison)
3715            nobs_exog = len(exog) if exog is not None else nobs_endog
3716
3717            if nobs_exog > nobs_endog:
3718                _, _, _, ix = self.model._get_prediction_index(
3719                    start=0, end=nobs_exog - 1)
3720                # TODO: check that the index of `comparison` matches the model
3721                comparison = np.asarray(comparison)
3722                if comparison.ndim < 2:
3723                    comparison = np.atleast_2d(comparison).T
3724                if (comparison.ndim != 2 or
3725                        comparison.shape[1] != self.model.k_endog):
3726                    raise ValueError('Invalid shape for `comparison`. Must'
3727                                     f' contain {self.model.k_endog} columns.')
3728                extra = np.zeros((nobs_exog - nobs_endog,
3729                                  self.model.k_endog)) * np.nan
3730                comparison = pd.DataFrame(
3731                    np.concatenate([comparison, extra], axis=0), index=ix,
3732                    columns=self.model.endog_names)
3733
3734            # Get the results object
3735            comparison = self.apply(comparison, exog=exog,
3736                                    copy_initialization=True, **kwargs)
3737
3738        # Now, figure out the `updated` versus `previous` results objects
3739        nmissing = self.filter_results.missing.sum()
3740        nmissing_comparison = comparison.filter_results.missing.sum()
3741        if (comparison_type == 'updated' or (comparison_type is None and (
3742                comparison.nobs > self.nobs or
3743                (comparison.nobs == self.nobs and
3744                 nmissing > nmissing_comparison)))):
3745            updated = comparison
3746            previous = self
3747        elif (comparison_type == 'previous' or (comparison_type is None and (
3748                comparison.nobs < self.nobs or
3749                (comparison.nobs == self.nobs and
3750                 nmissing < nmissing_comparison)))):
3751            updated = self
3752            previous = comparison
3753        else:
3754            raise ValueError('Could not automatically determine the type'
3755                             ' of comparison requested to compute the'
3756                             ' News, so it must be specified as "updated"'
3757                             ' or "previous", using the `comparison_type`'
3758                             ' keyword argument')
3759
3760        # Check that the index of `updated` is a superset of the
3761        # index of `previous`
3762        # Note: the try/except block is for Pandas < 0.25, in which
3763        # `PeriodIndex.difference` raises a ValueError if the argument is not
3764        # also a `PeriodIndex`.
3765        diff = previous.model._index.difference(updated.model._index)
3766        if len(diff) > 0:
3767            raise ValueError('The index associated with the updated results is'
3768                             ' not a superset of the index associated with the'
3769                             ' previous results, and so these datasets do not'
3770                             ' appear to be related. Can only compute the'
3771                             ' news by comparing this results set to previous'
3772                             ' results objects.')
3773
3774        # Handle start, end, periods
3775        # There doesn't seem to be any universal defaults that both (a) make
3776        # sense for all data update combinations, and (b) work with both
3777        # time-invariant and time-varying models. So we require that the user
3778        # specify exactly two of start, end, periods.
3779        if impact_date is not None:
3780            if not (start is None and end is None and periods is None):
3781                raise ValueError('Cannot use the `impact_date` argument in'
3782                                 ' combination with `start`, `end`, or'
3783                                 ' `periods`.')
3784            start = impact_date
3785            periods = 1
3786        if start is None and end is None and periods is None:
3787            start = previous.nobs - 1
3788            end = previous.nobs - 1
3789        if int(start is None) + int(end is None) + int(periods is None) != 1:
3790            raise ValueError('Of the three parameters: start, end, and'
3791                             ' periods, exactly two must be specified')
3792        # If we have the `periods` object, we need to convert `start`/`end` to
3793        # integers so that we can compute the other one. That's because
3794        # _get_prediction_index doesn't support a `periods` argument
3795        elif start is not None and periods is not None:
3796            start, _, _, _ = self.model._get_prediction_index(start, start)
3797            end = start + (periods - 1)
3798        elif end is not None and periods is not None:
3799            _, end, _, _ = self.model._get_prediction_index(end, end)
3800            start = end - (periods - 1)
3801        elif start is not None and end is not None:
3802            pass
3803
3804        # Get the integer-based start, end and the prediction index
3805        start, end, out_of_sample, prediction_index = (
3806            updated.model._get_prediction_index(start, end))
3807        end = end + out_of_sample
3808
3809        # News results will always use Pandas, so if the model's data was not
3810        # from Pandas, we'll create an index, as if the model's data had been
3811        # given a default Pandas index.
3812        if prediction_index is None:
3813            prediction_index = pd.RangeIndex(start=start, stop=end + 1)
3814
3815        # For time-varying models try to create an appended `updated` model
3816        # with NaN values. Do not extend the model if this was already done
3817        # above (i.e. the case that `comparison` was a new dataset), because
3818        # in that case `exog` and `kwargs` should have
3819        # been set with the input `comparison` dataset in mind, and so would be
3820        # useless here. Ultimately, we've already extended `updated` as far
3821        # as we can. So raise an  exception in that case with a useful message.
3822        # However, we still want to try to accommodate extending the model here
3823        # if it is possible.
3824        # Note that we do not need to extend time-invariant models, because
3825        # `KalmanSmoother.news` can itself handle any impact dates for
3826        # time-invariant models.
3827        time_varying = not (previous.filter_results.time_invariant or
3828                            updated.filter_results.time_invariant)
3829        if time_varying and end >= updated.nobs:
3830            # If we the given `comparison` was a dataset and either `exog` or
3831            # `kwargs` was set, then we assume that we cannot create an updated
3832            # time-varying model (because then we can't tell if `kwargs` and
3833            # `exog` arguments are meant to apply to the `comparison` dataset
3834            # or to this extension)
3835            if comparison_dataset and (exog is not None or len(kwargs) > 0):
3836                if comparison is updated:
3837                    raise ValueError('If providing an updated dataset as the'
3838                                     ' `comparison` with a time-varying model,'
3839                                     ' then the `end` period cannot be beyond'
3840                                     ' the end of that updated dataset.')
3841                else:
3842                    raise ValueError('If providing an previous dataset as the'
3843                                     ' `comparison` with a time-varying model,'
3844                                     ' then the `end` period cannot be beyond'
3845                                     ' the end of the (updated) results'
3846                                     ' object.')
3847
3848            # Try to extend `updated`
3849            updated_orig = updated
3850            # TODO: `append` should fix this k_endog=1 issue for us
3851            # TODO: is the + 1 necessary?
3852            if self.model.k_endog > 1:
3853                extra = np.zeros((end - updated.nobs + 1,
3854                                  self.model.k_endog)) * np.nan
3855            else:
3856                extra = np.zeros((end - updated.nobs + 1,)) * np.nan
3857            updated = updated_orig.append(extra, exog=exog, **kwargs)
3858
3859        # Compute the news
3860        news_results = (
3861            updated._news_previous_results(previous, start, end + 1, periods))
3862
3863        if not return_raw:
3864            news_results = NewsResults(
3865                news_results, self, updated, previous, impacted_variable,
3866                tolerance, row_labels=prediction_index)
3867        return news_results
3868
3869    def append(self, endog, exog=None, refit=False, fit_kwargs=None,
3870               copy_initialization=False, **kwargs):
3871        """
3872        Recreate the results object with new data appended to the original data
3873
3874        Creates a new result object applied to a dataset that is created by
3875        appending new data to the end of the model's original data. The new
3876        results can then be used for analysis or forecasting.
3877
3878        Parameters
3879        ----------
3880        endog : array_like
3881            New observations from the modeled time-series process.
3882        exog : array_like, optional
3883            New observations of exogenous regressors, if applicable.
3884        refit : bool, optional
3885            Whether to re-fit the parameters, based on the combined dataset.
3886            Default is False (so parameters from the current results object
3887            are used to create the new results object).
3888        copy_initialization : bool, optional
3889            Whether or not to copy the initialization from the current results
3890            set to the new model. Default is False
3891        fit_kwargs : dict, optional
3892            Keyword arguments to pass to `fit` (if `refit=True`) or `filter` /
3893            `smooth`.
3894        copy_initialization : bool, optional
3895        **kwargs
3896            Keyword arguments may be used to modify model specification
3897            arguments when created the new model object.
3898
3899        Returns
3900        -------
3901        results
3902            Updated Results object, that includes results from both the
3903            original dataset and the new dataset.
3904
3905        Notes
3906        -----
3907        The `endog` and `exog` arguments to this method must be formatted in
3908        the same way (e.g. Pandas Series versus Numpy array) as were the
3909        `endog` and `exog` arrays passed to the original model.
3910
3911        The `endog` argument to this method should consist of new observations
3912        that occurred directly after the last element of `endog`. For any other
3913        kind of dataset, see the `apply` method.
3914
3915        This method will apply filtering to all of the original data as well
3916        as to the new data. To apply filtering only to the new data (which
3917        can be much faster if the original dataset is large), see the `extend`
3918        method.
3919
3920        See Also
3921        --------
3922        statsmodels.tsa.statespace.mlemodel.MLEResults.extend
3923        statsmodels.tsa.statespace.mlemodel.MLEResults.apply
3924
3925        Examples
3926        --------
3927        >>> index = pd.period_range(start='2000', periods=2, freq='A')
3928        >>> original_observations = pd.Series([1.2, 1.5], index=index)
3929        >>> mod = sm.tsa.SARIMAX(original_observations)
3930        >>> res = mod.fit()
3931        >>> print(res.params)
3932        ar.L1     0.9756
3933        sigma2    0.0889
3934        dtype: float64
3935        >>> print(res.fittedvalues)
3936        2000    0.0000
3937        2001    1.1707
3938        Freq: A-DEC, dtype: float64
3939        >>> print(res.forecast(1))
3940        2002    1.4634
3941        Freq: A-DEC, dtype: float64
3942
3943        >>> new_index = pd.period_range(start='2002', periods=1, freq='A')
3944        >>> new_observations = pd.Series([0.9], index=new_index)
3945        >>> updated_res = res.append(new_observations)
3946        >>> print(updated_res.params)
3947        ar.L1     0.9756
3948        sigma2    0.0889
3949        dtype: float64
3950        >>> print(updated_res.fittedvalues)
3951        2000    0.0000
3952        2001    1.1707
3953        2002    1.4634
3954        Freq: A-DEC, dtype: float64
3955        >>> print(updated_res.forecast(1))
3956        2003    0.878
3957        Freq: A-DEC, dtype: float64
3958        """
3959        start = self.nobs
3960        end = self.nobs + len(endog) - 1
3961        _, _, _, append_ix = self.model._get_prediction_index(start, end)
3962
3963        # Check the index of the new data
3964        if isinstance(self.model.data, PandasData):
3965            _check_index(append_ix, endog, '`endog`')
3966
3967        # Concatenate the new data to original data
3968        new_endog = concat([self.model.data.orig_endog, endog], axis=0,
3969                           allow_mix=True)
3970
3971        # Handle `exog`
3972        if exog is not None:
3973            _, exog = prepare_exog(exog)
3974            _check_index(append_ix, exog, '`exog`')
3975
3976            new_exog = concat([self.model.data.orig_exog, exog], axis=0,
3977                              allow_mix=True)
3978        else:
3979            new_exog = None
3980
3981        # Create a continuous index for the combined data
3982        if isinstance(self.model.data, PandasData):
3983            start = 0
3984            end = len(new_endog) - 1
3985            _, _, _, new_index = self.model._get_prediction_index(start, end)
3986
3987            # Standardize `endog` to have the right index and columns
3988            columns = self.model.endog_names
3989            if not isinstance(columns, list):
3990                columns = [columns]
3991            new_endog = pd.DataFrame(new_endog, index=new_index,
3992                                     columns=columns)
3993
3994            # Standardize `exog` to have the right index
3995            if new_exog is not None:
3996                new_exog = pd.DataFrame(new_exog, index=new_index,
3997                                        columns=self.model.exog_names)
3998
3999        if copy_initialization:
4000            res = self.filter_results
4001            init = Initialization(
4002                self.model.k_states, 'known', constant=res.initial_state,
4003                stationary_cov=res.initial_state_cov)
4004            kwargs.setdefault('initialization', init)
4005
4006        mod = self.model.clone(new_endog, exog=new_exog, **kwargs)
4007        res = self._apply(mod, refit=refit, fit_kwargs=fit_kwargs, **kwargs)
4008
4009        return res
4010
4011    def extend(self, endog, exog=None, fit_kwargs=None, **kwargs):
4012        """
4013        Recreate the results object for new data that extends the original data
4014
4015        Creates a new result object applied to a new dataset that is assumed to
4016        follow directly from the end of the model's original data. The new
4017        results can then be used for analysis or forecasting.
4018
4019        Parameters
4020        ----------
4021        endog : array_like
4022            New observations from the modeled time-series process.
4023        exog : array_like, optional
4024            New observations of exogenous regressors, if applicable.
4025        fit_kwargs : dict, optional
4026            Keyword arguments to pass to `filter` or `smooth`.
4027        **kwargs
4028            Keyword arguments may be used to modify model specification
4029            arguments when created the new model object.
4030
4031        Returns
4032        -------
4033        results
4034            Updated Results object, that includes results only for the new
4035            dataset.
4036
4037        See Also
4038        --------
4039        statsmodels.tsa.statespace.mlemodel.MLEResults.append
4040        statsmodels.tsa.statespace.mlemodel.MLEResults.apply
4041
4042        Notes
4043        -----
4044        The `endog` argument to this method should consist of new observations
4045        that occurred directly after the last element of the model's original
4046        `endog` array. For any other kind of dataset, see the `apply` method.
4047
4048        This method will apply filtering only to the new data provided by the
4049        `endog` argument, which can be much faster than re-filtering the entire
4050        dataset. However, the returned results object will only have results
4051        for the new data. To retrieve results for both the new data and the
4052        original data, see the `append` method.
4053
4054        Examples
4055        --------
4056        >>> index = pd.period_range(start='2000', periods=2, freq='A')
4057        >>> original_observations = pd.Series([1.2, 1.5], index=index)
4058        >>> mod = sm.tsa.SARIMAX(original_observations)
4059        >>> res = mod.fit()
4060        >>> print(res.params)
4061        ar.L1     0.9756
4062        sigma2    0.0889
4063        dtype: float64
4064        >>> print(res.fittedvalues)
4065        2000    0.0000
4066        2001    1.1707
4067        Freq: A-DEC, dtype: float64
4068        >>> print(res.forecast(1))
4069        2002    1.4634
4070        Freq: A-DEC, dtype: float64
4071
4072        >>> new_index = pd.period_range(start='2002', periods=1, freq='A')
4073        >>> new_observations = pd.Series([0.9], index=new_index)
4074        >>> updated_res = res.extend(new_observations)
4075        >>> print(updated_res.params)
4076        ar.L1     0.9756
4077        sigma2    0.0889
4078        dtype: float64
4079        >>> print(updated_res.fittedvalues)
4080        2002    1.4634
4081        Freq: A-DEC, dtype: float64
4082        >>> print(updated_res.forecast(1))
4083        2003    0.878
4084        Freq: A-DEC, dtype: float64
4085        """
4086        start = self.nobs
4087        end = self.nobs + len(endog) - 1
4088        _, _, _, extend_ix = self.model._get_prediction_index(start, end)
4089
4090        if isinstance(self.model.data, PandasData):
4091            _check_index(extend_ix, endog, '`endog`')
4092
4093            # Standardize `endog` to have the right index and columns
4094            columns = self.model.endog_names
4095            if not isinstance(columns, list):
4096                columns = [columns]
4097            endog = pd.DataFrame(endog, index=extend_ix, columns=columns)
4098        # Extend the current fit result to additional data
4099        mod = self.model.clone(endog, exog=exog, **kwargs)
4100        mod.ssm.initialization = Initialization(
4101            mod.k_states, 'known', constant=self.predicted_state[..., -1],
4102            stationary_cov=self.predicted_state_cov[..., -1])
4103        res = self._apply(mod, refit=False, fit_kwargs=fit_kwargs, **kwargs)
4104
4105        return res
4106
4107    def apply(self, endog, exog=None, refit=False, fit_kwargs=None,
4108              copy_initialization=False, **kwargs):
4109        """
4110        Apply the fitted parameters to new data unrelated to the original data
4111
4112        Creates a new result object using the current fitted parameters,
4113        applied to a completely new dataset that is assumed to be unrelated to
4114        the model's original data. The new results can then be used for
4115        analysis or forecasting.
4116
4117        Parameters
4118        ----------
4119        endog : array_like
4120            New observations from the modeled time-series process.
4121        exog : array_like, optional
4122            New observations of exogenous regressors, if applicable.
4123        refit : bool, optional
4124            Whether to re-fit the parameters, using the new dataset.
4125            Default is False (so parameters from the current results object
4126            are used to create the new results object).
4127        copy_initialization : bool, optional
4128            Whether or not to copy the initialization from the current results
4129            set to the new model. Default is False
4130        fit_kwargs : dict, optional
4131            Keyword arguments to pass to `fit` (if `refit=True`) or `filter` /
4132            `smooth`.
4133        **kwargs
4134            Keyword arguments may be used to modify model specification
4135            arguments when created the new model object.
4136
4137        Returns
4138        -------
4139        results
4140            Updated Results object, that includes results only for the new
4141            dataset.
4142
4143        See Also
4144        --------
4145        statsmodels.tsa.statespace.mlemodel.MLEResults.append
4146        statsmodels.tsa.statespace.mlemodel.MLEResults.apply
4147
4148        Notes
4149        -----
4150        The `endog` argument to this method should consist of new observations
4151        that are not necessarily related to the original model's `endog`
4152        dataset. For observations that continue that original dataset by follow
4153        directly after its last element, see the `append` and `extend` methods.
4154
4155        Examples
4156        --------
4157        >>> index = pd.period_range(start='2000', periods=2, freq='A')
4158        >>> original_observations = pd.Series([1.2, 1.5], index=index)
4159        >>> mod = sm.tsa.SARIMAX(original_observations)
4160        >>> res = mod.fit()
4161        >>> print(res.params)
4162        ar.L1     0.9756
4163        sigma2    0.0889
4164        dtype: float64
4165        >>> print(res.fittedvalues)
4166        2000    0.0000
4167        2001    1.1707
4168        Freq: A-DEC, dtype: float64
4169        >>> print(res.forecast(1))
4170        2002    1.4634
4171        Freq: A-DEC, dtype: float64
4172
4173        >>> new_index = pd.period_range(start='1980', periods=3, freq='A')
4174        >>> new_observations = pd.Series([1.4, 0.3, 1.2], index=new_index)
4175        >>> new_res = res.apply(new_observations)
4176        >>> print(new_res.params)
4177        ar.L1     0.9756
4178        sigma2    0.0889
4179        dtype: float64
4180        >>> print(new_res.fittedvalues)
4181        1980    1.1707
4182        1981    1.3659
4183        1982    0.2927
4184        Freq: A-DEC, dtype: float64
4185        Freq: A-DEC, dtype: float64
4186        >>> print(new_res.forecast(1))
4187        1983    1.1707
4188        Freq: A-DEC, dtype: float64
4189        """
4190        mod = self.model.clone(endog, exog=exog, **kwargs)
4191
4192        if copy_initialization:
4193            res = self.filter_results
4194            init = Initialization(
4195                self.model.k_states, 'known', constant=res.initial_state,
4196                stationary_cov=res.initial_state_cov)
4197            mod.ssm.initialization = init
4198
4199        res = self._apply(mod, refit=refit, fit_kwargs=fit_kwargs, **kwargs)
4200
4201        return res
4202
4203    def plot_diagnostics(self, variable=0, lags=10, fig=None, figsize=None,
4204                         truncate_endog_names=24, auto_ylims=False,
4205                         bartlett_confint=False, acf_kwargs=None):
4206        """
4207        Diagnostic plots for standardized residuals of one endogenous variable
4208
4209        Parameters
4210        ----------
4211        variable : int, optional
4212            Index of the endogenous variable for which the diagnostic plots
4213            should be created. Default is 0.
4214        lags : int, optional
4215            Number of lags to include in the correlogram. Default is 10.
4216        fig : Figure, optional
4217            If given, subplots are created in this figure instead of in a new
4218            figure. Note that the 2x2 grid will be created in the provided
4219            figure using `fig.add_subplot()`.
4220        figsize : tuple, optional
4221            If a figure is created, this argument allows specifying a size.
4222            The tuple is (width, height).
4223        auto_ylims : bool, optional
4224            If True, adjusts automatically the y-axis limits to ACF values.
4225        bartlett_confint : bool, default True
4226            Confidence intervals for ACF values are generally placed at 2
4227            standard errors around r_k. The formula used for standard error
4228            depends upon the situation. If the autocorrelations are being used
4229            to test for randomness of residuals as part of the ARIMA routine,
4230            the standard errors are determined assuming the residuals are white
4231            noise. The approximate formula for any lag is that standard error
4232            of each r_k = 1/sqrt(N). See section 9.4 of [1] for more details on
4233            the 1/sqrt(N) result. For more elementary discussion, see section
4234            5.3.2 in [2].
4235            For the ACF of raw data, the standard error at a lag k is
4236            found as if the right model was an MA(k-1). This allows the
4237            possible interpretation that if all autocorrelations past a
4238            certain lag are within the limits, the model might be an MA of
4239            order defined by the last significant autocorrelation. In this
4240            case, a moving average model is assumed for the data and the
4241            standard errors for the confidence intervals should be
4242            generated using Bartlett's formula. For more details on
4243            Bartlett formula result, see section 7.2 in [1].+
4244        acf_kwargs : dict, optional
4245            Optional dictionary of keyword arguments that are directly passed
4246            on to the correlogram Matplotlib plot produced by plot_acf().
4247
4248        Returns
4249        -------
4250        Figure
4251            Figure instance with diagnostic plots
4252
4253        See Also
4254        --------
4255        statsmodels.graphics.gofplots.qqplot
4256        statsmodels.graphics.tsaplots.plot_acf
4257
4258        Notes
4259        -----
4260        Produces a 2x2 plot grid with the following plots (ordered clockwise
4261        from top left):
4262
4263        1. Standardized residuals over time
4264        2. Histogram plus estimated density of standardized residuals, along
4265           with a Normal(0,1) density plotted for reference.
4266        3. Normal Q-Q plot, with Normal reference line.
4267        4. Correlogram
4268
4269        References
4270        ----------
4271        [1] Brockwell and Davis, 1987. Time Series Theory and Methods
4272        [2] Brockwell and Davis, 2010. Introduction to Time Series and
4273        Forecasting, 2nd edition.
4274        """
4275        from statsmodels.graphics.utils import _import_mpl, create_mpl_fig
4276        _import_mpl()
4277        fig = create_mpl_fig(fig, figsize)
4278        # Eliminate residuals associated with burned or diffuse likelihoods
4279        d = np.maximum(self.loglikelihood_burn, self.nobs_diffuse)
4280
4281        # If given a variable name, find the index
4282        if isinstance(variable, str):
4283            variable = self.model.endog_names.index(variable)
4284
4285        # Get residuals
4286        if hasattr(self.data, 'dates') and self.data.dates is not None:
4287            ix = self.data.dates[d:]
4288        else:
4289            ix = np.arange(self.nobs - d)
4290        resid = pd.Series(
4291            self.filter_results.standardized_forecasts_error[variable, d:],
4292            index=ix)
4293
4294        if resid.shape[0] < max(d, lags):
4295            raise ValueError(
4296                "Length of endogenous variable must be larger the the number "
4297                "of lags used in the model and the number of observations "
4298                "burned in the log-likelihood calculation."
4299            )
4300
4301        # Top-left: residuals vs time
4302        ax = fig.add_subplot(221)
4303        resid.dropna().plot(ax=ax)
4304        ax.hlines(0, ix[0], ix[-1], alpha=0.5)
4305        ax.set_xlim(ix[0], ix[-1])
4306        name = self.model.endog_names[variable]
4307        if len(name) > truncate_endog_names:
4308            name = name[:truncate_endog_names - 3] + '...'
4309        ax.set_title(f'Standardized residual for "{name}"')
4310
4311        # Top-right: histogram, Gaussian kernel density, Normal density
4312        # Can only do histogram and Gaussian kernel density on the non-null
4313        # elements
4314        resid_nonmissing = resid.dropna()
4315        ax = fig.add_subplot(222)
4316
4317        ax.hist(resid_nonmissing, density=True, label='Hist',
4318                edgecolor='#FFFFFF')
4319
4320        from scipy.stats import gaussian_kde, norm
4321        kde = gaussian_kde(resid_nonmissing)
4322        xlim = (-1.96*2, 1.96*2)
4323        x = np.linspace(xlim[0], xlim[1])
4324        ax.plot(x, kde(x), label='KDE')
4325        ax.plot(x, norm.pdf(x), label='N(0,1)')
4326        ax.set_xlim(xlim)
4327        ax.legend()
4328        ax.set_title('Histogram plus estimated density')
4329
4330        # Bottom-left: QQ plot
4331        ax = fig.add_subplot(223)
4332        from statsmodels.graphics.gofplots import qqplot
4333        qqplot(resid_nonmissing, line='s', ax=ax)
4334        ax.set_title('Normal Q-Q')
4335
4336        # Bottom-right: Correlogram
4337        ax = fig.add_subplot(224)
4338        from statsmodels.graphics.tsaplots import plot_acf
4339
4340        if acf_kwargs is None:
4341            acf_kwargs = {}
4342        plot_acf(resid, ax=ax, lags=lags, auto_ylims=auto_ylims,
4343                 bartlett_confint=bartlett_confint, **acf_kwargs)
4344        ax.set_title('Correlogram')
4345
4346        return fig
4347
4348    def summary(self, alpha=.05, start=None, title=None, model_name=None,
4349                display_params=True, display_diagnostics=True,
4350                truncate_endog_names=None, display_max_endog=None,
4351                extra_top_left=None, extra_top_right=None):
4352        """
4353        Summarize the Model
4354
4355        Parameters
4356        ----------
4357        alpha : float, optional
4358            Significance level for the confidence intervals. Default is 0.05.
4359        start : int, optional
4360            Integer of the start observation. Default is 0.
4361        model_name : str
4362            The name of the model used. Default is to use model class name.
4363
4364        Returns
4365        -------
4366        summary : Summary instance
4367            This holds the summary table and text, which can be printed or
4368            converted to various output formats.
4369
4370        See Also
4371        --------
4372        statsmodels.iolib.summary.Summary
4373        """
4374        from statsmodels.iolib.summary import Summary
4375        from statsmodels.iolib.table import SimpleTable
4376        from statsmodels.iolib.tableformatting import fmt_params
4377
4378        # Model specification results
4379        model = self.model
4380        if title is None:
4381            title = 'Statespace Model Results'
4382
4383        if start is None:
4384            start = 0
4385        if self.model._index_dates:
4386            ix = self.model._index
4387            d = ix[start]
4388            sample = ['%02d-%02d-%02d' % (d.month, d.day, d.year)]
4389            d = ix[-1]
4390            sample += ['- ' + '%02d-%02d-%02d' % (d.month, d.day, d.year)]
4391        else:
4392            sample = [str(start), ' - ' + str(self.nobs)]
4393
4394        # Standardize the model name as a list of str
4395        if model_name is None:
4396            model_name = model.__class__.__name__
4397
4398        # Truncate endog names
4399        if truncate_endog_names is None:
4400            truncate_endog_names = False if self.model.k_endog == 1 else 24
4401        endog_names = self.model.endog_names
4402        if not isinstance(endog_names, list):
4403            endog_names = [endog_names]
4404        endog_names = [str(name) for name in endog_names]
4405        if truncate_endog_names is not False:
4406            n = truncate_endog_names
4407            endog_names = [name if len(name) <= n else name[:n] + '...'
4408                           for name in endog_names]
4409
4410        # Shorten the endog name list if applicable
4411        if display_max_endog is None:
4412            display_max_endog = np.inf
4413        yname = None
4414        if self.model.k_endog > display_max_endog:
4415            k = self.model.k_endog - 1
4416            yname = '"' + endog_names[0] + f'", and {k} more'
4417
4418        # Create the tables
4419        if not isinstance(model_name, list):
4420            model_name = [model_name]
4421
4422        top_left = [('Dep. Variable:', None)]
4423        top_left.append(('Model:', [model_name[0]]))
4424        for i in range(1, len(model_name)):
4425            top_left.append(('', ['+ ' + model_name[i]]))
4426        top_left += [
4427            ('Date:', None),
4428            ('Time:', None),
4429            ('Sample:', [sample[0]]),
4430            ('', [sample[1]])
4431        ]
4432
4433        top_right = [
4434            ('No. Observations:', [self.nobs]),
4435            ('Log Likelihood', ["%#5.3f" % self.llf]),
4436        ]
4437        if hasattr(self, 'rsquared'):
4438            top_right.append(('R-squared:', ["%#8.3f" % self.rsquared]))
4439        top_right += [
4440            ('AIC', ["%#5.3f" % self.aic]),
4441            ('BIC', ["%#5.3f" % self.bic]),
4442            ('HQIC', ["%#5.3f" % self.hqic])]
4443        if (self.filter_results is not None and
4444                self.filter_results.filter_concentrated):
4445            top_right.append(('Scale', ["%#5.3f" % self.scale]))
4446
4447        if hasattr(self, 'cov_type'):
4448            cov_type = self.cov_type
4449            if cov_type == 'none':
4450                cov_type = 'Not computed'
4451            top_left.append(('Covariance Type:', [cov_type]))
4452
4453        if extra_top_left is not None:
4454            top_left += extra_top_left
4455        if extra_top_right is not None:
4456            top_right += extra_top_right
4457
4458        summary = Summary()
4459        summary.add_table_2cols(self, gleft=top_left, gright=top_right,
4460                                title=title, yname=yname)
4461        table_ix = 1
4462        if len(self.params) > 0 and display_params:
4463            summary.add_table_params(self, alpha=alpha,
4464                                     xname=self.param_names, use_t=False)
4465            table_ix += 1
4466
4467        # Diagnostic tests results
4468        if display_diagnostics:
4469            try:
4470                het = self.test_heteroskedasticity(method='breakvar')
4471            except Exception:  # FIXME: catch something specific
4472                het = np.zeros((self.model.k_endog, 2)) * np.nan
4473            try:
4474                lb = self.test_serial_correlation(method='ljungbox', lags=[1])
4475            except Exception:  # FIXME: catch something specific
4476                lb = np.zeros((self.model.k_endog, 2, 1)) * np.nan
4477            try:
4478                jb = self.test_normality(method='jarquebera')
4479            except Exception:  # FIXME: catch something specific
4480                jb = np.zeros((self.model.k_endog, 4)) * np.nan
4481
4482            if self.model.k_endog <= display_max_endog:
4483                format_str = lambda array: [  # noqa:E731
4484                    ', '.join(['{0:.2f}'.format(i) for i in array])
4485                ]
4486                diagn_left = [
4487                    ('Ljung-Box (L1) (Q):', format_str(lb[:, 0, -1])),
4488                    ('Prob(Q):', format_str(lb[:, 1, -1])),
4489                    ('Heteroskedasticity (H):', format_str(het[:, 0])),
4490                    ('Prob(H) (two-sided):', format_str(het[:, 1]))]
4491
4492                diagn_right = [('Jarque-Bera (JB):', format_str(jb[:, 0])),
4493                               ('Prob(JB):', format_str(jb[:, 1])),
4494                               ('Skew:', format_str(jb[:, 2])),
4495                               ('Kurtosis:', format_str(jb[:, 3]))
4496                               ]
4497
4498                summary.add_table_2cols(self, gleft=diagn_left,
4499                                        gright=diagn_right, title="")
4500            else:
4501                columns = ['LjungBox\n(L1) (Q)', 'Prob(Q)',
4502                           'Het.(H)', 'Prob(H)',
4503                           'Jarque\nBera(JB)', 'Prob(JB)', 'Skew', 'Kurtosis']
4504                data = pd.DataFrame(
4505                    np.c_[lb[:, :2, -1], het[:, :2], jb[:, :4]],
4506                    index=endog_names, columns=columns).applymap(
4507                        lambda num: '' if pd.isnull(num) else '%.2f' % num)
4508                data.index.name = 'Residual of\nDep. variable'
4509                data = data.reset_index()
4510
4511                params_data = data.values
4512                params_header = data.columns.tolist()
4513                params_stubs = None
4514
4515                title = 'Residual diagnostics:'
4516                table = SimpleTable(
4517                    params_data, params_header, params_stubs,
4518                    txt_fmt=fmt_params, title=title)
4519                summary.tables.insert(table_ix, table)
4520
4521        # Add warnings/notes, added to text format only
4522        etext = []
4523        if hasattr(self, 'cov_type') and 'description' in self.cov_kwds:
4524            etext.append(self.cov_kwds['description'])
4525        if self._rank < (len(self.params) - len(self.fixed_params)):
4526            cov_params = self.cov_params()
4527            if len(self.fixed_params) > 0:
4528                mask = np.ix_(self._free_params_index, self._free_params_index)
4529                cov_params = cov_params[mask]
4530            etext.append("Covariance matrix is singular or near-singular,"
4531                         " with condition number %6.3g. Standard errors may be"
4532                         " unstable." % _safe_cond(cov_params))
4533
4534        if etext:
4535            etext = ["[{0}] {1}".format(i + 1, text)
4536                     for i, text in enumerate(etext)]
4537            etext.insert(0, "Warnings:")
4538            summary.add_extra_txt(etext)
4539
4540        return summary
4541
4542
4543class MLEResultsWrapper(wrap.ResultsWrapper):
4544    _attrs = {
4545        'zvalues': 'columns',
4546        'cov_params_approx': 'cov',
4547        'cov_params_default': 'cov',
4548        'cov_params_oim': 'cov',
4549        'cov_params_opg': 'cov',
4550        'cov_params_robust': 'cov',
4551        'cov_params_robust_approx': 'cov',
4552        'cov_params_robust_oim': 'cov',
4553    }
4554    _wrap_attrs = wrap.union_dicts(tsbase.TimeSeriesResultsWrapper._wrap_attrs,
4555                                   _attrs)
4556    _methods = {
4557        'forecast': 'dates',
4558        'impulse_responses': 'ynames'
4559    }
4560    _wrap_methods = wrap.union_dicts(
4561        tsbase.TimeSeriesResultsWrapper._wrap_methods, _methods)
4562wrap.populate_wrapper(MLEResultsWrapper, MLEResults)  # noqa:E305
4563
4564
4565class PredictionResults(pred.PredictionResults):
4566    """
4567
4568    Parameters
4569    ----------
4570    prediction_results : kalman_filter.PredictionResults instance
4571        Results object from prediction after fitting or filtering a state space
4572        model.
4573    row_labels : iterable
4574        Row labels for the predicted data.
4575
4576    Attributes
4577    ----------
4578    """
4579    def __init__(self, model, prediction_results, row_labels=None):
4580        if model.model.k_endog == 1:
4581            endog = pd.Series(prediction_results.endog[0],
4582                              name=model.model.endog_names)
4583        else:
4584            endog = pd.DataFrame(prediction_results.endog.T,
4585                                 columns=model.model.endog_names)
4586        self.model = Bunch(data=model.data.__class__(
4587            endog=endog, predict_dates=row_labels))
4588        self.prediction_results = prediction_results
4589
4590        # Get required values
4591        k_endog, nobs = prediction_results.endog.shape
4592        if not prediction_results.results.memory_no_forecast_mean:
4593            predicted_mean = self.prediction_results.forecasts
4594        else:
4595            predicted_mean = np.zeros((k_endog, nobs)) * np.nan
4596
4597        if predicted_mean.shape[0] == 1:
4598            predicted_mean = predicted_mean[0, :]
4599        else:
4600            predicted_mean = predicted_mean.transpose()
4601
4602        if not prediction_results.results.memory_no_forecast_cov:
4603            var_pred_mean = self.prediction_results.forecasts_error_cov
4604        else:
4605            var_pred_mean = np.zeros((k_endog, k_endog, nobs)) * np.nan
4606
4607        if var_pred_mean.shape[0] == 1:
4608            var_pred_mean = var_pred_mean[0, 0, :]
4609        else:
4610            var_pred_mean = var_pred_mean.transpose()
4611
4612        # Initialize
4613        super(PredictionResults, self).__init__(predicted_mean, var_pred_mean,
4614                                                dist='norm',
4615                                                row_labels=row_labels)
4616
4617    @property
4618    def se_mean(self):
4619        # Replace negative values with np.nan to avoid a RuntimeWarning
4620        var_pred_mean = self.var_pred_mean.copy()
4621        var_pred_mean[var_pred_mean < 0] = np.nan
4622        if var_pred_mean.ndim == 1:
4623            se_mean = np.sqrt(var_pred_mean)
4624        else:
4625            se_mean = np.sqrt(var_pred_mean.T.diagonal())
4626        return se_mean
4627
4628    def conf_int(self, method='endpoint', alpha=0.05, **kwds):
4629        # TODO: this performs metadata wrapping, and that should be handled
4630        #       by attach_* methods. However, they do not currently support
4631        #       this use case.
4632        _use_pandas = self._use_pandas
4633        self._use_pandas = False
4634        conf_int = super(PredictionResults, self).conf_int(alpha, **kwds)
4635        self._use_pandas = _use_pandas
4636
4637        # Create a dataframe
4638        if self._row_labels is not None:
4639            conf_int = pd.DataFrame(conf_int, index=self.row_labels)
4640
4641            # Attach the endog names
4642            ynames = self.model.data.ynames
4643            if not type(ynames) == list:
4644                ynames = [ynames]
4645            names = (['lower {0}'.format(name) for name in ynames] +
4646                     ['upper {0}'.format(name) for name in ynames])
4647            conf_int.columns = names
4648
4649        return conf_int
4650
4651    def summary_frame(self, endog=0, alpha=0.05):
4652        # TODO: finish and cleanup
4653        # import pandas as pd
4654        # ci_obs = self.conf_int(alpha=alpha, obs=True) # need to split
4655        ci_mean = np.asarray(self.conf_int(alpha=alpha))
4656        _use_pandas = self._use_pandas
4657        self._use_pandas = False
4658        to_include = {}
4659        if self.predicted_mean.ndim == 1:
4660            yname = self.model.data.ynames
4661            to_include['mean'] = self.predicted_mean
4662            to_include['mean_se'] = self.se_mean
4663            k_endog = 1
4664        else:
4665            yname = self.model.data.ynames[endog]
4666            to_include['mean'] = self.predicted_mean[:, endog]
4667            to_include['mean_se'] = self.se_mean[:, endog]
4668            k_endog = self.predicted_mean.shape[1]
4669        self._use_pandas = _use_pandas
4670        to_include['mean_ci_lower'] = ci_mean[:, endog]
4671        to_include['mean_ci_upper'] = ci_mean[:, k_endog + endog]
4672
4673        # pandas dict does not handle 2d_array
4674        # data = np.column_stack(list(to_include.values()))
4675        # names = ....
4676        res = pd.DataFrame(to_include, index=self._row_labels,
4677                           columns=list(to_include.keys()))
4678        res.columns.name = yname
4679        return res
4680
4681
4682class PredictionResultsWrapper(wrap.ResultsWrapper):
4683    _attrs = {
4684        'predicted_mean': 'dates',
4685        'se_mean': 'dates',
4686        't_values': 'dates',
4687    }
4688    _wrap_attrs = wrap.union_dicts(_attrs)
4689
4690    _methods = {}
4691    _wrap_methods = wrap.union_dicts(_methods)
4692wrap.populate_wrapper(PredictionResultsWrapper, PredictionResults)  # noqa:E305
4693