1# -*- coding: utf-8 -*-
2"""
3Dynamic factor model
4
5Author: Chad Fulton
6License: Simplified-BSD
7"""
8
9import numpy as np
10from .mlemodel import MLEModel, MLEResults, MLEResultsWrapper
11from .tools import (
12    is_invertible, prepare_exog,
13    constrain_stationary_univariate, unconstrain_stationary_univariate,
14    constrain_stationary_multivariate, unconstrain_stationary_multivariate
15)
16from statsmodels.multivariate.pca import PCA
17from statsmodels.regression.linear_model import OLS
18from statsmodels.tsa.vector_ar.var_model import VAR
19from statsmodels.tsa.arima.model import ARIMA
20from statsmodels.tools.tools import Bunch
21from statsmodels.tools.data import _is_using_pandas
22from statsmodels.tsa.tsatools import lagmat
23from statsmodels.tools.decorators import cache_readonly
24import statsmodels.base.wrapper as wrap
25from statsmodels.compat.pandas import Appender
26
27
28class DynamicFactor(MLEModel):
29    r"""
30    Dynamic factor model
31
32    Parameters
33    ----------
34    endog : array_like
35        The observed time-series process :math:`y`
36    exog : array_like, optional
37        Array of exogenous regressors for the observation equation, shaped
38        nobs x k_exog.
39    k_factors : int
40        The number of unobserved factors.
41    factor_order : int
42        The order of the vector autoregression followed by the factors.
43    error_cov_type : {'scalar', 'diagonal', 'unstructured'}, optional
44        The structure of the covariance matrix of the observation error term,
45        where "unstructured" puts no restrictions on the matrix, "diagonal"
46        requires it to be any diagonal matrix (uncorrelated errors), and
47        "scalar" requires it to be a scalar times the identity matrix. Default
48        is "diagonal".
49    error_order : int, optional
50        The order of the vector autoregression followed by the observation
51        error component. Default is None, corresponding to white noise errors.
52    error_var : bool, optional
53        Whether or not to model the errors jointly via a vector autoregression,
54        rather than as individual autoregressions. Has no effect unless
55        `error_order` is set. Default is False.
56    enforce_stationarity : bool, optional
57        Whether or not to transform the AR parameters to enforce stationarity
58        in the autoregressive component of the model. Default is True.
59    **kwargs
60        Keyword arguments may be used to provide default values for state space
61        matrices or for Kalman filtering options. See `Representation`, and
62        `KalmanFilter` for more details.
63
64    Attributes
65    ----------
66    exog : array_like, optional
67        Array of exogenous regressors for the observation equation, shaped
68        nobs x k_exog.
69    k_factors : int
70        The number of unobserved factors.
71    factor_order : int
72        The order of the vector autoregression followed by the factors.
    error_cov_type : {'scalar', 'diagonal', 'unstructured'}
        The structure of the covariance matrix of the observation error term,
        where "unstructured" puts no restrictions on the matrix, "diagonal"
        requires it to be any diagonal matrix (uncorrelated errors), and
        "scalar" requires it to be a scalar times the identity matrix.
77    error_order : int
78        The order of the vector autoregression followed by the observation
79        error component.
80    error_var : bool
81        Whether or not to model the errors jointly via a vector autoregression,
82        rather than as individual autoregressions. Has no effect unless
83        `error_order` is set.
    enforce_stationarity : bool
        Whether or not to transform the AR parameters to enforce stationarity
        in the autoregressive component of the model.
87
88    Notes
89    -----
90    The dynamic factor model considered here is in the so-called static form,
91    and is specified:
92
93    .. math::
94
95        y_t & = \Lambda f_t + B x_t + u_t \\
96        f_t & = A_1 f_{t-1} + \dots + A_p f_{t-p} + \eta_t \\
97        u_t & = C_1 u_{t-1} + \dots + C_q u_{t-q} + \varepsilon_t
98
99    where there are `k_endog` observed series and `k_factors` unobserved
100    factors. Thus :math:`y_t` is a `k_endog` x 1 vector and :math:`f_t` is a
101    `k_factors` x 1 vector.
102
103    :math:`x_t` are optional exogenous vectors, shaped `k_exog` x 1.
104
105    :math:`\eta_t` and :math:`\varepsilon_t` are white noise error terms. In
106    order to identify the factors, :math:`Var(\eta_t) = I`. Denote
107    :math:`Var(\varepsilon_t) \equiv \Sigma`.
108
109    Options related to the unobserved factors:
110
111    - `k_factors`: this is the dimension of the vector :math:`f_t`, above.
112      To exclude factors completely, set `k_factors = 0`.
113    - `factor_order`: this is the number of lags to include in the factor
114      evolution equation, and corresponds to :math:`p`, above. To have static
115      factors, set `factor_order = 0`.
116
117    Options related to the observation error term :math:`u_t`:
118
119    - `error_order`: the number of lags to include in the error evolution
120      equation; corresponds to :math:`q`, above. To have white noise errors,
121      set `error_order = 0` (this is the default).
122    - `error_cov_type`: this controls the form of the covariance matrix
      :math:`\Sigma`. If it is "scalar", then :math:`\Sigma = \sigma^2 I`. If
124      it is "diagonal", then
125      :math:`\Sigma = \text{diag}(\sigma_1^2, \dots, \sigma_n^2)`. If it is
126      "unstructured", then :math:`\Sigma` is any valid variance / covariance
127      matrix (i.e. symmetric and positive definite).
128    - `error_var`: this controls whether or not the errors evolve jointly
129      according to a VAR(q), or individually according to separate AR(q)
130      processes. In terms of the formulation above, if `error_var = False`,
      then the matrices :math:`C_i` are diagonal, otherwise they are general
132      VAR matrices.
133
134    References
135    ----------
136    .. [*] Lütkepohl, Helmut. 2007.
137       New Introduction to Multiple Time Series Analysis.
138       Berlin: Springer.
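
    Examples
    --------
    A minimal construction sketch (illustrative only: the simulated ``data``,
    the series names, and the chosen orders below are hypothetical, not taken
    from a statsmodels dataset):

    >>> import numpy as np
    >>> import pandas as pd
    >>> from statsmodels.tsa.statespace.dynamic_factor import DynamicFactor
    >>> rng = np.random.default_rng(12345)
    >>> data = pd.DataFrame(rng.standard_normal((100, 3)),
    ...                     columns=['y1', 'y2', 'y3'])
    >>> mod = DynamicFactor(data, k_factors=1, factor_order=2, error_order=1)
    >>> res = mod.fit(disp=False)
    >>> estimated_factor = res.factors.smoothed  # shape (1, 100)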
139    """
140
141    def __init__(self, endog, k_factors, factor_order, exog=None,
142                 error_order=0, error_var=False, error_cov_type='diagonal',
143                 enforce_stationarity=True, **kwargs):
144
145        # Model properties
146        self.enforce_stationarity = enforce_stationarity
147
148        # Factor-related properties
149        self.k_factors = k_factors
150        self.factor_order = factor_order
151
152        # Error-related properties
153        self.error_order = error_order
154        self.error_var = error_var and error_order > 0
155        self.error_cov_type = error_cov_type
156
157        # Exogenous data
158        (self.k_exog, exog) = prepare_exog(exog)
159
160        # Note: at some point in the future might add state regression, as in
161        # SARIMAX.
162        self.mle_regression = self.k_exog > 0
163
164        # We need to have an array or pandas at this point
165        if not _is_using_pandas(endog, None):
166            endog = np.asanyarray(endog, order='C')
167
168        # Save some useful model orders, internally used
169        k_endog = endog.shape[1] if endog.ndim > 1 else 1
170        self._factor_order = max(1, self.factor_order) * self.k_factors
171        self._error_order = self.error_order * k_endog
172
173        # Calculate the number of states
174        k_states = self._factor_order
175        k_posdef = self.k_factors
176        if self.error_order > 0:
177            k_states += self._error_order
178            k_posdef += k_endog
179
        # We can still estimate the model with no dynamic state (e.g. SUR); we
        # just need to have one state that does nothing.
182        self._unused_state = False
183        if k_states == 0:
184            k_states = 1
185            k_posdef = 1
186            self._unused_state = True
187
188        # Test for non-multivariate endog
189        if k_endog < 2:
            raise ValueError('The dynamic factor model is only valid for'
                             ' multivariate time series.')
192
193        # Test for too many factors
194        if self.k_factors >= k_endog:
195            raise ValueError('Number of factors must be less than the number'
196                             ' of endogenous variables.')
197
198        # Test for invalid error_cov_type
199        if self.error_cov_type not in ['scalar', 'diagonal', 'unstructured']:
200            raise ValueError('Invalid error covariance matrix type'
201                             ' specification.')
202
203        # By default, initialize as stationary
204        kwargs.setdefault('initialization', 'stationary')
205
206        # Initialize the state space model
207        super(DynamicFactor, self).__init__(
208            endog, exog=exog, k_states=k_states, k_posdef=k_posdef, **kwargs
209        )
210
211        # Set as time-varying model if we have exog
212        if self.k_exog > 0:
213            self.ssm._time_invariant = False
214
215        # Initialize the components
216        self.parameters = {}
217        self._initialize_loadings()
218        self._initialize_exog()
219        self._initialize_error_cov()
220        self._initialize_factor_transition()
221        self._initialize_error_transition()
222        self.k_params = sum(self.parameters.values())
223
224        # Cache parameter vector slices
225        def _slice(key, offset):
226            length = self.parameters[key]
227            param_slice = np.s_[offset:offset + length]
228            offset += length
229            return param_slice, offset
230
231        offset = 0
232        self._params_loadings, offset = _slice('factor_loadings', offset)
233        self._params_exog, offset = _slice('exog', offset)
234        self._params_error_cov, offset = _slice('error_cov', offset)
235        self._params_factor_transition, offset = (
236            _slice('factor_transition', offset))
237        self._params_error_transition, offset = (
238            _slice('error_transition', offset))
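        # Illustrative layout (hypothetical sizes): with k_endog=3,
        # k_factors=1, factor_order=1, no exog, error_cov_type='diagonal'
        # and error_order=0, the parameter vector is ordered as
        #     [3 loadings | 0 exog | 3 error variances | 1 factor AR | 0 error AR]
        # so that, for example, self._params_error_cov == np.s_[3:6].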
239
240        # Update _init_keys attached by super
241        self._init_keys += ['k_factors', 'factor_order', 'error_order',
242                            'error_var', 'error_cov_type',
243                            'enforce_stationarity'] + list(kwargs.keys())
244
245    def _initialize_loadings(self):
246        # Initialize the parameters
247        self.parameters['factor_loadings'] = self.k_endog * self.k_factors
248
249        # Setup fixed components of state space matrices
250        if self.error_order > 0:
251            start = self._factor_order
252            end = self._factor_order + self.k_endog
253            self.ssm['design', :, start:end] = np.eye(self.k_endog)
254
255        # Setup indices of state space matrices
256        self._idx_loadings = np.s_['design', :, :self.k_factors]
257
258    def _initialize_exog(self):
259        # Initialize the parameters
260        self.parameters['exog'] = self.k_exog * self.k_endog
261
262        # If we have exog effects, then the obs intercept needs to be
263        # time-varying
264        if self.k_exog > 0:
265            self.ssm['obs_intercept'] = np.zeros((self.k_endog, self.nobs))
266
267        # Setup indices of state space matrices
268        self._idx_exog = np.s_['obs_intercept', :self.k_endog, :]
269
270    def _initialize_error_cov(self):
271        if self.error_cov_type == 'scalar':
272            self._initialize_error_cov_diagonal(scalar=True)
273        elif self.error_cov_type == 'diagonal':
274            self._initialize_error_cov_diagonal(scalar=False)
275        elif self.error_cov_type == 'unstructured':
276            self._initialize_error_cov_unstructured()
277
278    def _initialize_error_cov_diagonal(self, scalar=False):
279        # Initialize the parameters
280        self.parameters['error_cov'] = 1 if scalar else self.k_endog
281
282        # Setup fixed components of state space matrices
283
284        # Setup indices of state space matrices
285        k_endog = self.k_endog
286        k_factors = self.k_factors
287        idx = np.diag_indices(k_endog)
288        if self.error_order > 0:
289            matrix = 'state_cov'
290            idx = (idx[0] + k_factors, idx[1] + k_factors)
291        else:
292            matrix = 'obs_cov'
293        self._idx_error_cov = (matrix,) + idx
294
295    def _initialize_error_cov_unstructured(self):
296        # Initialize the parameters
297        k_endog = self.k_endog
298        self.parameters['error_cov'] = int(k_endog * (k_endog + 1) / 2)
299
300        # Setup fixed components of state space matrices
301
302        # Setup indices of state space matrices
303        self._idx_lower_error_cov = np.tril_indices(self.k_endog)
304        if self.error_order > 0:
305            start = self.k_factors
306            end = self.k_factors + self.k_endog
307            self._idx_error_cov = (
308                np.s_['state_cov', start:end, start:end])
309        else:
310            self._idx_error_cov = np.s_['obs_cov', :, :]
311
312    def _initialize_factor_transition(self):
313        order = self.factor_order * self.k_factors
314        k_factors = self.k_factors
315
316        # Initialize the parameters
317        self.parameters['factor_transition'] = (
318            self.factor_order * self.k_factors**2)
319
320        # Setup fixed components of state space matrices
321        # VAR(p) for factor transition
322        if self.k_factors > 0:
323            if self.factor_order > 0:
324                self.ssm['transition', k_factors:order, :order - k_factors] = (
325                    np.eye(order - k_factors))
326
327            self.ssm['selection', :k_factors, :k_factors] = np.eye(k_factors)
328            # Identification requires constraining the state covariance to an
329            # identity matrix
330            self.ssm['state_cov', :k_factors, :k_factors] = np.eye(k_factors)
331
332        # Setup indices of state space matrices
333        self._idx_factor_transition = np.s_['transition', :k_factors, :order]
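        # Illustrative layout (hypothetical sizes): with k_factors=2 and
        # factor_order=2, the factor block of the transition matrix has the
        # VAR(2) companion form
        #     [ A_1  A_2 ]
        #     [  I    0  ]
        # where the top (block) row is filled from the parameters via
        # self._idx_factor_transition and the identity below it is fixed here.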
334
335    def _initialize_error_transition(self):
336        # Initialize the appropriate situation
337        if self.error_order == 0:
338            self._initialize_error_transition_white_noise()
339        else:
340            # Generic setup fixed components of state space matrices
341            # VAR(q) for error transition
342            # (in the individual AR case, we still have the VAR(q) companion
343            # matrix structure, but force the coefficient matrices to be
344            # diagonal)
345            k_endog = self.k_endog
346            k_factors = self.k_factors
347            _factor_order = self._factor_order
348            _error_order = self._error_order
349            _slice = np.s_['selection',
350                           _factor_order:_factor_order + k_endog,
351                           k_factors:k_factors + k_endog]
352            self.ssm[_slice] = np.eye(k_endog)
353            _slice = np.s_[
354                'transition',
355                _factor_order + k_endog:_factor_order + _error_order,
356                _factor_order:_factor_order + _error_order - k_endog]
357            self.ssm[_slice] = np.eye(_error_order - k_endog)
358
359            # Now specialized setups
360            if self.error_var:
361                self._initialize_error_transition_var()
362            else:
363                self._initialize_error_transition_individual()
364
365    def _initialize_error_transition_white_noise(self):
366        # Initialize the parameters
367        self.parameters['error_transition'] = 0
368
369        # No fixed components of state space matrices
370
371        # Setup indices of state space matrices (just an empty slice)
372        self._idx_error_transition = np.s_['transition', 0:0, 0:0]
373
374    def _initialize_error_transition_var(self):
375        k_endog = self.k_endog
376        _factor_order = self._factor_order
377        _error_order = self._error_order
378
379        # Initialize the parameters
380        self.parameters['error_transition'] = _error_order * k_endog
381
382        # Fixed components already setup above
383
384        # Setup indices of state space matrices
385        # Here we want to set all of the elements of the coefficient matrices,
386        # the same as in a VAR specification
387        self._idx_error_transition = np.s_[
388            'transition',
389            _factor_order:_factor_order + k_endog,
390            _factor_order:_factor_order + _error_order]
391
392    def _initialize_error_transition_individual(self):
393        k_endog = self.k_endog
394        _error_order = self._error_order
395
396        # Initialize the parameters
397        self.parameters['error_transition'] = _error_order
398
399        # Fixed components already setup above
400
401        # Setup indices of state space matrices
402        # Here we want to set only the diagonal elements of the coefficient
403        # matrices, and we want to set them in order by equation, not by
404        # matrix (i.e. set the first element of the first matrix's diagonal,
405        # then set the first element of the second matrix's diagonal, then...)
406
407        # The basic setup is a tiled list of diagonal indices, one for each
408        # coefficient matrix
409        idx = np.tile(np.diag_indices(k_endog), self.error_order)
410        # Now we need to shift the rows down to the correct location
411        row_shift = self._factor_order
412        # And we need to shift the columns in an increasing way
413        col_inc = self._factor_order + np.repeat(
414            [i * k_endog for i in range(self.error_order)], k_endog)
415        idx[0] += row_shift
416        idx[1] += col_inc
417
        # Make a copy (without the row and column shifts) so that we can
        # easily get the diagonal parameters back out of a generic
        # coefficient matrix array
420        idx_diag = idx.copy()
421        idx_diag[0] -= row_shift
422        idx_diag[1] -= self._factor_order
423        idx_diag = idx_diag[:, np.lexsort((idx_diag[1], idx_diag[0]))]
424        self._idx_error_diag = (idx_diag[0], idx_diag[1])
425
        # Finally, we want to fill in the entries in the correct order, which
        # is to say we want to fill in lexicographically, first by row and
        # then by column (see the worked illustration at the end of this
        # method)
429        idx = idx[:, np.lexsort((idx[1], idx[0]))]
430        self._idx_error_transition = np.s_['transition', idx[0], idx[1]]
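        # Worked illustration (hypothetical sizes): with k_endog=2,
        # error_order=2, k_factors=1 and factor_order=1 (so _factor_order=1),
        # the transition entries selected above are, in order,
        #     (1, 1), (1, 3), (2, 2), (2, 4)
        # i.e. first both lags for e(y1), then both lags for e(y2), matching
        # the ordering of the error AR parameters in `param_names`.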
431
432    def clone(self, endog, exog=None, **kwargs):
433        return self._clone_from_init_kwds(endog, exog=exog, **kwargs)
434
435    @property
436    def _res_classes(self):
437        return {'fit': (DynamicFactorResults, DynamicFactorResultsWrapper)}
438
439    @property
440    def start_params(self):
441        params = np.zeros(self.k_params, dtype=np.float64)
442
443        endog = self.endog.copy()
444        mask = ~np.any(np.isnan(endog), axis=1)
445        endog = endog[mask]
446        if self.k_exog > 0:
447            exog = self.exog[mask]
448
449        # 1. Factor loadings (estimated via PCA)
450        if self.k_factors > 0:
451            # Use principal components + OLS as starting values
452            res_pca = PCA(endog, ncomp=self.k_factors)
453            mod_ols = OLS(endog, res_pca.factors)
454            res_ols = mod_ols.fit()
455
            # Using the OLS params for the loadings tends to give a higher
            # starting log-likelihood.
458            params[self._params_loadings] = res_ols.params.T.ravel()
459            # params[self._params_loadings] = res_pca.loadings.ravel()
460
            # However, using res_ols.resid tends to cause non-invertible
            # starting VAR coefficients for error VARs
463            # endog = res_ols.resid
464            endog = endog - np.dot(res_pca.factors, res_pca.loadings.T)
465
466        # 2. Exog (OLS on residuals)
467        if self.k_exog > 0:
468            mod_ols = OLS(endog, exog=exog)
469            res_ols = mod_ols.fit()
470            # In the form: beta.x1.y1, beta.x2.y1, beta.x1.y2, ...
471            params[self._params_exog] = res_ols.params.T.ravel()
472            endog = res_ols.resid
473
474        # 3. Factors (VAR on res_pca.factors)
475        stationary = True
476        if self.k_factors > 1 and self.factor_order > 0:
477            # 3a. VAR transition (OLS on factors estimated via PCA)
478            mod_factors = VAR(res_pca.factors)
479            res_factors = mod_factors.fit(maxlags=self.factor_order, ic=None,
480                                          trend='n')
481            # Save the parameters
482            params[self._params_factor_transition] = (
483                res_factors.params.T.ravel())
484
485            # Test for stationarity
486            coefficient_matrices = (
487                params[self._params_factor_transition].reshape(
488                    self.k_factors * self.factor_order, self.k_factors
489                ).T
490            ).reshape(self.k_factors, self.k_factors, self.factor_order).T
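            # Note: the chained reshape / transpose above produces an array of
            # shape (factor_order, k_factors, k_factors), i.e. one
            # (k_factors, k_factors) lag coefficient matrix per lag, which is
            # then checked for stationarity via `is_invertible` below.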
491
492            stationary = is_invertible([1] + list(-coefficient_matrices))
493        elif self.k_factors > 0 and self.factor_order > 0:
494            # 3b. AR transition
495            Y = res_pca.factors[self.factor_order:]
496            X = lagmat(res_pca.factors, self.factor_order, trim='both')
497            params_ar = np.linalg.pinv(X).dot(Y)
498            stationary = is_invertible(np.r_[1, -params_ar.squeeze()])
499            params[self._params_factor_transition] = params_ar[:, 0]
500
501        # Check for stationarity
502        if not stationary and self.enforce_stationarity:
503            raise ValueError('Non-stationary starting autoregressive'
504                             ' parameters found with `enforce_stationarity`'
505                             ' set to True.')
506
507        # 4. Errors
508        if self.error_order == 0:
509            if self.error_cov_type == 'scalar':
510                params[self._params_error_cov] = endog.var(axis=0).mean()
511            elif self.error_cov_type == 'diagonal':
512                params[self._params_error_cov] = endog.var(axis=0)
513            elif self.error_cov_type == 'unstructured':
514                cov_factor = np.diag(endog.std(axis=0))
515                params[self._params_error_cov] = (
516                    cov_factor[self._idx_lower_error_cov].ravel())
517        elif self.error_var:
518            mod_errors = VAR(endog)
519            res_errors = mod_errors.fit(maxlags=self.error_order, ic=None,
520                                        trend='n')
521
522            # Test for stationarity
523            coefficient_matrices = (
524                np.array(res_errors.params.T).ravel().reshape(
525                    self.k_endog * self.error_order, self.k_endog
526                ).T
527            ).reshape(self.k_endog, self.k_endog, self.error_order).T
528
529            stationary = is_invertible([1] + list(-coefficient_matrices))
530            if not stationary and self.enforce_stationarity:
531                raise ValueError('Non-stationary starting error autoregressive'
532                                 ' parameters found with'
533                                 ' `enforce_stationarity` set to True.')
534
535            # Get the error autoregressive parameters
536            params[self._params_error_transition] = (
537                    np.array(res_errors.params.T).ravel())
538
539            # Get the error covariance parameters
540            if self.error_cov_type == 'scalar':
541                params[self._params_error_cov] = (
542                    res_errors.sigma_u.diagonal().mean())
543            elif self.error_cov_type == 'diagonal':
544                params[self._params_error_cov] = res_errors.sigma_u.diagonal()
545            elif self.error_cov_type == 'unstructured':
546                try:
547                    cov_factor = np.linalg.cholesky(res_errors.sigma_u)
548                except np.linalg.LinAlgError:
549                    cov_factor = np.eye(res_errors.sigma_u.shape[0]) * (
550                        res_errors.sigma_u.diagonal().mean()**0.5)
553                params[self._params_error_cov] = (
554                    cov_factor[self._idx_lower_error_cov].ravel())
555        else:
556            error_ar_params = []
557            error_cov_params = []
558            for i in range(self.k_endog):
559                mod_error = ARIMA(endog[:, i], order=(self.error_order, 0, 0),
560                                  trend='n', enforce_stationarity=True)
561                res_error = mod_error.fit(method='burg')
562                error_ar_params += res_error.params[:self.error_order].tolist()
563                error_cov_params += res_error.params[-1:].tolist()
564
565            params[self._params_error_transition] = np.r_[error_ar_params]
566            params[self._params_error_cov] = np.r_[error_cov_params]
567
568        return params
569
570    @property
571    def param_names(self):
572        param_names = []
573        endog_names = self.endog_names
574
575        # 1. Factor loadings
576        param_names += [
577            'loading.f%d.%s' % (j+1, endog_names[i])
578            for i in range(self.k_endog)
579            for j in range(self.k_factors)
580        ]
581
582        # 2. Exog
583        # Recall these are in the form: beta.x1.y1, beta.x2.y1, beta.x1.y2, ...
584        param_names += [
585            'beta.%s.%s' % (self.exog_names[j], endog_names[i])
586            for i in range(self.k_endog)
587            for j in range(self.k_exog)
588        ]
589
590        # 3. Error covariances
591        if self.error_cov_type == 'scalar':
592            param_names += ['sigma2']
593        elif self.error_cov_type == 'diagonal':
594            param_names += [
595                'sigma2.%s' % endog_names[i]
596                for i in range(self.k_endog)
597            ]
598        elif self.error_cov_type == 'unstructured':
599            param_names += [
600                'cov.chol[%d,%d]' % (i + 1, j + 1)
601                for i in range(self.k_endog)
602                for j in range(i+1)
603            ]
604
605        # 4. Factor transition VAR
606        param_names += [
607            'L%d.f%d.f%d' % (i+1, k+1, j+1)
608            for j in range(self.k_factors)
609            for i in range(self.factor_order)
610            for k in range(self.k_factors)
611        ]
612
613        # 5. Error transition VAR
614        if self.error_var:
615            param_names += [
616                'L%d.e(%s).e(%s)' % (i+1, endog_names[k], endog_names[j])
617                for j in range(self.k_endog)
618                for i in range(self.error_order)
619                for k in range(self.k_endog)
620            ]
621        else:
622            param_names += [
623                'L%d.e(%s).e(%s)' % (i+1, endog_names[j], endog_names[j])
624                for j in range(self.k_endog)
625                for i in range(self.error_order)
626            ]
627
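        # Illustrative full ordering (hypothetical two-series, one-factor
        # model with factor_order=1, error_order=1, no exog and a diagonal
        # error covariance, with series named 'y1' and 'y2'):
        #     ['loading.f1.y1', 'loading.f1.y2', 'sigma2.y1', 'sigma2.y2',
        #      'L1.f1.f1', 'L1.e(y1).e(y1)', 'L1.e(y2).e(y2)']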
628        return param_names
629
630    @property
631    def state_names(self):
632        names = []
633        endog_names = self.endog_names
634
635        # Factors and lags
636        names += [
637            (('f%d' % (j + 1)) if i == 0 else ('f%d.L%d' % (j + 1, i)))
638            for i in range(max(1, self.factor_order))
639            for j in range(self.k_factors)]
640
641        if self.error_order > 0:
642            names += [
643                (('e(%s)' % endog_names[j]) if i == 0
644                 else ('e(%s).L%d' % (endog_names[j], i)))
645                for i in range(self.error_order)
646                for j in range(self.k_endog)]
647
648        if self._unused_state:
649            names += ['dummy']
650
651        return names
652
653    def transform_params(self, unconstrained):
654        """
655        Transform unconstrained parameters used by the optimizer to constrained
656        parameters used in likelihood evaluation
657
658        Parameters
659        ----------
660        unconstrained : array_like
661            Array of unconstrained parameters used by the optimizer, to be
662            transformed.
663
664        Returns
665        -------
666        constrained : array_like
667            Array of constrained parameters which may be used in likelihood
668            evaluation.
669
670        Notes
671        -----
672        Constrains the factor transition to be stationary and variances to be
673        positive.
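
        As an illustrative example, with `error_cov_type` set to "diagonal"
        the error variance entries are squared, so an unconstrained value of
        -2. maps to a constrained variance of 4.; `untransform_params` applies
        the inverse mapping (the positive square root).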
674        """
675        unconstrained = np.array(unconstrained, ndmin=1)
676        dtype = unconstrained.dtype
677        constrained = np.zeros(unconstrained.shape, dtype=dtype)
678
679        # 1. Factor loadings
680        # The factor loadings do not need to be adjusted
681        constrained[self._params_loadings] = (
682            unconstrained[self._params_loadings])
683
684        # 2. Exog
685        # The regression coefficients do not need to be adjusted
686        constrained[self._params_exog] = (
687            unconstrained[self._params_exog])
688
689        # 3. Error covariances
690        # If we have variances, force them to be positive
691        if self.error_cov_type in ['scalar', 'diagonal']:
692            constrained[self._params_error_cov] = (
693                unconstrained[self._params_error_cov]**2)
694        # Otherwise, nothing needs to be done
695        elif self.error_cov_type == 'unstructured':
696            constrained[self._params_error_cov] = (
697                unconstrained[self._params_error_cov])
698
699        # 4. Factor transition VAR
700        # VAR transition: optionally force to be stationary
701        if self.enforce_stationarity and self.factor_order > 0:
702            # Transform the parameters
703            unconstrained_matrices = (
704                unconstrained[self._params_factor_transition].reshape(
705                    self.k_factors, self._factor_order))
            # This is always an identity matrix, but because the transform is
            # done prior to update (where the ssm representation matrices
            # change), it may be complex
709            cov = self.ssm['state_cov', :self.k_factors, :self.k_factors].real
710            coefficient_matrices, variance = (
711                constrain_stationary_multivariate(unconstrained_matrices, cov))
712            constrained[self._params_factor_transition] = (
713                coefficient_matrices.ravel())
714        else:
715            constrained[self._params_factor_transition] = (
716                unconstrained[self._params_factor_transition])
717
718        # 5. Error transition VAR
719        # VAR transition: optionally force to be stationary
720        if self.enforce_stationarity and self.error_order > 0:
721
722            # Joint VAR specification
723            if self.error_var:
724                unconstrained_matrices = (
725                    unconstrained[self._params_error_transition].reshape(
726                        self.k_endog, self._error_order))
727                start = self.k_factors
728                end = self.k_factors + self.k_endog
729                cov = self.ssm['state_cov', start:end, start:end].real
730                coefficient_matrices, variance = (
731                    constrain_stationary_multivariate(
732                        unconstrained_matrices, cov))
733                constrained[self._params_error_transition] = (
734                    coefficient_matrices.ravel())
735            # Separate AR specifications
736            else:
737                coefficients = (
738                    unconstrained[self._params_error_transition].copy())
739                for i in range(self.k_endog):
740                    start = i * self.error_order
741                    end = (i + 1) * self.error_order
742                    coefficients[start:end] = constrain_stationary_univariate(
743                        coefficients[start:end])
744                constrained[self._params_error_transition] = coefficients
745
746        else:
747            constrained[self._params_error_transition] = (
748                unconstrained[self._params_error_transition])
749
750        return constrained
751
752    def untransform_params(self, constrained):
753        """
754        Transform constrained parameters used in likelihood evaluation
755        to unconstrained parameters used by the optimizer.
756
757        Parameters
758        ----------
759        constrained : array_like
760            Array of constrained parameters used in likelihood evaluation, to
761            be transformed.
762
763        Returns
764        -------
765        unconstrained : array_like
766            Array of unconstrained parameters used by the optimizer.
767        """
768        constrained = np.array(constrained, ndmin=1)
769        dtype = constrained.dtype
770        unconstrained = np.zeros(constrained.shape, dtype=dtype)
771
772        # 1. Factor loadings
773        # The factor loadings do not need to be adjusted
774        unconstrained[self._params_loadings] = (
775            constrained[self._params_loadings])
776
777        # 2. Exog
778        # The regression coefficients do not need to be adjusted
779        unconstrained[self._params_exog] = (
780            constrained[self._params_exog])
781
782        # 3. Error covariances
783        # If we have variances, force them to be positive
784        if self.error_cov_type in ['scalar', 'diagonal']:
785            unconstrained[self._params_error_cov] = (
786                constrained[self._params_error_cov]**0.5)
787        # Otherwise, nothing needs to be done
788        elif self.error_cov_type == 'unstructured':
789            unconstrained[self._params_error_cov] = (
790                constrained[self._params_error_cov])
791
        # 4. Factor transition VAR
793        # VAR transition: optionally force to be stationary
794        if self.enforce_stationarity and self.factor_order > 0:
795            # Transform the parameters
796            constrained_matrices = (
797                constrained[self._params_factor_transition].reshape(
798                    self.k_factors, self._factor_order))
799            cov = self.ssm['state_cov', :self.k_factors, :self.k_factors].real
800            coefficient_matrices, variance = (
801                unconstrain_stationary_multivariate(
802                    constrained_matrices, cov))
803            unconstrained[self._params_factor_transition] = (
804                coefficient_matrices.ravel())
805        else:
806            unconstrained[self._params_factor_transition] = (
807                constrained[self._params_factor_transition])
808
809        # 5. Error transition VAR
810        # VAR transition: optionally force to be stationary
811        if self.enforce_stationarity and self.error_order > 0:
812
813            # Joint VAR specification
814            if self.error_var:
815                constrained_matrices = (
816                    constrained[self._params_error_transition].reshape(
817                        self.k_endog, self._error_order))
818                start = self.k_factors
819                end = self.k_factors + self.k_endog
820                cov = self.ssm['state_cov', start:end, start:end].real
821                coefficient_matrices, variance = (
822                    unconstrain_stationary_multivariate(
823                        constrained_matrices, cov))
824                unconstrained[self._params_error_transition] = (
825                    coefficient_matrices.ravel())
826            # Separate AR specifications
827            else:
828                coefficients = (
829                    constrained[self._params_error_transition].copy())
830                for i in range(self.k_endog):
831                    start = i * self.error_order
832                    end = (i + 1) * self.error_order
833                    coefficients[start:end] = (
834                        unconstrain_stationary_univariate(
835                            coefficients[start:end]))
836                unconstrained[self._params_error_transition] = coefficients
837
838        else:
839            unconstrained[self._params_error_transition] = (
840                constrained[self._params_error_transition])
841
842        return unconstrained
843
844    def _validate_can_fix_params(self, param_names):
845        super(DynamicFactor, self)._validate_can_fix_params(param_names)
846
847        ix = np.cumsum(list(self.parameters.values()))[:-1]
848        (_, _, _, factor_transition_names, error_transition_names) = [
849            arr.tolist() for arr in np.array_split(self.param_names, ix)]
850
851        if self.enforce_stationarity and self.factor_order > 0:
852            if self.k_factors > 1 or self.factor_order > 1:
853                fix_all = param_names.issuperset(factor_transition_names)
854                fix_any = (
855                    len(param_names.intersection(factor_transition_names)) > 0)
856                if fix_any and not fix_all:
857                    raise ValueError(
858                        'Cannot fix individual factor transition parameters'
859                        ' when `enforce_stationarity=True`. In this case,'
860                        ' must either fix all factor transition parameters or'
861                        ' none.')
862        if self.enforce_stationarity and self.error_order > 0:
863            if self.error_var or self.error_order > 1:
864                fix_all = param_names.issuperset(error_transition_names)
865                fix_any = (
866                    len(param_names.intersection(error_transition_names)) > 0)
867                if fix_any and not fix_all:
868                    raise ValueError(
869                        'Cannot fix individual error transition parameters'
870                        ' when `enforce_stationarity=True`. In this case,'
871                        ' must either fix all error transition parameters or'
872                        ' none.')
873
874    def update(self, params, transformed=True, includes_fixed=False,
875               complex_step=False):
876        """
877        Update the parameters of the model
878
879        Updates the representation matrices to fill in the new parameter
880        values.
881
882        Parameters
883        ----------
884        params : array_like
885            Array of new parameters.
        transformed : bool, optional
            Whether or not `params` is already transformed. If set to False,
            `transform_params` is called. Default is True.
889
895        Notes
896        -----
        Let `n = k_endog`, `m = k_factors`, and `p = factor_order`. Then, in
        the default case of no exogenous regressors, a diagonal observation
        error covariance, and white-noise observation errors, the `params`
        vector has length :math:`[n \times m] + [n] + [m^2 \times p]`.
900        It is expanded in the following way:
901
902        - The first :math:`n \times m` parameters fill out the factor loading
903          matrix, starting from the [0,0] entry and then proceeding along rows.
904          These parameters are not modified in `transform_params`.
        - The next :math:`n` parameters provide variances for the observation
          error terms. They fill in the diagonal of the observation covariance
          matrix, and are constrained to be positive by `transform_params`.
909        - The next :math:`m^2 \times p` parameters are used to create the `p`
910          coefficient matrices for the vector autoregression describing the
911          factor transition. They are transformed in `transform_params` to
912          enforce stationarity of the VAR(p). They are placed so as to make
913          the transition matrix a companion matrix for the VAR. In particular,
914          we assume that the first :math:`m^2` parameters fill the first
915          coefficient matrix (starting at [0,0] and filling along rows), the
916          second :math:`m^2` parameters fill the second matrix, etc.
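
        As a worked illustration with hypothetical sizes, take :math:`n = 3`,
        :math:`m = 1`, and :math:`p = 1` (again with no exogenous regressors,
        a diagonal error covariance, and white-noise errors): the parameter
        vector then has length :math:`3 \times 1 + 3 + 1^2 \times 1 = 7`,
        consisting of three factor loadings, followed by three observation
        error variances, followed by one factor autoregressive coefficient.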
917        """
918        params = self.handle_params(params, transformed=transformed,
919                                    includes_fixed=includes_fixed)
920
921        # 1. Factor loadings
922        # Update the design / factor loading matrix
923        self.ssm[self._idx_loadings] = (
924            params[self._params_loadings].reshape(self.k_endog, self.k_factors)
925        )
926
927        # 2. Exog
928        if self.k_exog > 0:
929            exog_params = params[self._params_exog].reshape(
930                self.k_endog, self.k_exog).T
931            self.ssm[self._idx_exog] = np.dot(self.exog, exog_params).T
932
933        # 3. Error covariances
934        if self.error_cov_type in ['scalar', 'diagonal']:
935            self.ssm[self._idx_error_cov] = (
936                params[self._params_error_cov])
937        elif self.error_cov_type == 'unstructured':
938            error_cov_lower = np.zeros((self.k_endog, self.k_endog),
939                                       dtype=params.dtype)
940            error_cov_lower[self._idx_lower_error_cov] = (
941                params[self._params_error_cov])
942            self.ssm[self._idx_error_cov] = (
943                np.dot(error_cov_lower, error_cov_lower.T))
944
945        # 4. Factor transition VAR
946        self.ssm[self._idx_factor_transition] = (
947            params[self._params_factor_transition].reshape(
948                self.k_factors, self.factor_order * self.k_factors))
949
950        # 5. Error transition VAR
951        if self.error_var:
952            self.ssm[self._idx_error_transition] = (
953                params[self._params_error_transition].reshape(
954                    self.k_endog, self._error_order))
955        else:
956            self.ssm[self._idx_error_transition] = (
957                params[self._params_error_transition])
958
959
960class DynamicFactorResults(MLEResults):
961    """
    Class to hold results from fitting a DynamicFactor model.
963
964    Parameters
965    ----------
966    model : DynamicFactor instance
967        The fitted model instance
968
969    Attributes
970    ----------
971    specification : dictionary
972        Dictionary including all attributes from the DynamicFactor model
973        instance.
974    coefficient_matrices_var : ndarray
975        Array containing autoregressive lag polynomial coefficient matrices,
976        ordered from lowest degree to highest.
977
978    See Also
979    --------
980    statsmodels.tsa.statespace.kalman_filter.FilterResults
981    statsmodels.tsa.statespace.mlemodel.MLEResults
982    """
983    def __init__(self, model, params, filter_results, cov_type=None,
984                 **kwargs):
985        super(DynamicFactorResults, self).__init__(model, params,
986                                                   filter_results, cov_type,
987                                                   **kwargs)
988
989        self.df_resid = np.inf  # attribute required for wald tests
990
991        self.specification = Bunch(**{
992            # Model properties
993            'k_endog': self.model.k_endog,
994            'enforce_stationarity': self.model.enforce_stationarity,
995
996            # Factor-related properties
997            'k_factors': self.model.k_factors,
998            'factor_order': self.model.factor_order,
999
1000            # Error-related properties
1001            'error_order': self.model.error_order,
1002            'error_var': self.model.error_var,
1003            'error_cov_type': self.model.error_cov_type,
1004
1005            # Other properties
1006            'k_exog': self.model.k_exog
1007        })
1008
1009        # Polynomials / coefficient matrices
1010        self.coefficient_matrices_var = None
1011        if self.model.factor_order > 0:
1012            ar_params = (
1013                np.array(self.params[self.model._params_factor_transition]))
1014            k_factors = self.model.k_factors
1015            factor_order = self.model.factor_order
1016            self.coefficient_matrices_var = (
1017                ar_params.reshape(k_factors * factor_order, k_factors).T
1018            ).reshape(k_factors, k_factors, factor_order).T
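            # Shape note: as in `DynamicFactor.start_params`, the chained
            # reshape / transpose yields an array of shape
            # (factor_order, k_factors, k_factors), ordered from lowest lag
            # to highest.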
1019
1020        self.coefficient_matrices_error = None
1021        if self.model.error_order > 0:
1022            ar_params = (
1023                np.array(self.params[self.model._params_error_transition]))
1024            k_endog = self.model.k_endog
1025            error_order = self.model.error_order
1026            if self.model.error_var:
1027                self.coefficient_matrices_error = (
1028                    ar_params.reshape(k_endog * error_order, k_endog).T
1029                ).reshape(k_endog, k_endog, error_order).T
1030            else:
1031                mat = np.zeros((k_endog, k_endog * error_order))
1032                mat[self.model._idx_error_diag] = ar_params
1033                self.coefficient_matrices_error = (
1034                    mat.T.reshape(error_order, k_endog, k_endog))
1035
1036    @property
1037    def factors(self):
1038        """
1039        Estimates of unobserved factors
1040
1041        Returns
1042        -------
1043        out : Bunch
            An object with the attributes described in the Notes section
            below.
1045
1046        Notes
1047        -----
1048        The output is a bunch of the following format:
1049
1050        - `filtered`: a time series array with the filtered estimate of
1051          the component
1052        - `filtered_cov`: a time series array with the filtered estimate of
1053          the variance/covariance of the component
1054        - `smoothed`: a time series array with the smoothed estimate of
1055          the component
1056        - `smoothed_cov`: a time series array with the smoothed estimate of
1057          the variance/covariance of the component
1058        - `offset`: an integer giving the offset in the state vector where
1059          this component begins
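
        A short usage sketch (illustrative; assumes a fitted results object
        ``res`` such as the one produced in the class-level example)::

            estimates = res.factors
            smoothed = estimates.smoothed          # (k_factors, nobs)
            smoothed_cov = estimates.smoothed_cov  # (k_factors, k_factors, nobs)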
1060        """
        # If present, the factors are always the first components of the
        # state vector
1062        out = None
1063        spec = self.specification
1064        if spec.k_factors > 0:
1065            offset = 0
1066            end = spec.k_factors
1067            res = self.filter_results
1068            out = Bunch(
1069                filtered=res.filtered_state[offset:end],
1070                filtered_cov=res.filtered_state_cov[offset:end, offset:end],
1071                smoothed=None, smoothed_cov=None,
1072                offset=offset)
1073            if self.smoothed_state is not None:
1074                out.smoothed = self.smoothed_state[offset:end]
1075            if self.smoothed_state_cov is not None:
1076                out.smoothed_cov = (
1077                    self.smoothed_state_cov[offset:end, offset:end])
1078        return out
1079
1080    @cache_readonly
1081    def coefficients_of_determination(self):
1082        """
        Coefficients of determination (:math:`R^2`) from regressions of each
        individual endogenous variable on each estimated factor (plus a
        constant).
1085
1086        Returns
1087        -------
1088        coefficients_of_determination : ndarray
            A `k_endog` x `k_factors` array, where
            `coefficients_of_determination[i, j]` represents the :math:`R^2`
            value from a regression of endogenous variable `i` on factor `j`
            and a constant.
1093
1094        Notes
1095        -----
1096        Although it can be difficult to interpret the estimated factor loadings
1097        and factors, it is often helpful to use the coefficients of
1098        determination from univariate regressions to assess the importance of
1099        each factor in explaining the variation in each endogenous variable.
1100
1101        In models with many variables and factors, this can sometimes lend
1102        interpretation to the factors (for example sometimes one factor will
1103        load primarily on real variables and another on nominal variables).
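
        A usage sketch (illustrative; assumes a fitted results object
        ``res``)::

            r2 = res.coefficients_of_determination  # (k_endog, k_factors)
            # indices of the series best explained by the first factor
            best = r2[:, 0].argsort()[::-1]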
1104
1105        See Also
1106        --------
1107        plot_coefficients_of_determination
1108        """
1109        from statsmodels.tools import add_constant
1110        spec = self.specification
1111        coefficients = np.zeros((spec.k_endog, spec.k_factors))
1112        which = 'filtered' if self.smoothed_state is None else 'smoothed'
1113
1114        for i in range(spec.k_factors):
1115            exog = add_constant(self.factors[which][i])
1116            for j in range(spec.k_endog):
1117                endog = self.filter_results.endog[j]
1118                coefficients[j, i] = OLS(endog, exog).fit().rsquared
1119
1120        return coefficients
1121
1122    def plot_coefficients_of_determination(self, endog_labels=None,
1123                                           fig=None, figsize=None):
1124        """
1125        Plot the coefficients of determination
1126
1127        Parameters
1128        ----------
1129        endog_labels : bool, optional
1130            Whether or not to label the endogenous variables along the x-axis
1131            of the plots. Default is to include labels if there are 5 or fewer
1132            endogenous variables.
1133        fig : Figure, optional
1134            If given, subplots are created in this figure instead of in a new
1135            figure. Note that the grid will be created in the provided
1136            figure using `fig.add_subplot()`.
1137        figsize : tuple, optional
1138            If a figure is created, this argument allows specifying a size.
1139            The tuple is (width, height).
1140
1141        Notes
1142        -----
1143
1144        Produces a `k_factors` x 1 plot grid. The `i`th plot shows a bar plot
1145        of the coefficients of determination associated with factor `i`. The
1146        endogenous variables are arranged along the x-axis according to their
1147        position in the `endog` array.
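
        A usage sketch (illustrative; assumes a fitted results object
        ``res``)::

            fig = res.plot_coefficients_of_determination(figsize=(10, 6))
            fig.tight_layout()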
1148
1149        See Also
1150        --------
1151        coefficients_of_determination
1152        """
1153        from statsmodels.graphics.utils import _import_mpl, create_mpl_fig
1154        _import_mpl()
1155        fig = create_mpl_fig(fig, figsize)
1156
1157        spec = self.specification
1158
1159        # Should we label endogenous variables?
1160        if endog_labels is None:
1161            endog_labels = spec.k_endog <= 5
1162
1163        # Plot the coefficients of determination
1164        coefficients_of_determination = self.coefficients_of_determination
1165        plot_idx = 1
1166        locations = np.arange(spec.k_endog)
1167        for coeffs in coefficients_of_determination.T:
1168            # Create the new axis
1169            ax = fig.add_subplot(spec.k_factors, 1, plot_idx)
1170            ax.set_ylim((0, 1))
1171            ax.set(title='Factor %i' % plot_idx, ylabel=r'$R^2$')
1172            bars = ax.bar(locations, coeffs)
1173
1174            if endog_labels:
1175                width = bars[0].get_width()
1176                ax.xaxis.set_ticks(locations + width / 2)
1177                ax.xaxis.set_ticklabels(self.model.endog_names)
1178            else:
1179                ax.set(xlabel='Endogenous variables')
1180                ax.xaxis.set_ticks([])
1181
1182            plot_idx += 1
1183
1184        return fig
1185
1186    @Appender(MLEResults.summary.__doc__)
1187    def summary(self, alpha=.05, start=None, separate_params=True):
1188        from statsmodels.iolib.summary import summary_params
1189        spec = self.specification
1190
1191        # Create the model name
1192        model_name = []
1193        if spec.k_factors > 0:
1194            if spec.factor_order > 0:
1195                model_type = ('DynamicFactor(factors=%d, order=%d)' %
1196                              (spec.k_factors, spec.factor_order))
1197            else:
1198                model_type = 'StaticFactor(factors=%d)' % spec.k_factors
1199
1200            model_name.append(model_type)
1201            if spec.k_exog > 0:
1202                model_name.append('%d regressors' % spec.k_exog)
1203        else:
1204            model_name.append('SUR(%d regressors)' % spec.k_exog)
1205
1206        if spec.error_order > 0:
1207            error_type = 'VAR' if spec.error_var else 'AR'
1208            model_name.append('%s(%d) errors' % (error_type, spec.error_order))
1209
1210        summary = super(DynamicFactorResults, self).summary(
1211            alpha=alpha, start=start, model_name=model_name,
1212            display_params=not separate_params
1213        )
1214
1215        if separate_params:
1216            indices = np.arange(len(self.params))
1217
1218            def make_table(self, mask, title, strip_end=True):
1219                res = (self, self.params[mask], self.bse[mask],
1220                       self.zvalues[mask], self.pvalues[mask],
1221                       self.conf_int(alpha)[mask])
1222
1223                param_names = [
1224                    '.'.join(name.split('.')[:-1]) if strip_end else name
1225                    for name in
1226                    np.array(self.data.param_names)[mask].tolist()
1227                ]
1228
1229                return summary_params(res, yname=None, xname=param_names,
1230                                      alpha=alpha, use_t=False, title=title)
1231
1232            k_endog = self.model.k_endog
1233            k_exog = self.model.k_exog
1234            k_factors = self.model.k_factors
1235            factor_order = self.model.factor_order
1236            _factor_order = self.model._factor_order
1237            _error_order = self.model._error_order
1238
1239            # Add parameter tables for each endogenous variable
1240            loading_indices = indices[self.model._params_loadings]
1241            loading_masks = []
1242            exog_indices = indices[self.model._params_exog]
1243            exog_masks = []
1244            for i in range(k_endog):
1245                # 1. Factor loadings
1246                # Recall these are in the form:
1247                # 'loading.f1.y1', 'loading.f2.y1', 'loading.f1.y2', ...
1248
1249                loading_mask = (
1250                    loading_indices[i * k_factors:(i + 1) * k_factors])
1251                loading_masks.append(loading_mask)
1252
1253                # 2. Exog
1254                # Recall these are in the form:
1255                # beta.x1.y1, beta.x2.y1, beta.x1.y2, ...
1256                exog_mask = exog_indices[i * k_exog:(i + 1) * k_exog]
1257                exog_masks.append(exog_mask)
1258
1259                # Create the table
1260                mask = np.concatenate([loading_mask, exog_mask])
1261                title = "Results for equation %s" % self.model.endog_names[i]
1262                table = make_table(self, mask, title)
1263                summary.tables.append(table)
1264
1265            # Add parameter tables for each factor
1266            factor_indices = indices[self.model._params_factor_transition]
1267            factor_masks = []
1268            if factor_order > 0:
1269                for i in range(k_factors):
1270                    start = i * _factor_order
1271                    factor_mask = factor_indices[start: start + _factor_order]
1272                    factor_masks.append(factor_mask)
1273
1274                    # Create the table
1275                    title = "Results for factor equation f%d" % (i+1)
1276                    table = make_table(self, factor_mask, title)
1277                    summary.tables.append(table)
1278
1279            # Add parameter tables for error transitions
1280            error_masks = []
1281            if spec.error_order > 0:
1282                error_indices = indices[self.model._params_error_transition]
1283                for i in range(k_endog):
1284                    if spec.error_var:
1285                        start = i * _error_order
1286                        end = (i + 1) * _error_order
1287                    else:
1288                        start = i * spec.error_order
1289                        end = (i + 1) * spec.error_order
1290
1291                    error_mask = error_indices[start:end]
1292                    error_masks.append(error_mask)
1293
1294                    # Create the table
1295                    title = ("Results for error equation e(%s)" %
1296                             self.model.endog_names[i])
1297                    table = make_table(self, error_mask, title)
1298                    summary.tables.append(table)
1299
1300            # Error covariance terms
1301            error_cov_mask = indices[self.model._params_error_cov]
1302            table = make_table(self, error_cov_mask,
1303                               "Error covariance matrix", strip_end=False)
1304            summary.tables.append(table)
1305
1306            # Add a table for all other parameters
1307            masks = []
1308            for m in (loading_masks, exog_masks, factor_masks,
1309                      error_masks, [error_cov_mask]):
1310                m = np.array(m).flatten()
1311                if len(m) > 0:
1312                    masks.append(m)
1313            masks = np.concatenate(masks)
1314            inverse_mask = np.array(list(set(indices).difference(set(masks))))
1315            if len(inverse_mask) > 0:
1316                table = make_table(self, inverse_mask, "Other parameters",
1317                                   strip_end=False)
1318                summary.tables.append(table)
1319
1320        return summary
1321
1322
1323class DynamicFactorResultsWrapper(MLEResultsWrapper):
1324    _attrs = {}
1325    _wrap_attrs = wrap.union_dicts(MLEResultsWrapper._wrap_attrs,
1326                                   _attrs)
1327    _methods = {}
1328    _wrap_methods = wrap.union_dicts(MLEResultsWrapper._wrap_methods,
1329                                     _methods)
1330wrap.populate_wrapper(DynamicFactorResultsWrapper,  # noqa:E305
1331                      DynamicFactorResults)
1332