1# -*- coding: utf-8 -*-
2"""
3Dynamic factor model.
4
5Author: Chad Fulton
6License: BSD-3
7"""
8from collections import OrderedDict
9from warnings import warn
10
11import numpy as np
12import pandas as pd
13from scipy.linalg import cho_factor, cho_solve, LinAlgError
14
15from statsmodels.tools.data import _is_using_pandas
16from statsmodels.tools.validation import int_like
17from statsmodels.tools.decorators import cache_readonly
18from statsmodels.regression.linear_model import OLS
19from statsmodels.genmod.generalized_linear_model import GLM
20from statsmodels.multivariate.pca import PCA
21
22from statsmodels.tsa.statespace.sarimax import SARIMAX
23from statsmodels.tsa.statespace._quarterly_ar1 import QuarterlyAR1
24from statsmodels.tsa.vector_ar.var_model import VAR
25from statsmodels.tools.tools import Bunch
26from statsmodels.tools.validation import string_like
27from statsmodels.tsa.tsatools import lagmat
28from statsmodels.tsa.statespace import mlemodel, initialization
29from statsmodels.tsa.statespace.tools import (
30    companion_matrix, is_invertible, constrain_stationary_univariate,
31    constrain_stationary_multivariate, unconstrain_stationary_univariate,
32    unconstrain_stationary_multivariate)
33from statsmodels.tsa.statespace.kalman_smoother import (
34    SMOOTHER_STATE, SMOOTHER_STATE_COV, SMOOTHER_STATE_AUTOCOV)
35from statsmodels.base.data import PandasData
36
37from statsmodels.iolib.table import SimpleTable
38from statsmodels.iolib.summary import Summary
39from statsmodels.iolib.tableformatting import fmt_params
40
41
42class FactorBlock(dict):
43    """
44    Helper class for describing and indexing a block of factors.
45
46    Parameters
47    ----------
48    factor_names : tuple of str
49        Tuple of factor names in the block (in the order that they will appear
50        in the state vector).
51    factor_order : int
52        Order of the vector autoregression governing the factor block dynamics.
53    endog_factor_map : pd.DataFrame
54        Mapping from endog variable names to factor names.
55    state_offset : int
56        Offset of this factor block in the state vector.
57    has_endog_Q : bool
58        Flag if the model contains quarterly data.
59
60    Notes
61    -----
62    The goal of this class is, in particular, to make it easier to retrieve
63    indexes of subsets of the state vector that are associated with a
64    particular block of factors.
65
66    - `factors_ix` is a matrix of indices, with rows corresponding to factors
67      in the block and columns corresponding to lags
68    - `factors` is vec(factors_ix) (i.e. it stacks columns, so that it is
69      `factors_ix.ravel(order='F')`). Thinking about a VAR system, the first
70       k*p elements correspond to the equation for the first variable. The next
71       k*p elements correspond to the equation for the second variable, and so
72       on. It contains all of the lags in the state vector, which is max(5, p)
73    - `factors_ar` is the subset of `factors` that have nonzero coefficients,
74      so it contains lags up to p.
75    - `factors_L1` only contains the first lag of the factors
76    - `factors_L1_5` contains the first - fifth lags of the factors
77
78    """
79
80    def __init__(self, factor_names, factor_order, endog_factor_map,
81                 state_offset, k_endog_Q):
82        self.factor_names = factor_names
83        self.k_factors = len(self.factor_names)
84        self.factor_order = factor_order
85        self.endog_factor_map = endog_factor_map.loc[:, factor_names]
86        self.state_offset = state_offset
87        self.k_endog_Q = k_endog_Q
88        if self.k_endog_Q > 0:
89            self._factor_order = max(5, self.factor_order)
90        else:
91            self._factor_order = self.factor_order
92        self.k_states = self.k_factors * self._factor_order
93
94        # Save items
95        self['factors'] = self.factors
96        self['factors_ar'] = self.factors_ar
97        self['factors_ix'] = self.factors_ix
98        self['factors_L1'] = self.factors_L1
99        self['factors_L1_5'] = self.factors_L1_5
100
101    @property
102    def factors_ix(self):
103        """Factor state index array, shaped (k_factors, lags)."""
104        # i.e. the position in the state vector of the second lag of the third
105        # factor is factors_ix[2, 1]
106        # ravel(order='F') gives e.g (f0.L1, f1.L1, f0.L2, f1.L2, f0.L3, ...)
107        # while
108        # ravel(order='C') gives e.g (f0.L1, f0.L2, f0.L3, f1.L1, f1.L2, ...)
109        o = self.state_offset
110        return np.reshape(o + np.arange(self.k_factors * self._factor_order),
111                          (self._factor_order, self.k_factors)).T
112
113    @property
114    def factors(self):
115        """Factors and all lags in the state vector (max(5, p))."""
116        # Note that this is equivalent to factors_ix with ravel(order='F')
117        o = self.state_offset
118        return np.s_[o:o + self.k_factors * self._factor_order]
119
120    @property
121    def factors_ar(self):
122        """Factors and all lags used in the factor autoregression (p)."""
123        o = self.state_offset
124        return np.s_[o:o + self.k_factors * self.factor_order]
125
126    @property
127    def factors_L1(self):
128        """Factors (first block / lag only)."""
129        o = self.state_offset
130        return np.s_[o:o + self.k_factors]
131
132    @property
133    def factors_L1_5(self):
134        """Factors plus four lags."""
135        o = self.state_offset
136        return np.s_[o:o + self.k_factors * 5]
137
138
139class DynamicFactorMQStates(dict):
140    """
141    Helper class for describing and indexing the state vector.
142
143    Parameters
144    ----------
145    k_endog_M : int
146        Number of monthly (or non-time-specific, if k_endog_Q=0) variables.
147    k_endog_Q : int
148        Number of quarterly variables.
149    endog_names : list
150        Names of the endogenous variables.
151    factors : int, list, or dict
152        Integer giving the number of (global) factors, a list with the names of
153        (global) factors, or a dictionary with:
154
155        - keys : names of endogenous variables
156        - values : lists of factor names.
157
158        If this is an integer, then the factor names will be 0, 1, ....
159    factor_orders : int or dict
160        Integer describing the order of the vector autoregression (VAR)
161        governing all factor block dynamics or dictionary with:
162
163        - keys : factor name or tuples of factor names in a block
164        - values : integer describing the VAR order for that factor block
165
166        If a dictionary, this defines the order of the factor blocks in the
167        state vector. Otherwise, factors are ordered so that factors that load
168        on more variables come first (and then alphabetically, to break ties).
169    factor_multiplicities : int or dict
170        This argument provides a convenient way to specify multiple factors
171        that load identically on variables. For example, one may want two
172        "global" factors (factors that load on all variables) that evolve
173        jointly according to a VAR. One could specify two global factors in the
174        `factors` argument and specify that they are in the same block in the
175        `factor_orders` argument, but it is easier to specify a single global
176        factor in the `factors` argument, and set the order in the
177        `factor_orders` argument, and then set the factor multiplicity to 2.
178
179        This argument must be an integer describing the factor multiplicity for
180        all factors or dictionary with:
181
182        - keys : factor name
183        - values : integer describing the factor multiplicity for the factors
184          in the given block
185    idiosyncratic_ar1 : bool
186        Whether or not to model the idiosyncratic component for each series as
187        an AR(1) process. If False, the idiosyncratic component is instead
188        modeled as white noise.
189
190    Attributes
191    ----------
192    k_endog : int
193        Total number of endogenous variables.
194    k_states : int
195        Total number of state variables (those associated with the factors and
196        those associated with the idiosyncratic disturbances).
197    k_posdef : int
198        Total number of state disturbance terms (those associated with the
199        factors and those associated with the idiosyncratic disturbances).
200    k_endog_M : int
201        Number of monthly (or non-time-specific, if k_endog_Q=0) variables.
202    k_endog_Q : int
203        Number of quarterly variables.
204    k_factors : int
205        Total number of factors. Note that factor multiplicities will have
206        already been expanded.
207    k_states_factors : int
208        The number of state variables associated with factors (includes both
209        factors and lags of factors included in the state vector).
210    k_posdef_factors : int
211        The number of state disturbance terms associated with factors.
212    k_states_idio : int
213        Total number of state variables associated with idiosyncratic
214        disturbances.
215    k_posdef_idio : int
216        Total number of state disturbance terms associated with idiosyncratic
217        disturbances.
218    k_states_idio_M : int
219        The number of state variables associated with idiosyncratic
220        disturbances for monthly (or non-time-specific if there are no
221        quarterly variables) variables. If the disturbances are AR(1), then
222        this will be equal to `k_endog_M`, otherwise it will be equal to zero.
223    k_states_idio_Q : int
224        The number of state variables associated with idiosyncratic
225        disturbances for quarterly variables. This will always be equal to
226        `k_endog_Q * 5`, even if the disturbances are not AR(1).
227    k_posdef_idio_M : int
228        The number of state disturbance terms associated with idiosyncratic
229        disturbances for monthly (or non-time-specific if there are no
230        quarterly variables) variables. If the disturbances are AR(1), then
231        this will be equal to `k_endog_M`, otherwise it will be equal to zero.
232    k_posdef_idio_Q : int
233        The number of state disturbance terms associated with idiosyncratic
234        disturbances for quarterly variables. This will always be equal to
235        `k_endog_Q`, even if the disturbances are not AR(1).
236    idiosyncratic_ar1 : bool
237        Whether or not to model the idiosyncratic component for each series as
238        an AR(1) process.
239    factor_blocks : list of FactorBlock
240        List of `FactorBlock` helper instances for each factor block.
241    factor_names : list of str
242        List of factor names.
243    factors : dict
244        Dictionary with:
245
246        - keys : names of endogenous variables
247        - values : lists of factor names.
248
249        Note that factor multiplicities will have already been expanded.
250    factor_orders : dict
251        Dictionary with:
252
253        - keys : tuple of factor names
254        - values : integer describing autoregression order
255
256        Note that factor multiplicities will have already been expanded.
257    max_factor_order : int
258        Maximum autoregression order across all factor blocks.
259    factor_block_orders : pd.Series
260        Series containing lag orders, with the factor block (a tuple of factor
261        names) as the index.
262    factor_multiplicities : dict
263        Dictionary with:
264
265        - keys : factor name
266        - values : integer describing the factor multiplicity for the factors
267          in the given block
268    endog_factor_map : dict
269        Dictionary with:
270
271        - keys : endog name
272        - values : list of factor names
273    loading_counts : pd.Series
274        Series containing number of endogenous variables loading on each
275        factor, with the factor name as the index.
276    block_loading_counts : dict
277        Dictionary with:
278
279        - keys : tuple of factor names
280        - values : average number of endogenous variables loading on the block
281          (note that average is over the factors in the block)
282
283    Notes
284    -----
285    The goal of this class is, in particular, to make it easier to retrieve
286    indexes of subsets of the state vector.
287
288    Note that the ordering of the factor blocks in the state vector is
289    determined by the `factor_orders` argument if a dictionary. Otherwise,
290    factors are ordered so that factors that load on more variables come first
291    (and then alphabetically, to break ties).
292
293    - `factors_L1` is an array with the indexes of first lag of the factors
294      from each block. Ordered first by block, and then by lag.
295    - `factors_L1_5` is an array with the indexes contains the first - fifth
296      lags of the factors from each block. Ordered first by block, and then by
297      lag.
298    - `factors_L1_5_ix` is an array shaped (5, k_factors) with the indexes
299      of the first - fifth lags of the factors from each block.
300    - `idio_ar_L1` is an array with the indexes of the first lag of the
301      idiosyncratic AR states, both monthly (if appliable) and quarterly.
302    - `idio_ar_M` is a slice with the indexes of the idiosyncratic disturbance
303      states for the monthly (or non-time-specific if there are no quarterly
304      variables) variables. It is an empty slice if
305      `idiosyncratic_ar1 = False`.
306    - `idio_ar_Q` is a slice with the indexes of the idiosyncratic disturbance
307      states and all lags, for the quarterly variables. It is an empty slice if
308      there are no quarterly variable.
309    - `idio_ar_Q_ix` is an array shaped (k_endog_Q, 5) with the indexes of the
310      first - fifth lags of the idiosyncratic disturbance states for the
311      quarterly variables.
312    - `endog_factor_iloc` is a list of lists, with entries for each endogenous
313      variable. The entry for variable `i`, `endog_factor_iloc[i]` is a list of
314      indexes of the factors that variable `i` loads on. This does not include
315      any lags, but it can be used with e.g. `factors_L1_5_ix` to get lags.
316
317    """
318
319    def __init__(self, k_endog_M, k_endog_Q, endog_names, factors,
320                 factor_orders, factor_multiplicities, idiosyncratic_ar1):
321        # Save model parameterization
322        self.k_endog_M = k_endog_M
323        self.k_endog_Q = k_endog_Q
324        self.k_endog = self.k_endog_M + self.k_endog_Q
325        self.idiosyncratic_ar1 = idiosyncratic_ar1
326
327        # Validate factor-related inputs
328        factors_is_int = np.issubdtype(type(factors), np.integer)
329        factors_is_list = isinstance(factors, (list, tuple))
330        orders_is_int = np.issubdtype(type(factor_orders), np.integer)
331        if factor_multiplicities is None:
332            factor_multiplicities = 1
333        mult_is_int = np.issubdtype(type(factor_multiplicities), np.integer)
334
335        if not (factors_is_int or factors_is_list or
336                isinstance(factors, dict)):
337            raise ValueError('`factors` argument must an integer number of'
338                             ' factors, a list of global factor names, or a'
339                             ' dictionary, mapping observed variables to'
340                             ' factors.')
341        if not (orders_is_int or isinstance(factor_orders, dict)):
342            raise ValueError('`factor_orders` argument must either be an'
343                             ' integer or a dictionary.')
344        if not (mult_is_int or isinstance(factor_multiplicities, dict)):
345            raise ValueError('`factor_multiplicities` argument must either be'
346                             ' an integer or a dictionary.')
347
348        # Expand integers
349        # If `factors` is an integer, we assume that it denotes the number of
350        # global factors (factors that load on each variable)
351        if factors_is_int or factors_is_list:
352            # Validate this here for a more informative error message
353            if ((factors_is_int and factors == 0) or
354                    (factors_is_list and len(factors) == 0)):
355                raise ValueError('The model must contain at least one factor.')
356
357            if factors_is_list:
358                factor_names = list(factors)
359            else:
360                factor_names = [f'{i}' for i in range(factors)]
361            factors = {name: factor_names[:] for name in endog_names}
362        _factor_names = []
363        for val in factors.values():
364            _factor_names.extend(val)
365        factor_names = set(_factor_names)
366        if orders_is_int:
367            factor_orders = {factor_name: factor_orders
368                             for factor_name in factor_names}
369        if mult_is_int:
370            factor_multiplicities = {factor_name: factor_multiplicities
371                                     for factor_name in factor_names}
372
373        # Apply the factor multiplicities
374        factors, factor_orders = self._apply_factor_multiplicities(
375            factors, factor_orders, factor_multiplicities)
376
377        # Save the (potentially expanded) variables
378        self.factors = factors
379        self.factor_orders = factor_orders
380        self.factor_multiplicities = factor_multiplicities
381
382        # Get the mapping between endog and factors
383        self.endog_factor_map = self._construct_endog_factor_map(
384            factors, endog_names)
385        self.k_factors = self.endog_factor_map.shape[1]
386
387        # Validate number of factors
388        # TODO: could do more extensive validation here.
389        if self.k_factors > self.k_endog_M:
390            raise ValueError(f'Number of factors ({self.k_factors}) cannot be'
391                             ' greater than the number of monthly endogenous'
392                             f' variables ({self.k_endog_M}).')
393
394        # Get `loading_counts`: factor -> # endog loading on the factor
395        self.loading_counts = (
396            self.endog_factor_map.sum(axis=0).rename('count')
397                .reset_index().sort_values(['count', 'factor'],
398                                           ascending=[False, True])
399                .set_index('factor'))
400        # `block_loading_counts`: block -> average of (# loading on factor)
401        # across each factor in the block
402        block_loading_counts = {
403            block: np.atleast_1d(
404                self.loading_counts.loc[list(block), 'count']).mean(axis=0)
405            for block in factor_orders.keys()}
406        ix = pd.Index(block_loading_counts.keys(), tupleize_cols=False,
407                      name='block')
408        self.block_loading_counts = pd.Series(
409            list(block_loading_counts.values()),
410            index=ix, name='count').to_frame().sort_values(
411                ['count', 'block'], ascending=[False, True])['count']
412
413        # Get the mapping between factor blocks and VAR order
414
415        # `factor_block_orders`: pd.Series of factor block -> lag order
416        ix = pd.Index(factor_orders.keys(), tupleize_cols=False, name='block')
417        self.factor_block_orders = pd.Series(
418            list(factor_orders.values()), index=ix, name='order')
419
420        # If the `factor_orders` variable was an integer, then it did not
421        # define an ordering for the factor blocks. In this case, we use the
422        # loading counts to do so. This ensures that e.g. global factors are
423        # listed first.
424        if orders_is_int:
425            keys = self.block_loading_counts.keys()
426            self.factor_block_orders = self.factor_block_orders.loc[keys]
427            self.factor_block_orders.index.name = 'block'
428
429        # Define factor_names based on factor_block_orders (instead of on those
430        # from `endog_factor_map`) to (a) make sure that factors are allocated
431        # to only one block, and (b) order the factor names to be consistent
432        # with the block definitions.
433        factor_names = pd.Series(
434            np.concatenate(list(self.factor_block_orders.index)))
435        missing = [name for name in self.endog_factor_map.columns
436                   if name not in factor_names.tolist()]
437        if len(missing):
438            ix = pd.Index([(factor_name,) for factor_name in missing],
439                          tupleize_cols=False, name='block')
440            default_block_orders = pd.Series(np.ones(len(ix), dtype=int),
441                                             index=ix, name='order')
442            self.factor_block_orders = (
443                self.factor_block_orders.append(default_block_orders))
444            factor_names = pd.Series(
445                np.concatenate(list(self.factor_block_orders.index)))
446        duplicates = factor_names.duplicated()
447        if duplicates.any():
448            duplicate_names = set(factor_names[duplicates])
449            raise ValueError('Each factor can be assigned to at most one'
450                             ' block of factors in `factor_orders`.'
451                             f' Duplicate entries for {duplicate_names}')
452        self.factor_names = factor_names.tolist()
453        self.max_factor_order = np.max(self.factor_block_orders)
454
455        # Re-order the columns of the endog factor mapping to reflect the
456        # orderings of endog_names and factor_names
457        self.endog_factor_map = (
458            self.endog_factor_map.loc[endog_names, factor_names])
459
460        # Create factor block helpers, and get factor-related state and posdef
461        # dimensions
462        self.k_states_factors = 0
463        self.k_posdef_factors = 0
464        state_offset = 0
465        self.factor_blocks = []
466        for factor_names, factor_order in self.factor_block_orders.items():
467            block = FactorBlock(factor_names, factor_order,
468                                self.endog_factor_map, state_offset,
469                                self.k_endog_Q)
470            self.k_states_factors += block.k_states
471            self.k_posdef_factors += block.k_factors
472            state_offset += block.k_states
473
474            self.factor_blocks.append(block)
475
476        # Idiosyncratic state dimensions
477        self.k_states_idio_M = self.k_endog_M if idiosyncratic_ar1 else 0
478        self.k_states_idio_Q = self.k_endog_Q * 5
479        self.k_states_idio = self.k_states_idio_M + self.k_states_idio_Q
480
481        # Idiosyncratic posdef dimensions
482        self.k_posdef_idio_M = self.k_endog_M if self.idiosyncratic_ar1 else 0
483        self.k_posdef_idio_Q = self.k_endog_Q
484        self.k_posdef_idio = self.k_posdef_idio_M + self.k_posdef_idio_Q
485
486        # Total states, posdef
487        self.k_states = self.k_states_factors + self.k_states_idio
488        self.k_posdef = self.k_posdef_factors + self.k_posdef_idio
489
490        # Cache
491        self._endog_factor_iloc = None
492
493    def _apply_factor_multiplicities(self, factors, factor_orders,
494                                     factor_multiplicities):
495        """
496        Expand `factors` and `factor_orders` to account for factor multiplity.
497
498        For example, if there is a `global` factor with multiplicity 2, then
499        this method expands that into `global.1` and `global.2` in both the
500        `factors` and `factor_orders` dictionaries.
501
502        Parameters
503        ----------
504        factors : dict
505            Dictionary of {endog_name: list of factor names}
506        factor_orders : dict
507            Dictionary of {tuple of factor names: factor order}
508        factor_multiplicities : dict
509            Dictionary of {factor name: factor multiplicity}
510
511        Returns
512        -------
513        new_factors : dict
514            Dictionary of {endog_name: list of factor names}, with factor names
515            expanded to incorporate multiplicities.
516        new_factors : dict
517            Dictionary of {tuple of factor names: factor order}, with factor
518            names in each tuple expanded to incorporate multiplicities.
519        """
520        # Expand the factors to account for the multiplicities
521        new_factors = {}
522        for endog_name, factors_list in factors.items():
523            new_factor_list = []
524            for factor_name in factors_list:
525                n = factor_multiplicities.get(factor_name, 1)
526                if n > 1:
527                    new_factor_list += [f'{factor_name}.{i + 1}'
528                                        for i in range(n)]
529                else:
530                    new_factor_list.append(factor_name)
531            new_factors[endog_name] = new_factor_list
532
533        # Expand the factor orders to account for the multiplicities
534        new_factor_orders = {}
535        for block, factor_order in factor_orders.items():
536            if not isinstance(block, tuple):
537                block = (block,)
538            new_block = []
539            for factor_name in block:
540                n = factor_multiplicities.get(factor_name, 1)
541                if n > 1:
542                    new_block += [f'{factor_name}.{i + 1}'
543                                  for i in range(n)]
544                else:
545                    new_block += [factor_name]
546            new_factor_orders[tuple(new_block)] = factor_order
547
548        return new_factors, new_factor_orders
549
550    def _construct_endog_factor_map(self, factors, endog_names):
551        """
552        Construct mapping of observed variables to factors.
553
554        Parameters
555        ----------
556        factors : dict
557            Dictionary of {endog_name: list of factor names}
558        endog_names : list of str
559            List of the names of the observed variables.
560
561        Returns
562        -------
563        endog_factor_map : pd.DataFrame
564            Boolean dataframe with `endog_names` as the index and the factor
565            names (computed from the `factors` input) as the columns. Each cell
566            is True if the associated factor is allowed to load on the
567            associated observed variable.
568
569        """
570        # Validate that all entries in the factors dictionary have associated
571        # factors
572        missing = []
573        for key, value in factors.items():
574            if not isinstance(value, (list, tuple)) or len(value) == 0:
575                missing.append(key)
576        if len(missing):
577            raise ValueError('Each observed variable must be mapped to at'
578                             ' least one factor in the `factors` dictionary.'
579                             f' Variables missing factors are: {missing}.')
580
581        # Validate that we have been told about the factors for each endog
582        # variable. This is because it doesn't make sense to include an
583        # observed variable that doesn't load on any factor
584        missing = set(endog_names).difference(set(factors.keys()))
585        if len(missing):
586            raise ValueError('If a `factors` dictionary is provided, then'
587                             ' it must include entries for each observed'
588                             f' variable. Missing variables are: {missing}.')
589
590        # Figure out the set of factor names
591        # (0 is just a dummy value for the dict - we just do it this way to
592        # collect the keys, in order, without duplicates.)
593        factor_names = {}
594        for key, value in factors.items():
595            if isinstance(value, str):
596                factor_names[value] = 0
597            else:
598                factor_names.update({v: 0 for v in value})
599        factor_names = list(factor_names.keys())
600        k_factors = len(factor_names)
601
602        endog_factor_map = pd.DataFrame(
603            np.zeros((self.k_endog, k_factors), dtype=bool),
604            index=pd.Index(endog_names, name='endog'),
605            columns=pd.Index(factor_names, name='factor'))
606        for key, value in factors.items():
607            endog_factor_map.loc[key, value] = True
608
609        return endog_factor_map
610
611    @property
612    def factors_L1(self):
613        """Factors."""
614        ix = np.arange(self.k_states_factors)
615        iloc = tuple(ix[block.factors_L1] for block in self.factor_blocks)
616        return np.concatenate(iloc)
617
618    @property
619    def factors_L1_5_ix(self):
620        """Factors plus any lags, index shaped (5, k_factors)."""
621        ix = np.arange(self.k_states_factors)
622        iloc = []
623        for block in self.factor_blocks:
624            iloc.append(ix[block.factors_L1_5].reshape(5, block.k_factors))
625        return np.concatenate(iloc, axis=1)
626
627    @property
628    def idio_ar_L1(self):
629        """Idiosyncratic AR states, (first block / lag only)."""
630        ix1 = self.k_states_factors
631        if self.idiosyncratic_ar1:
632            ix2 = ix1 + self.k_endog
633        else:
634            ix2 = ix1 + self.k_endog_Q
635        return np.s_[ix1:ix2]
636
637    @property
638    def idio_ar_M(self):
639        """Idiosyncratic AR states for monthly variables."""
640        ix1 = self.k_states_factors
641        ix2 = ix1
642        if self.idiosyncratic_ar1:
643            ix2 += self.k_endog_M
644        return np.s_[ix1:ix2]
645
646    @property
647    def idio_ar_Q(self):
648        """Idiosyncratic AR states and all lags for quarterly variables."""
649        # Note that this is equivalent to idio_ar_Q_ix with ravel(order='F')
650        ix1 = self.k_states_factors
651        if self.idiosyncratic_ar1:
652            ix1 += self.k_endog_M
653        ix2 = ix1 + self.k_endog_Q * 5
654        return np.s_[ix1:ix2]
655
656    @property
657    def idio_ar_Q_ix(self):
658        """Idiosyncratic AR (quarterly) state index, (k_endog_Q, lags)."""
659        # i.e. the position in the state vector of the second lag of the third
660        # quarterly variable is idio_ar_Q_ix[2, 1]
661        # ravel(order='F') gives e.g (y1.L1, y2.L1, y1.L2, y2.L3, y1.L3, ...)
662        # while
663        # ravel(order='C') gives e.g (y1.L1, y1.L2, y1.L3, y2.L1, y2.L2, ...)
664        start = self.k_states_factors
665        if self.idiosyncratic_ar1:
666            start += self.k_endog_M
667        return (start + np.reshape(
668                np.arange(5 * self.k_endog_Q), (5, self.k_endog_Q)).T)
669
670    @property
671    def endog_factor_iloc(self):
672        """List of list of int, factor indexes for each observed variable."""
673        # i.e. endog_factor_iloc[i] is a list of integer locations of the
674        # factors that load on the ith observed variable
675        if self._endog_factor_iloc is None:
676            ilocs = []
677            for i in range(self.k_endog):
678                ilocs.append(np.where(self.endog_factor_map.iloc[i])[0])
679            self._endog_factor_iloc = ilocs
680        return self._endog_factor_iloc
681
682    def __getitem__(self, key):
683        """
684        Use square brackets to access index / slice elements.
685
686        This is convenient in highlighting the indexing / slice quality of
687        these attributes in the code below.
688        """
689        if key in ['factors_L1', 'factors_L1_5_ix', 'idio_ar_L1', 'idio_ar_M',
690                   'idio_ar_Q', 'idio_ar_Q_ix']:
691            return getattr(self, key)
692        else:
693            raise KeyError(key)
694
695
696class DynamicFactorMQ(mlemodel.MLEModel):
697    r"""
698    Dynamic factor model with EM algorithm; option for monthly/quarterly data.
699
700    Implementation of the dynamic factor model of Bańbura and Modugno (2014)
701    ([1]_) and Bańbura, Giannone, and Reichlin (2011) ([2]_). Uses the EM
702    algorithm for parameter fitting, and so can accommodate a large number of
703    left-hand-side variables. Specifications can include any collection of
704    blocks of factors, including different factor autoregression orders, and
705    can include AR(1) processes for idiosyncratic disturbances. Can
706    incorporate monthly/quarterly mixed frequency data along the lines of
707    Mariano and Murasawa (2011) ([4]_). A special case of this model is the
708    Nowcasting model of Bok et al. (2017) ([3]_). Moreover, this model can be
709    used to compute the news associated with updated data releases.
710
711    Parameters
712    ----------
713    endog : array_like
714        Observed time-series process :math:`y`. See the "Notes" section for
715        details on how to set up a model with monthly/quarterly mixed frequency
716        data.
717    k_endog_monthly : int, optional
718        If specifying a monthly/quarterly mixed frequency model in which the
719        provided `endog` dataset contains both the monthly and quarterly data,
720        this variable should be used to indicate how many of the variables
721        are monthly. Note that when using the `k_endog_monthly` argument, the
722        columns with monthly variables in `endog` should be ordered first, and
723        the columns with quarterly variables should come afterwards. See the
724        "Notes" section for details on how to set up a model with
725        monthly/quarterly mixed frequency data.
726    factors : int, list, or dict, optional
727        Integer giving the number of (global) factors, a list with the names of
728        (global) factors, or a dictionary with:
729
730        - keys : names of endogenous variables
731        - values : lists of factor names.
732
733        If this is an integer, then the factor names will be 0, 1, .... The
734        default is a single factor that loads on all variables. Note that there
735        cannot be more factors specified than there are monthly variables.
736    factor_orders : int or dict, optional
737        Integer describing the order of the vector autoregression (VAR)
738        governing all factor block dynamics or dictionary with:
739
740        - keys : factor name or tuples of factor names in a block
741        - values : integer describing the VAR order for that factor block
742
743        If a dictionary, this defines the order of the factor blocks in the
744        state vector. Otherwise, factors are ordered so that factors that load
745        on more variables come first (and then alphabetically, to break ties).
746    factor_multiplicities : int or dict, optional
747        This argument provides a convenient way to specify multiple factors
748        that load identically on variables. For example, one may want two
749        "global" factors (factors that load on all variables) that evolve
750        jointly according to a VAR. One could specify two global factors in the
751        `factors` argument and specify that they are in the same block in the
752        `factor_orders` argument, but it is easier to specify a single global
753        factor in the `factors` argument, and set the order in the
754        `factor_orders` argument, and then set the factor multiplicity to 2.
755
756        This argument must be an integer describing the factor multiplicity for
757        all factors or dictionary with:
758
759        - keys : factor name
760        - values : integer describing the factor multiplicity for the factors
761          in the given block
762
763    idiosyncratic_ar1 : bool
764        Whether or not to model the idiosyncratic component for each series as
765        an AR(1) process. If False, the idiosyncratic component is instead
766        modeled as white noise.
767    standardize : bool or tuple, optional
768        If a boolean, whether or not to standardize each endogenous variable to
769        have mean zero and standard deviation 1 before fitting the model. See
770        "Notes" for details about how this option works with postestimation
771        output. If a tuple (usually only used internally), then the tuple must
772        have length 2, with each element containing a Pandas series with index
773        equal to the names of the endogenous variables. The first element
774        should contain the mean values and the second element should contain
775        the standard deviations. Default is True.
776    endog_quarterly : pandas.Series or pandas.DataFrame
777        Observed quarterly variables. If provided, must be a Pandas Series or
778        DataFrame with a DatetimeIndex or PeriodIndex at the quarterly
779        frequency. See the "Notes" section for details on how to set up a model
780        with monthly/quarterly mixed frequency data.
781    init_t0 : bool, optional
782        If True, this option initializes the Kalman filter with the
783        distribution for :math:`\alpha_0` rather than :math:`\alpha_1`. See
784        the "Notes" section for more details. This option is rarely used except
785        for testing. Default is False.
786    obs_cov_diag : bool, optional
787        If True and if `idiosyncratic_ar1 is True`, then this option puts small
788        positive values in the observation disturbance covariance matrix. This
789        is not required for estimation and is rarely used except for testing.
790        (It is sometimes used to prevent numerical errors, for example those
791        associated with a positive semi-definite forecast error covariance
792        matrix at the first time step when using EM initialization, but state
793        space models in Statsmodels switch to the univariate approach in those
794        cases, and so do not need to use this trick). Default is False.
795
796    Notes
797    -----
798    The basic model is:
799
800    .. math::
801
802        y_t & = \Lambda f_t + \epsilon_t \\
803        f_t & = A_1 f_{t-1} + \dots + A_p f_{t-p} + u_t
804
805    where:
806
807    - :math:`y_t` is observed data at time t
808    - :math:`\epsilon_t` is idiosyncratic disturbance at time t (see below for
809      details, including modeling serial correlation in this term)
810    - :math:`f_t` is the unobserved factor at time t
811    - :math:`u_t \sim N(0, Q)` is the factor disturbance at time t
812
813    and:
814
815    - :math:`\Lambda` is referred to as the matrix of factor loadings
816    - :math:`A_i` are matrices of autoregression coefficients
817
818    Furthermore, we allow the idiosyncratic disturbances to be serially
819    correlated, so that, if `idiosyncratic_ar1=True`,
820    :math:`\epsilon_{i,t} = \rho_i \epsilon_{i,t-1} + e_{i,t}`, where
821    :math:`e_{i,t} \sim N(0, \sigma_i^2)`. If `idiosyncratic_ar1=False`,
822    then we instead have :math:`\epsilon_{i,t} = e_{i,t}`.
823
824    This basic setup can be found in [1]_, [2]_, [3]_, and [4]_.
825
826    We allow for two generalizations of this model:
827
828    1. Following [2]_, we allow multiple "blocks" of factors, which are
829       independent from the other blocks of factors. Different blocks can be
830       set to load on different subsets of the observed variables, and can be
831       specified with different lag orders.
832    2. Following [4]_ and [2]_, we allow mixed frequency models in which both
833       monthly and quarterly data are used. See the section on "Mixed frequency
834       models", below, for more details.
835
836    Additional notes:
837
838    - The observed data may contain arbitrary patterns of missing entries.
839
840    **EM algorithm**
841
842    This model contains a potentially very large number of parameters, and it
843    can be difficult and take a prohibitively long time to numerically optimize
844    the likelihood function using quasi-Newton methods. Instead, the default
845    fitting method in this model uses the EM algorithm, as detailed in [1]_.
846    As a result, the model can accommodate datasets with hundreds of
847    observed variables.
848
849    **Mixed frequency data**
850
851    This model can handle mixed frequency data in two ways. In this section,
852    we only briefly describe this, and refer readers to [2]_ and [4]_ for all
853    details.
854
855    First, because there can be arbitrary patterns of missing data in the
856    observed vector, one can simply include lower frequency variables as
857    observed in a particular higher frequency period, and missing otherwise.
858    For example, in a monthly model, one could include quarterly data as
859    occurring on the third month of each quarter. To use this method, one
860    simply needs to combine the data into a single dataset at the higher
861    frequency that can be passed to this model as the `endog` argument.
862    However, depending on the type of variables used in the analysis and the
863    assumptions about the data generating process, this approach may not be
864    valid.
865
866    For example, suppose that we are interested in the growth rate of real GDP,
867    which is measured at a quarterly frequency. If the basic factor model is
868    specified at a monthly frequency, then the quarterly growth rate in the
869    third month of each quarter -- which is what we actually observe -- is
870    approximated by a particular weighted average of unobserved monthly growth
871    rates. We need to take this particular weight moving average into account
872    in constructing our model, and this is what the second approach does.
873
874    The second approach follows [2]_ and [4]_ in constructing a state space
875    form to explicitly model the quarterly growth rates in terms of the
876    unobserved monthly growth rates. To use this approach, there are two
877    methods:
878
879    1. Combine the monthly and quarterly data into a single dataset at the
880       monthly frequency, with the monthly data in the first columns and the
881       quarterly data in the last columns. Pass this dataset to the model as
882       the `endog` argument and give the number of the variables that are
883       monthly as the `k_endog_monthly` argument.
884    2. Construct a monthly dataset as a Pandas DataFrame with a DatetimeIndex
885       or PeriodIndex at the monthly frequency and separately construct a
886       quarterly dataset as a Pandas DataFrame with a DatetimeIndex or
887       PeriodIndex at the quarterly frequency. Pass the monthly DataFrame to
888       the model as the `endog` argument and pass the quarterly DataFrame to
889       the model as the `endog_quarterly` argument.
890
891    Note that this only incorporates one particular type of mixed frequency
892    data. See also Banbura et al. (2013). "Now-Casting and the Real-Time Data
893    Flow." for discussion about other types of mixed frequency data that are
894    not supported by this framework.
895
896    **Nowcasting and the news**
897
898    Through its support for monthly/quarterly mixed frequency data, this model
899    can allow for the nowcasting of quarterly variables based on monthly
900    observations. In particular, [2]_ and [3]_ use this model to construct
901    nowcasts of real GDP and analyze the impacts of "the news", derived from
902    incoming data on a real-time basis. This latter functionality can be
903    accessed through the `news` method of the results object.
904
905    **Standardizing data**
906
907    As is often the case in formulating a dynamic factor model, we do not
908    explicitly account for the mean of each observed variable. Instead, the
909    default behavior is to standardize each variable prior to estimation. Thus
910    if :math:`y_t` are the given observed data, the dynamic factor model is
911    actually estimated on the standardized data defined by:
912
913    .. math::
914
915        x_{i, t} = (y_{i, t} - \bar y_i) / s_i
916
917    where :math:`\bar y_i` is the sample mean and :math:`s_i` is the sample
918    standard deviation.
919
920    By default, if standardization is applied prior to estimation, results such
921    as in-sample predictions, out-of-sample forecasts, and the computation of
922    the "news"  are reported in the scale of the original data (i.e. the model
923    output has the reverse transformation applied before it is returned to the
924    user).
925
926    Standardization can be disabled by passing `standardization=False` to the
927    model constructor.
928
929    **Identification of factors and loadings**
930
931    The estimated factors and the factor loadings in this model are only
932    identified up to an invertible transformation. As described in (the working
933    paper version of) [2]_, while it is possible to impose normalizations to
934    achieve identification, the EM algorithm does will converge regardless.
935    Moreover, for nowcasting and forecasting purposes, identification is not
936    required. This model does not impose any normalization to identify the
937    factors and the factor loadings.
938
939    **Miscellaneous**
940
941    There are two arguments available in the model constructor that are rarely
942    used but which deserve a brief mention: `init_t0` and `obs_cov_diag`. These
943    arguments are provided to allow exactly matching the output of other
944    packages that have slight differences in how the underlying state space
945    model is set up / applied.
946
947    - `init_t0`: state space models in Statsmodels follow Durbin and Koopman in
948      initializing the model with :math:`\alpha_1 \sim N(a_1, P_1)`. Other
949      implementations sometimes initialize instead with
950      :math:`\alpha_0 \sim N(a_0, P_0)`. We can accommodate this by prepending
951      a row of NaNs to the observed dataset.
952    - `obs_cov_diag`: the state space form in [1]_ incorporates non-zero (but
953      very small) diagonal elements for the observation disturbance covariance
954      matrix.
955
956    Examples
957    --------
958    Constructing and fitting a `DynamicFactorMQ` model.
959
960    >>> data = sm.datasets.macrodata.load_pandas().data.iloc[-100:]
961    >>> data.index = pd.period_range(start='1984Q4', end='2009Q3', freq='Q')
962    >>> endog = data[['infl', 'tbilrate']].resample('M').last()
963    >>> endog_Q = np.log(data[['realgdp', 'realcons']]).diff().iloc[1:] * 400
964
965    **Basic usage**
966
967    In the simplest case, passing only the `endog` argument results in a model
968    with a single factor that follows an AR(1) process. Note that because we
969    are not also providing an `endog_quarterly` dataset, `endog` can be a numpy
970    array or Pandas DataFrame with any index (it does not have to be monthly).
971
972    The `summary` method can be useful in checking the model specification.
973
974    >>> mod = sm.tsa.DynamicFactorMQ(endog)
975    >>> print(mod.summary())
976                        Model Specification: Dynamic Factor Model
977    ==========================================================================
978    Model:         Dynamic Factor Model   # of monthly variables:          2
979                + 1 factors in 1 blocks   # of factors:                    1
980                  + AR(1) idiosyncratic   Idiosyncratic disturbances:  AR(1)
981    Sample:                     1984-10   Standardize variables:        True
982                              - 2009-09
983    Observed variables / factor loadings
984    ========================
985    Dep. variable          0
986    ------------------------
987             infl          X
988         tbilrate          X
989        Factor blocks:
990    =====================
991         block      order
992    ---------------------
993             0          1
994    =====================
995
996    **Factors**
997
998    With `factors=2`, there will be two independent factors that will each
999    evolve according to separate AR(1) processes.
1000
1001    >>> mod = sm.tsa.DynamicFactorMQ(endog, factors=2)
1002    >>> print(mod.summary())
1003                        Model Specification: Dynamic Factor Model
1004    ==========================================================================
1005    Model:         Dynamic Factor Model   # of monthly variables:          2
1006                + 2 factors in 2 blocks   # of factors:                    2
1007                  + AR(1) idiosyncratic   Idiosyncratic disturbances:  AR(1)
1008    Sample:                     1984-10   Standardize variables:        True
1009                              - 2009-09
1010    Observed variables / factor loadings
1011    ===================================
1012    Dep. variable          0          1
1013    -----------------------------------
1014             infl          X          X
1015         tbilrate          X          X
1016        Factor blocks:
1017    =====================
1018         block      order
1019    ---------------------
1020             0          1
1021             1          1
1022    =====================
1023
1024    **Factor multiplicities**
1025
1026    By instead specifying `factor_multiplicities=2`, we would still have two
1027    factors, but they would be dependent and would evolve jointly according
1028    to a VAR(1) process.
1029
1030    >>> mod = sm.tsa.DynamicFactorMQ(endog, factor_multiplicities=2)
1031    >>> print(mod.summary())
1032                        Model Specification: Dynamic Factor Model
1033    ==========================================================================
1034    Model:         Dynamic Factor Model   # of monthly variables:          2
1035                + 2 factors in 1 blocks   # of factors:                    2
1036                  + AR(1) idiosyncratic   Idiosyncratic disturbances:  AR(1)
1037    Sample:                     1984-10   Standardize variables:        True
1038                              - 2009-09
1039    Observed variables / factor loadings
1040    ===================================
1041    Dep. variable        0.1        0.2
1042    -----------------------------------
1043             infl         X          X
1044         tbilrate         X          X
1045        Factor blocks:
1046    =====================
1047         block      order
1048    ---------------------
1049      0.1, 0.2          1
1050    =====================
1051
1052    **Factor orders**
1053
1054    In either of the above cases, we could extend the order of the (vector)
1055    autoregressions by using the `factor_orders` argument. For example, the
1056    below model would contain two independent factors that each evolve
1057    according to a separate AR(2) process:
1058
1059    >>> mod = sm.tsa.DynamicFactorMQ(endog, factors=2, factor_orders=2)
1060    >>> print(mod.summary())
1061                        Model Specification: Dynamic Factor Model
1062    ==========================================================================
1063    Model:         Dynamic Factor Model   # of monthly variables:          2
1064                + 2 factors in 2 blocks   # of factors:                    2
1065                  + AR(1) idiosyncratic   Idiosyncratic disturbances:  AR(1)
1066    Sample:                     1984-10   Standardize variables:        True
1067                              - 2009-09
1068    Observed variables / factor loadings
1069    ===================================
1070    Dep. variable          0          1
1071    -----------------------------------
1072             infl          X          X
1073         tbilrate          X          X
1074        Factor blocks:
1075    =====================
1076         block      order
1077    ---------------------
1078             0          2
1079             1          2
1080    =====================
1081
1082    **Serial correlation in the idiosyncratic disturbances**
1083
1084    By default, the model allows each idiosyncratic disturbance terms to evolve
1085    according to an AR(1) process. If preferred, they can instead be specified
1086    to be serially independent by passing `ididosyncratic_ar1=False`.
1087
1088    >>> mod = sm.tsa.DynamicFactorMQ(endog, idiosyncratic_ar1=False)
1089    >>> print(mod.summary())
1090                        Model Specification: Dynamic Factor Model
1091    ==========================================================================
1092    Model:         Dynamic Factor Model   # of monthly variables:          2
1093                + 1 factors in 1 blocks   # of factors:                    1
1094                    + iid idiosyncratic   Idiosyncratic disturbances:    iid
1095    Sample:                     1984-10   Standardize variables:        True
1096                              - 2009-09
1097    Observed variables / factor loadings
1098    ========================
1099    Dep. variable          0
1100    ------------------------
1101             infl          X
1102         tbilrate          X
1103        Factor blocks:
1104    =====================
1105         block      order
1106    ---------------------
1107             0          1
1108    =====================
1109
1110    *Monthly / Quarterly mixed frequency*
1111
1112    To specify a monthly / quarterly mixed frequency model see the (Notes
1113    section for more details about these models):
1114
1115    >>> mod = sm.tsa.DynamicFactorMQ(endog, endog_quarterly=endog_Q)
1116    >>> print(mod.summary())
1117                        Model Specification: Dynamic Factor Model
1118    ==========================================================================
1119    Model:         Dynamic Factor Model   # of monthly variables:          2
1120                + 1 factors in 1 blocks   # of quarterly variables:        2
1121                + Mixed frequency (M/Q)   # of factors:                    1
1122                  + AR(1) idiosyncratic   Idiosyncratic disturbances:  AR(1)
1123    Sample:                     1984-10   Standardize variables:        True
1124                              - 2009-09
1125    Observed variables / factor loadings
1126    ========================
1127    Dep. variable          0
1128    ------------------------
1129             infl          X
1130         tbilrate          X
1131          realgdp          X
1132         realcons          X
1133        Factor blocks:
1134    =====================
1135         block      order
1136    ---------------------
1137             0          1
1138    =====================
1139
1140    *Customize observed variable / factor loadings*
1141
1142    To specify that certain that certain observed variables only load on
1143    certain factors, it is possible to pass a dictionary to the `factors`
1144    argument.
1145
1146    >>> factors = {'infl': ['global']
1147    ...            'tbilrate': ['global']
1148    ...            'realgdp': ['global', 'real']
1149    ...            'realcons': ['global', 'real']}
1150    >>> mod = sm.tsa.DynamicFactorMQ(endog, endog_quarterly=endog_Q)
1151    >>> print(mod.summary())
1152                        Model Specification: Dynamic Factor Model
1153    ==========================================================================
1154    Model:         Dynamic Factor Model   # of monthly variables:          2
1155                + 2 factors in 2 blocks   # of quarterly variables:        2
1156                + Mixed frequency (M/Q)   # of factor blocks:              2
1157                  + AR(1) idiosyncratic   Idiosyncratic disturbances:  AR(1)
1158    Sample:                     1984-10   Standardize variables:        True
1159                              - 2009-09
1160    Observed variables / factor loadings
1161    ===================================
1162    Dep. variable     global       real
1163    -----------------------------------
1164             infl       X
1165         tbilrate       X
1166          realgdp       X           X
1167         realcons       X           X
1168        Factor blocks:
1169    =====================
1170         block      order
1171    ---------------------
1172        global          1
1173          real          1
1174    =====================
1175
1176    **Fitting parameters**
1177
1178    To fit the model, use the `fit` method. This method uses the EM algorithm
1179    by default.
1180
1181    >>> mod = sm.tsa.DynamicFactorMQ(endog)
1182    >>> res = mod.fit()
1183    >>> print(res.summary())
1184                              Dynamic Factor Results
1185    ==========================================================================
1186    Dep. Variable:      ['infl', 'tbilrate']   No. Observations:         300
1187    Model:              Dynamic Factor Model   Log Likelihood       -127.909
1188                     + 1 factors in 1 blocks   AIC                   271.817
1189                       + AR(1) idiosyncratic   BIC                   301.447
1190    Date:                   Tue, 04 Aug 2020   HQIC                  283.675
1191    Time:                           15:59:11   EM Iterations              83
1192    Sample:                       10-31-1984
1193                                - 09-30-2009
1194    Covariance Type:            Not computed
1195                        Observation equation:
1196    ==============================================================
1197    Factor loadings:          0    idiosyncratic: AR(1)       var.
1198    --------------------------------------------------------------
1199                infl      -0.67                    0.39       0.73
1200            tbilrate      -0.63                    0.99       0.01
1201           Transition: Factor block 0
1202    =======================================
1203                     L1.0    error variance
1204    ---------------------------------------
1205             0       0.98              0.01
1206    =======================================
1207    Warnings:
1208    [1] Covariance matrix not calculated.
1209
1210    *Displaying iteration progress*
1211
1212    To display information about the EM iterations, use the `disp` argument.
1213
1214    >>> mod = sm.tsa.DynamicFactorMQ(endog)
1215    >>> res = mod.fit(disp=10)
1216    EM start iterations, llf=-291.21
1217    EM iteration 10, llf=-157.17, convergence criterion=0.053801
1218    EM iteration 20, llf=-128.99, convergence criterion=0.0035545
1219    EM iteration 30, llf=-127.97, convergence criterion=0.00010224
1220    EM iteration 40, llf=-127.93, convergence criterion=1.3281e-05
1221    EM iteration 50, llf=-127.92, convergence criterion=5.4725e-06
1222    EM iteration 60, llf=-127.91, convergence criterion=2.8665e-06
1223    EM iteration 70, llf=-127.91, convergence criterion=1.6999e-06
1224    EM iteration 80, llf=-127.91, convergence criterion=1.1085e-06
1225    EM converged at iteration 83, llf=-127.91,
1226       convergence criterion=9.9004e-07 < tolerance=1e-06
1227
1228    **Results: forecasting, impulse responses, and more**
1229
1230    One the model is fitted, there are a number of methods available from the
1231    results object. Some examples include:
1232
1233    *Forecasting*
1234
1235    >>> mod = sm.tsa.DynamicFactorMQ(endog)
1236    >>> res = mod.fit()
1237    >>> print(res.forecast(steps=5))
1238                 infl  tbilrate
1239    2009-10  1.784169  0.260401
1240    2009-11  1.735848  0.305981
1241    2009-12  1.730674  0.350968
1242    2010-01  1.742110  0.395369
1243    2010-02  1.759786  0.439194
1244
1245    *Impulse responses*
1246
1247    >>> mod = sm.tsa.DynamicFactorMQ(endog)
1248    >>> res = mod.fit()
1249    >>> print(res.impulse_responses(steps=5))
1250           infl  tbilrate
1251    0 -1.511956 -1.341498
1252    1 -1.483172 -1.315960
1253    2 -1.454937 -1.290908
1254    3 -1.427240 -1.266333
1255    4 -1.400069 -1.242226
1256    5 -1.373416 -1.218578
1257
1258    For other available methods (including in-sample prediction, simulation of
1259    time series, extending the results to incorporate new data, and the news),
1260    see the documentation for state space models.
1261
1262    References
1263    ----------
1264    .. [1] Bańbura, Marta, and Michele Modugno.
1265           "Maximum likelihood estimation of factor models on datasets with
1266           arbitrary pattern of missing data."
1267           Journal of Applied Econometrics 29, no. 1 (2014): 133-160.
1268    .. [2] Bańbura, Marta, Domenico Giannone, and Lucrezia Reichlin.
1269           "Nowcasting."
1270           The Oxford Handbook of Economic Forecasting. July 8, 2011.
1271    .. [3] Bok, Brandyn, Daniele Caratelli, Domenico Giannone,
1272           Argia M. Sbordone, and Andrea Tambalotti. 2018.
1273           "Macroeconomic Nowcasting and Forecasting with Big Data."
1274           Annual Review of Economics 10 (1): 615-43.
1275           https://doi.org/10.1146/annurev-economics-080217-053214.
1276    .. [4] Mariano, Roberto S., and Yasutomo Murasawa.
1277           "A coincident index, common factors, and monthly real GDP."
1278           Oxford Bulletin of Economics and Statistics 72, no. 1 (2010): 27-46.
1279
1280    """
1281
1282    def __init__(self, endog, k_endog_monthly=None, factors=1, factor_orders=1,
1283                 factor_multiplicities=None, idiosyncratic_ar1=True,
1284                 standardize=True, endog_quarterly=None, init_t0=False,
1285                 obs_cov_diag=False, **kwargs):
1286        # Handle endog variables
1287        if endog_quarterly is not None:
1288            if k_endog_monthly is not None:
1289                raise ValueError('If `endog_quarterly` is specified, then'
1290                                 ' `endog` must contain only monthly'
1291                                 ' variables, and so `k_endog_monthly` cannot'
1292                                 ' be specified since it will be inferred from'
1293                                 ' the shape of `endog`.')
1294            endog, k_endog_monthly = self.construct_endog(
1295                endog, endog_quarterly)
1296        endog_is_pandas = _is_using_pandas(endog, None)
1297
1298        if endog_is_pandas:
1299            if isinstance(endog, pd.Series):
1300                endog = endog.to_frame()
1301        else:
1302            if np.ndim(endog) < 2:
1303                endog = np.atleast_2d(endog).T
1304
1305        if k_endog_monthly is None:
1306            k_endog_monthly = endog.shape[1]
1307
1308        if endog_is_pandas:
1309            endog_names = endog.columns.tolist()
1310        else:
1311            if endog.shape[1] == 1:
1312                endog_names = ['y']
1313            else:
1314                endog_names = [f'y{i + 1}' for i in range(endog.shape[1])]
1315
1316        self.k_endog_M = int_like(k_endog_monthly, 'k_endog_monthly')
1317        self.k_endog_Q = endog.shape[1] - self.k_endog_M
1318
1319        # Compute helper for handling factors / state indexing
1320        s = self._s = DynamicFactorMQStates(
1321            self.k_endog_M, self.k_endog_Q, endog_names, factors,
1322            factor_orders, factor_multiplicities, idiosyncratic_ar1)
1323
1324        # Save parameterization
1325        self.factors = factors
1326        self.factor_orders = factor_orders
1327        self.factor_multiplicities = factor_multiplicities
1328
1329        self.endog_factor_map = self._s.endog_factor_map
1330        self.factor_block_orders = self._s.factor_block_orders
1331        self.factor_names = self._s.factor_names
1332        self.k_factors = self._s.k_factors
1333        self.k_factor_blocks = len(self.factor_block_orders)
1334        self.max_factor_order = self._s.max_factor_order
1335        self.idiosyncratic_ar1 = idiosyncratic_ar1
1336        self.init_t0 = init_t0
1337        self.obs_cov_diag = obs_cov_diag
1338
1339        if self.init_t0:
1340            # TODO: test each of these options
1341            if endog_is_pandas:
1342                ix = pd.period_range(endog.index[0] - 1, endog.index[-1],
1343                                     freq='M')
1344                endog = endog.reindex(ix)
1345            else:
1346                endog = np.c_[[np.nan] * endog.shape[1], endog.T].T
1347
1348        # Standardize endog, if requested
1349        # Note: endog_mean and endog_std will always each be 1-dimensional with
1350        # length equal to the number of endog variables
1351        if isinstance(standardize, tuple) and len(standardize) == 2:
1352            endog_mean, endog_std = standardize
1353
1354            # Validate the input
1355            n = endog.shape[1]
1356            if (isinstance(endog_mean, pd.Series) and not
1357                    endog_mean.index.equals(pd.Index(endog_names))):
1358                raise ValueError('Invalid value passed for `standardize`:'
1359                                 ' if a Pandas Series, must have index'
1360                                 f' {endog_names}. Got {endog_mean.index}.')
1361            else:
1362                endog_mean = np.atleast_1d(endog_mean)
1363            if (isinstance(endog_std, pd.Series) and not
1364                    endog_std.index.equals(pd.Index(endog_names))):
1365                raise ValueError('Invalid value passed for `standardize`:'
1366                                 ' if a Pandas Series, must have index'
1367                                 f' {endog_names}. Got {endog_std.index}.')
1368            else:
1369                endog_std = np.atleast_1d(endog_std)
1370
1371            if (np.shape(endog_mean) != (n,) or np.shape(endog_std) != (n,)):
1372                raise ValueError('Invalid value passed for `standardize`: each'
1373                                 f' element must be shaped ({n},).')
1374            standardize = True
1375
1376            # Make sure we have Pandas if endog is Pandas
1377            if endog_is_pandas:
1378                endog_mean = pd.Series(endog_mean, index=endog_names)
1379                endog_std = pd.Series(endog_std, index=endog_names)
1380
1381        elif standardize in [1, True]:
1382            endog_mean = endog.mean(axis=0)
1383            endog_std = endog.std(axis=0)
1384        elif standardize in [0, False]:
1385            endog_mean = np.zeros(endog.shape[1])
1386            endog_std = np.ones(endog.shape[1])
1387        else:
1388            raise ValueError('Invalid value passed for `standardize`.')
1389        self._endog_mean = endog_mean
1390        self._endog_std = endog_std
1391        self.standardize = standardize
1392        if np.any(self._endog_std < 1e-10):
1393            ix = np.where(self._endog_std < 1e-10)
1394            names = np.array(endog_names)[ix[0]].tolist()
1395            raise ValueError('Constant variable(s) found in observed'
1396                             ' variables, but constants cannot be included'
1397                             f' in this model. These variables are: {names}.')
1398
1399        if self.standardize:
1400            endog = (endog - self._endog_mean) / self._endog_std
1401
1402        # Observation / states slices
1403        o = self._o = {
1404            'M': np.s_[:self.k_endog_M],
1405            'Q': np.s_[self.k_endog_M:]}
1406
1407        # Construct the basic state space representation
1408        super().__init__(endog, k_states=s.k_states, k_posdef=s.k_posdef,
1409                         **kwargs)
1410
1411        # Revert the standardization for orig_endog
1412        if self.standardize:
1413            self.data.orig_endog = (
1414                self.data.orig_endog * self._endog_std + self._endog_mean)
1415
1416        # State initialization
1417        # Note: we could just initialize the entire thing as stationary, but
1418        # doing each block separately should be faster and avoid numerical
1419        # issues
1420        if 'initialization' not in kwargs:
1421            self.ssm.initialize(self._default_initialization())
1422
1423        # Fixed components of the state space representation
1424
1425        # > design
1426        if self.idiosyncratic_ar1:
1427            self['design', o['M'], s['idio_ar_M']] = np.eye(self.k_endog_M)
1428        multipliers = [1, 2, 3, 2, 1]
1429        for i in range(len(multipliers)):
1430            m = multipliers[i]
1431            self['design', o['Q'], s['idio_ar_Q_ix'][:, i]] = (
1432                m * np.eye(self.k_endog_Q))
1433
1434        # > obs cov
1435        if self.obs_cov_diag:
1436            self['obs_cov'] = np.eye(self.k_endog) * 1e-4
1437
1438        # > transition
1439        for block in s.factor_blocks:
1440            if block.k_factors == 1:
1441                tmp = 0
1442            else:
1443                tmp = np.zeros((block.k_factors, block.k_factors))
1444            self['transition', block['factors'], block['factors']] = (
1445                companion_matrix([1] + [tmp] * block._factor_order).T)
1446        if self.k_endog_Q == 1:
1447            tmp = 0
1448        else:
1449            tmp = np.zeros((self.k_endog_Q, self.k_endog_Q))
1450        self['transition', s['idio_ar_Q'], s['idio_ar_Q']] = (
1451            companion_matrix([1] + [tmp] * 5).T)
1452
1453        # > selection
1454        ix1 = ix2 = 0
1455        for block in s.factor_blocks:
1456            ix2 += block.k_factors
1457            self['selection', block['factors_ix'][:, 0], ix1:ix2] = (
1458                np.eye(block.k_factors))
1459            ix1 = ix2
1460        if self.idiosyncratic_ar1:
1461            ix2 = ix1 + self.k_endog_M
1462            self['selection', s['idio_ar_M'], ix1:ix2] = np.eye(self.k_endog_M)
1463            ix1 = ix2
1464
1465        ix2 = ix1 + self.k_endog_Q
1466        self['selection', s['idio_ar_Q_ix'][:, 0], ix1:ix2] = (
1467            np.eye(self.k_endog_Q))
1468
1469        # Parameters
1470        self.params = OrderedDict([
1471            ('loadings', np.sum(self.endog_factor_map.values)),
1472            ('factor_ar', np.sum([block.k_factors**2 * block.factor_order
1473                                  for block in s.factor_blocks])),
1474            ('factor_cov', np.sum([block.k_factors * (block.k_factors + 1) // 2
1475                                   for block in s.factor_blocks])),
1476            ('idiosyncratic_ar1',
1477                self.k_endog if self.idiosyncratic_ar1 else 0),
1478            ('idiosyncratic_var', self.k_endog)])
1479        self.k_params = np.sum(list(self.params.values()))
1480
1481        # Parameter slices
1482        ix = np.split(np.arange(self.k_params),
1483                      np.cumsum(list(self.params.values()))[:-1])
1484        self._p = dict(zip(self.params.keys(), ix))
1485
1486        # Cache
1487        self._loading_constraints = {}
1488
1489        # Initialization kwarg keys, e.g. for cloning
1490        self._init_keys += [
1491            'factors', 'factor_orders', 'factor_multiplicities',
1492            'idiosyncratic_ar1', 'standardize', 'init_t0',
1493            'obs_cov_diag'] + list(kwargs.keys())
1494
1495    @classmethod
1496    def construct_endog(cls, endog_monthly, endog_quarterly):
1497        """
1498        Construct a combined dataset from separate monthly and quarterly data.
1499
1500        Parameters
1501        ----------
1502        endog_monthly : array_like
1503            Monthly dataset. If a quarterly dataset is given, then this must
1504            be a Pandas object with a PeriodIndex or DatetimeIndex at a monthly
1505            frequency.
1506        endog_quarterly : array_like or None
1507            Quarterly dataset. If not None, then this must be a Pandas object
1508            with a PeriodIndex or DatetimeIndex at a quarterly frequency.
1509
1510        Returns
1511        -------
1512        endog : array_like
1513            If both endog_monthly and endog_quarterly were given, this is a
1514            Pandas DataFrame with a PeriodIndex at the monthly frequency, with
1515            all of the columns from `endog_monthly` ordered first and the
1516            columns from `endog_quarterly` ordered afterwards. Otherwise it is
1517            simply the input `endog_monthly` dataset.
1518        k_endog_monthly : int
1519            The number of monthly variables (which are ordered first) in the
1520            returned `endog` dataset.
1521        """
1522        # Create combined dataset
1523        if endog_quarterly is not None:
1524            # Validate endog_monthly
1525            base_msg = ('If given both monthly and quarterly data'
1526                        ' then the monthly dataset must be a Pandas'
1527                        ' object with a date index at a monthly frequency.')
1528            if not isinstance(endog_monthly, (pd.Series, pd.DataFrame)):
1529                raise ValueError('Given monthly dataset is not a'
1530                                 ' Pandas object. ' + base_msg)
1531            elif endog_monthly.index.inferred_type not in ("datetime64",
1532                                                           "period"):
1533                raise ValueError('Given monthly dataset has an'
1534                                 ' index with non-date values. ' + base_msg)
1535            elif not getattr(endog_monthly.index, 'freqstr', 'N')[0] == 'M':
1536                freqstr = getattr(endog_monthly.index, 'freqstr', 'None')
1537                raise ValueError('Index of given monthly dataset has a'
1538                                 ' non-monthly frequency (to check this,'
1539                                 ' examine the `freqstr` attribute of the'
1540                                 ' index of the dataset - it should start with'
1541                                 ' M if it is monthly).'
1542                                 f' Got {freqstr}. ' + base_msg)
1543
1544            # Validate endog_quarterly
1545            base_msg = ('If a quarterly dataset is given, then it must be a'
1546                        ' Pandas object with a date index at a quarterly'
1547                        ' frequency.')
1548            if not isinstance(endog_quarterly, (pd.Series, pd.DataFrame)):
1549                raise ValueError('Given quarterly dataset is not a'
1550                                 ' Pandas object. ' + base_msg)
1551            elif endog_quarterly.index.inferred_type not in ("datetime64",
1552                                                             "period"):
1553                raise ValueError('Given quarterly dataset has an'
1554                                 ' index with non-date values. ' + base_msg)
1555            elif not getattr(endog_quarterly.index, 'freqstr', 'N')[0] == 'Q':
1556                freqstr = getattr(endog_quarterly.index, 'freqstr', 'None')
1557                raise ValueError('Index of given quarterly dataset'
1558                                 ' has a non-quarterly frequency (to check'
1559                                 ' this, examine the `freqstr` attribute of'
1560                                 ' the index of the dataset - it should start'
1561                                 ' with Q if it is quarterly).'
1562                                 f' Got {freqstr}. ' + base_msg)
1563
1564            # Convert to PeriodIndex, if applicable
1565            if hasattr(endog_monthly.index, 'to_period'):
1566                endog_monthly = endog_monthly.to_period('M')
1567            if hasattr(endog_quarterly.index, 'to_period'):
1568                endog_quarterly = endog_quarterly.to_period('Q')
1569
1570            # Combine the datasets
1571            endog = pd.concat([
1572                endog_monthly,
1573                endog_quarterly.resample('M', convention='end').first()],
1574                axis=1)
1575
1576            # Make sure we didn't accidentally get duplicate column names
1577            column_counts = endog.columns.value_counts()
1578            if column_counts.max() > 1:
1579                columns = endog.columns.values.astype(object)
1580                for name in column_counts.index:
1581                    count = column_counts.loc[name]
1582                    if count == 1:
1583                        continue
1584                    mask = columns == name
1585                    columns[mask] = [f'{name}{i + 1}' for i in range(count)]
1586                endog.columns = columns
1587        else:
1588            endog = endog_monthly.copy()
1589        shape = endog_monthly.shape
1590        k_endog_monthly = shape[1] if len(shape) == 2 else 1
1591
1592        return endog, k_endog_monthly
1593
1594    def clone(self, endog, k_endog_monthly=None, endog_quarterly=None,
1595              retain_standardization=False, **kwargs):
1596        """
1597        Clone state space model with new data and optionally new specification.
1598
1599        Parameters
1600        ----------
1601        endog : array_like
1602            The observed time-series process :math:`y`
1603        k_endog_monthly : int, optional
1604            If specifying a monthly/quarterly mixed frequency model in which
1605            the provided `endog` dataset contains both the monthly and
1606            quarterly data, this variable should be used to indicate how many
1607            of the variables are monthly.
1608        endog_quarterly : array_like, optional
1609            Observations of quarterly variables. If provided, must be a
1610            Pandas Series or DataFrame with a DatetimeIndex or PeriodIndex at
1611            the quarterly frequency.
1612        kwargs
1613            Keyword arguments to pass to the new model class to change the
1614            model specification.
1615
1616        Returns
1617        -------
1618        model : DynamicFactorMQ instance
1619        """
1620        if retain_standardization and self.standardize:
1621            kwargs['standardize'] = (self._endog_mean, self._endog_std)
1622        mod = self._clone_from_init_kwds(
1623            endog, k_endog_monthly=k_endog_monthly,
1624            endog_quarterly=endog_quarterly, **kwargs)
1625        return mod
1626
1627    @property
1628    def _res_classes(self):
1629        return {'fit': (DynamicFactorMQResults, mlemodel.MLEResultsWrapper)}
1630
1631    def _default_initialization(self):
1632        s = self._s
1633        init = initialization.Initialization(self.k_states)
1634        for block in s.factor_blocks:
1635            init.set(block['factors'], 'stationary')
1636        if self.idiosyncratic_ar1:
1637            for i in range(s['idio_ar_M'].start, s['idio_ar_M'].stop):
1638                init.set(i, 'stationary')
1639        init.set(s['idio_ar_Q'], 'stationary')
1640        return init
1641
1642    def _get_endog_names(self, truncate=None, as_string=None):
1643        if truncate is None:
1644            truncate = False if as_string is False or self.k_endog == 1 else 24
1645        if as_string is False and truncate is not False:
1646            raise ValueError('Can only truncate endog names if they'
1647                             ' are returned as a string.')
1648        if as_string is None:
1649            as_string = truncate is not False
1650
1651        # The base `endog_names` property is only a list if there are at least
1652        # two variables; often, we need it to be a list
1653        endog_names = self.endog_names
1654        if not isinstance(endog_names, list):
1655            endog_names = [endog_names]
1656
1657        if as_string:
1658            endog_names = [str(name) for name in endog_names]
1659
1660        if truncate is not False:
1661            n = truncate
1662            endog_names = [name if len(name) <= n else name[:n] + '...'
1663                           for name in endog_names]
1664
1665        return endog_names
1666
1667    @property
1668    def _model_name(self):
1669        model_name = [
1670            'Dynamic Factor Model',
1671            f'{self.k_factors} factors in {self.k_factor_blocks} blocks']
1672        if self.k_endog_Q > 0:
1673            model_name.append('Mixed frequency (M/Q)')
1674
1675        error_type = 'AR(1)' if self.idiosyncratic_ar1 else 'iid'
1676        model_name.append(f'{error_type} idiosyncratic')
1677
1678        return model_name
1679
1680    def summary(self, truncate_endog_names=None):
1681        """
1682        Create a summary table describing the model.
1683
1684        Parameters
1685        ----------
1686        truncate_endog_names : int, optional
1687            The number of characters to show for names of observed variables.
1688            Default is 24 if there is more than one observed variable, or
1689            an unlimited number of there is only one.
1690        """
1691        # Get endog names
1692        endog_names = self._get_endog_names(truncate=truncate_endog_names,
1693                                            as_string=True)
1694
1695        title = 'Model Specification: Dynamic Factor Model'
1696
1697        if self._index_dates:
1698            ix = self._index
1699            d = ix[0]
1700            sample = ['%s' % d]
1701            d = ix[-1]
1702            sample += ['- ' + '%s' % d]
1703        else:
1704            sample = [str(0), ' - ' + str(self.nobs)]
1705
1706        # Standardize the model name as a list of str
1707        model_name = self._model_name
1708
1709        # - Top summary table ------------------------------------------------
1710        top_left = []
1711        top_left.append(('Model:', [model_name[0]]))
1712        for i in range(1, len(model_name)):
1713            top_left.append(('', ['+ ' + model_name[i]]))
1714        top_left += [
1715            ('Sample:', [sample[0]]),
1716            ('', [sample[1]])]
1717
1718        top_right = []
1719        if self.k_endog_Q > 0:
1720            top_right += [
1721                ('# of monthly variables:', [self.k_endog_M]),
1722                ('# of quarterly variables:', [self.k_endog_Q])]
1723        else:
1724            top_right += [('# of observed variables:', [self.k_endog])]
1725        if self.k_factor_blocks == 1:
1726            top_right += [('# of factors:', [self.k_factors])]
1727        else:
1728            top_right += [('# of factor blocks:', [self.k_factor_blocks])]
1729        top_right += [('Idiosyncratic disturbances:',
1730                       ['AR(1)' if self.idiosyncratic_ar1 else 'iid']),
1731                      ('Standardize variables:', [self.standardize])]
1732
1733        summary = Summary()
1734        self.model = self
1735        summary.add_table_2cols(self, gleft=top_left, gright=top_right,
1736                                title=title)
1737        table_ix = 1
1738        del self.model
1739
1740        # - Endog / factor map -----------------------------------------------
1741        data = self.endog_factor_map.replace({True: 'X', False: ''})
1742        data.index = endog_names
1743        for name, col in data.iteritems():
1744            data[name] = data[name] + (' ' * (len(name) // 2))
1745        data.index.name = 'Dep. variable'
1746        data = data.reset_index()
1747
1748        params_data = data.values
1749        params_header = data.columns.map(str).tolist()
1750        params_stubs = None
1751
1752        title = 'Observed variables / factor loadings'
1753        table = SimpleTable(
1754            params_data, params_header, params_stubs,
1755            txt_fmt=fmt_params, title=title)
1756
1757        summary.tables.insert(table_ix, table)
1758        table_ix += 1
1759
1760        # - Factor blocks summary table --------------------------------------
1761        data = self.factor_block_orders.reset_index()
1762        data['block'] = data['block'].map(
1763            lambda factor_names: ', '.join(factor_names))
1764        data[['order']] = (
1765            data[['order']].applymap(str))
1766
1767        params_data = data.values
1768        params_header = data.columns.map(str).tolist()
1769        params_stubs = None
1770
1771        title = 'Factor blocks:'
1772        table = SimpleTable(
1773            params_data, params_header, params_stubs,
1774            txt_fmt=fmt_params, title=title)
1775
1776        summary.tables.insert(table_ix, table)
1777        table_ix += 1
1778
1779        return summary
1780
1781    def __str__(self):
1782        """Summary tables showing model specification."""
1783        return str(self.summary())
1784
1785    @property
1786    def state_names(self):
1787        """(list of str) List of human readable names for unobserved states."""
1788        # Factors
1789        state_names = []
1790        for block in self._s.factor_blocks:
1791            state_names += [f'{name}' for name in block.factor_names[:]]
1792            for s in range(1, block._factor_order):
1793                state_names += [f'L{s}.{name}'
1794                                for name in block.factor_names]
1795
1796        # Monthly error
1797        endog_names = self._get_endog_names()
1798        if self.idiosyncratic_ar1:
1799            endog_names_M = endog_names[self._o['M']]
1800            state_names += [f'eps_M.{name}' for name in endog_names_M]
1801        endog_names_Q = endog_names[self._o['Q']]
1802
1803        # Quarterly error
1804        state_names += [f'eps_Q.{name}' for name in endog_names_Q]
1805        for s in range(1, 5):
1806            state_names += [f'L{s}.eps_Q.{name}' for name in endog_names_Q]
1807        return state_names
1808
1809    @property
1810    def param_names(self):
1811        """(list of str) List of human readable parameter names."""
1812        param_names = []
1813        # Loadings
1814        # So that Lambda = params[ix].reshape(self.k_endog, self.k_factors)
1815        # (where Lambda stacks Lambda_M and Lambda_Q)
1816        endog_names = self._get_endog_names(as_string=False)
1817        for endog_name in endog_names:
1818            for block in self._s.factor_blocks:
1819                for factor_name in block.factor_names:
1820                    if self.endog_factor_map.loc[endog_name, factor_name]:
1821                        param_names.append(
1822                            f'loading.{factor_name}->{endog_name}')
1823
1824        # Factor VAR
1825        for block in self._s.factor_blocks:
1826            for to_factor in block.factor_names:
1827                param_names += [f'L{i}.{from_factor}->{to_factor}'
1828                                for i in range(1, block.factor_order + 1)
1829                                for from_factor in block.factor_names]
1830
1831        # Factor covariance
1832        for i in range(len(self._s.factor_blocks)):
1833            block = self._s.factor_blocks[i]
1834            param_names += [f'fb({i}).cov.chol[{j + 1},{k + 1}]'
1835                            for j in range(block.k_factors)
1836                            for k in range(j + 1)]
1837
1838        # Error AR(1)
1839        if self.idiosyncratic_ar1:
1840            endog_names_M = endog_names[self._o['M']]
1841            param_names += [f'L1.eps_M.{name}' for name in endog_names_M]
1842
1843            endog_names_Q = endog_names[self._o['Q']]
1844            param_names += [f'L1.eps_Q.{name}' for name in endog_names_Q]
1845
1846        # Error innovation variances
1847        param_names += [f'sigma2.{name}' for name in endog_names]
1848
1849        return param_names
1850
1851    @property
1852    def start_params(self):
1853        """(array) Starting parameters for maximum likelihood estimation."""
1854        params = np.zeros(self.k_params, dtype=np.float64)
1855
1856        # (1) estimate factors one at a time, where the first step uses
1857        # PCA on all `endog` variables that load on the first factor, and
1858        # subsequent steps use residuals from the previous steps.
1859        # TODO: what about factors that only load on quarterly variables?
1860        endog_factor_map_M = self.endog_factor_map.iloc[:self.k_endog_M]
1861        factors = []
1862        endog = (pd.DataFrame(self.endog).interpolate()
1863                                         .fillna(method='backfill')
1864                                         .values)
1865        for name in self.factor_names:
1866            # Try to retrieve this from monthly variables, which is most
1867            # consistent
1868            endog_ix = np.where(endog_factor_map_M.loc[:, name])[0]
1869            # But fall back to quarterly if necessary
1870            if len(endog_ix) == 0:
1871                endog_ix = np.where(self.endog_factor_map.loc[:, name])[0]
1872            factor_endog = endog[:, endog_ix]
1873
1874            res_pca = PCA(factor_endog, ncomp=1, method='eig', normalize=False)
1875            factors.append(res_pca.factors)
1876            endog[:, endog_ix] -= res_pca.projection
1877        factors = np.concatenate(factors, axis=1)
1878
1879        # (2) Estimate coefficients for each endog, one at a time (OLS for
1880        # monthly variables, restricted OLS for quarterly). Also, compute
1881        # residuals.
1882        loadings = []
1883        resid = []
1884        for i in range(self.k_endog_M):
1885            factor_ix = self._s.endog_factor_iloc[i]
1886            factor_exog = factors[:, factor_ix]
1887            mod_ols = OLS(self.endog[:, i], exog=factor_exog, missing='drop')
1888            res_ols = mod_ols.fit()
1889            loadings += res_ols.params.tolist()
1890            resid.append(res_ols.resid)
1891        for i in range(self.k_endog_M, self.k_endog):
1892            factor_ix = self._s.endog_factor_iloc[i]
1893            factor_exog = lagmat(factors[:, factor_ix], 4, original='in')
1894            mod_glm = GLM(self.endog[:, i], factor_exog, missing='drop')
1895            res_glm = mod_glm.fit_constrained(self.loading_constraints(i))
1896            loadings += res_glm.params[:len(factor_ix)].tolist()
1897            resid.append(res_glm.resid_response)
1898        params[self._p['loadings']] = loadings
1899
1900        # (3) For each factor block, use an AR or VAR model to get coefficients
1901        # and covariance estimate
1902        # Factor transitions
1903        stationary = True
1904
1905        factor_ar = []
1906        factor_cov = []
1907        i = 0
1908        for block in self._s.factor_blocks:
1909            factors_endog = factors[:, i:i + block.k_factors]
1910            i += block.k_factors
1911
1912            if block.factor_order == 0:
1913                continue
1914
1915            if block.k_factors == 1:
1916                mod_factors = SARIMAX(factors_endog,
1917                                      order=(block.factor_order, 0, 0))
1918                sp = mod_factors.start_params
1919                block_factor_ar = sp[:-1]
1920                block_factor_cov = sp[-1:]
1921
1922                coefficient_matrices = mod_factors.start_params[:-1]
1923            elif block.k_factors > 1:
1924                mod_factors = VAR(factors_endog)
1925                res_factors = mod_factors.fit(
1926                    maxlags=block.factor_order, ic=None, trend='n')
1927
1928                block_factor_ar = res_factors.params.T.ravel()
1929                L = np.linalg.cholesky(res_factors.sigma_u)
1930                block_factor_cov = L[np.tril_indices_from(L)]
1931
1932                coefficient_matrices = np.transpose(
1933                    np.reshape(block_factor_ar,
1934                               (block.k_factors, block.k_factors,
1935                                block.factor_order)), (2, 0, 1))
1936
1937            # Test for stationarity
1938            stationary = is_invertible([1] + list(-coefficient_matrices))
1939
1940            # Check for stationarity
1941            if not stationary:
1942                warn('Non-stationary starting factor autoregressive'
1943                     ' parameters found for factor block'
1944                     f' {block.factor_names}. Using zeros as starting'
1945                     ' parameters.')
1946                block_factor_ar[:] = 0
1947                cov_factor = np.diag(factors_endog.std(axis=0))
1948                block_factor_cov = (
1949                    cov_factor[np.tril_indices(block.k_factors)])
1950            factor_ar += block_factor_ar.tolist()
1951            factor_cov += block_factor_cov.tolist()
1952        params[self._p['factor_ar']] = factor_ar
1953        params[self._p['factor_cov']] = factor_cov
1954
1955        # (4) Use residuals from step (2) to estimate the idiosyncratic
1956        # component
1957        # Idiosyncratic component
1958        if self.idiosyncratic_ar1:
1959            idio_ar1 = []
1960            idio_var = []
1961            for i in range(self.k_endog_M):
1962                mod_idio = SARIMAX(resid[i], order=(1, 0, 0), trend='c')
1963                sp = mod_idio.start_params
1964                idio_ar1.append(np.clip(sp[1], -0.99, 0.99))
1965                idio_var.append(np.clip(sp[-1], 1e-5, np.inf))
1966            for i in range(self.k_endog_M, self.k_endog):
1967                y = self.endog[:, i].copy()
1968                y[~np.isnan(y)] = resid[i]
1969                mod_idio = QuarterlyAR1(y)
1970                res_idio = mod_idio.fit(maxiter=10, return_params=True,
1971                                        disp=False)
1972                res_idio = mod_idio.fit_em(res_idio, maxiter=5,
1973                                           return_params=True)
1974                idio_ar1.append(np.clip(res_idio[0], -0.99, 0.99))
1975                idio_var.append(np.clip(res_idio[1], 1e-5, np.inf))
1976            params[self._p['idiosyncratic_ar1']] = idio_ar1
1977            params[self._p['idiosyncratic_var']] = idio_var
1978        else:
1979            idio_var = [np.var(resid[i]) for i in range(self.k_endog_M)]
1980            for i in range(self.k_endog_M, self.k_endog):
1981                y = self.endog[:, i].copy()
1982                y[~np.isnan(y)] = resid[i]
1983                mod_idio = QuarterlyAR1(y)
1984                res_idio = mod_idio.fit(return_params=True, disp=False)
1985                idio_var.append(np.clip(res_idio[1], 1e-5, np.inf))
1986            params[self._p['idiosyncratic_var']] = idio_var
1987
1988        return params
1989
1990    def transform_params(self, unconstrained):
1991        """
1992        Transform parameters from optimizer space to model space.
1993
1994        Transform unconstrained parameters used by the optimizer to constrained
1995        parameters used in likelihood evaluation.
1996
1997        Parameters
1998        ----------
1999        unconstrained : array_like
2000            Array of unconstrained parameters used by the optimizer, to be
2001            transformed.
2002
2003        Returns
2004        -------
2005        constrained : array_like
2006            Array of constrained parameters which may be used in likelihood
2007            evaluation.
2008        """
2009        constrained = unconstrained.copy()
2010
2011        # Stationary factor VAR
2012        unconstrained_factor_ar = unconstrained[self._p['factor_ar']]
2013        constrained_factor_ar = []
2014        i = 0
2015        for block in self._s.factor_blocks:
2016            length = block.k_factors**2 * block.factor_order
2017            tmp_coeff = np.reshape(
2018                unconstrained_factor_ar[i:i + length],
2019                (block.k_factors, block.k_factors * block.factor_order))
2020            tmp_cov = np.eye(block.k_factors)
2021            tmp_coeff, _ = constrain_stationary_multivariate(tmp_coeff,
2022                                                             tmp_cov)
2023            constrained_factor_ar += tmp_coeff.ravel().tolist()
2024            i += length
2025        constrained[self._p['factor_ar']] = constrained_factor_ar
2026
2027        # Stationary idiosyncratic AR(1)
2028        if self.idiosyncratic_ar1:
2029            idio_ar1 = unconstrained[self._p['idiosyncratic_ar1']]
2030            constrained[self._p['idiosyncratic_ar1']] = [
2031                constrain_stationary_univariate(idio_ar1[i:i + 1])[0]
2032                for i in range(self.k_endog)]
2033
2034        # Positive idiosyncratic variances
2035        constrained[self._p['idiosyncratic_var']] = (
2036            constrained[self._p['idiosyncratic_var']]**2)
2037
2038        return constrained
2039
2040    def untransform_params(self, constrained):
2041        """
2042        Transform parameters from model space to optimizer space.
2043
2044        Transform constrained parameters used in likelihood evaluation
2045        to unconstrained parameters used by the optimizer.
2046
2047        Parameters
2048        ----------
2049        constrained : array_like
2050            Array of constrained parameters used in likelihood evaluation, to
2051            be transformed.
2052
2053        Returns
2054        -------
2055        unconstrained : array_like
2056            Array of unconstrained parameters used by the optimizer.
2057        """
2058        unconstrained = constrained.copy()
2059
2060        # Stationary factor VAR
2061        constrained_factor_ar = constrained[self._p['factor_ar']]
2062        unconstrained_factor_ar = []
2063        i = 0
2064        for block in self._s.factor_blocks:
2065            length = block.k_factors**2 * block.factor_order
2066            tmp_coeff = np.reshape(
2067                constrained_factor_ar[i:i + length],
2068                (block.k_factors, block.k_factors * block.factor_order))
2069            tmp_cov = np.eye(block.k_factors)
2070            tmp_coeff, _ = unconstrain_stationary_multivariate(tmp_coeff,
2071                                                               tmp_cov)
2072            unconstrained_factor_ar += tmp_coeff.ravel().tolist()
2073            i += length
2074        unconstrained[self._p['factor_ar']] = unconstrained_factor_ar
2075
2076        # Stationary idiosyncratic AR(1)
2077        if self.idiosyncratic_ar1:
2078            idio_ar1 = constrained[self._p['idiosyncratic_ar1']]
2079            unconstrained[self._p['idiosyncratic_ar1']] = [
2080                unconstrain_stationary_univariate(idio_ar1[i:i + 1])[0]
2081                for i in range(self.k_endog)]
2082
2083        # Positive idiosyncratic variances
2084        unconstrained[self._p['idiosyncratic_var']] = (
2085            unconstrained[self._p['idiosyncratic_var']]**0.5)
2086
2087        return unconstrained
2088
2089    def update(self, params, **kwargs):
2090        """
2091        Update the parameters of the model.
2092
2093        Parameters
2094        ----------
2095        params : array_like
2096            Array of new parameters.
2097        transformed : bool, optional
2098            Whether or not `params` is already transformed. If set to False,
2099            `transform_params` is called. Default is True.
2100
2101        """
2102        params = super().update(params, **kwargs)
2103
2104        # Local copies
2105        o = self._o
2106        s = self._s
2107        p = self._p
2108
2109        # Loadings
2110        loadings = params[p['loadings']]
2111        start = 0
2112        for i in range(self.k_endog_M):
2113            iloc = self._s.endog_factor_iloc[i]
2114            k_factors = len(iloc)
2115            factor_ix = s['factors_L1'][iloc]
2116            self['design', i, factor_ix] = loadings[start:start + k_factors]
2117            start += k_factors
2118        multipliers = np.array([1, 2, 3, 2, 1])[:, None]
2119        for i in range(self.k_endog_M, self.k_endog):
2120            iloc = self._s.endog_factor_iloc[i]
2121            k_factors = len(iloc)
2122            factor_ix = s['factors_L1_5_ix'][:, iloc]
2123            self['design', i, factor_ix.ravel()] = np.ravel(
2124                loadings[start:start + k_factors] * multipliers)
2125            start += k_factors
2126
2127        # Factor VAR
2128        factor_ar = params[p['factor_ar']]
2129        start = 0
2130        for block in s.factor_blocks:
2131            k_params = block.k_factors**2 * block.factor_order
2132            A = np.reshape(
2133                factor_ar[start:start + k_params],
2134                (block.k_factors, block.k_factors * block.factor_order))
2135            start += k_params
2136            self['transition', block['factors_L1'], block['factors_ar']] = A
2137
2138        # Factor covariance
2139        factor_cov = params[p['factor_cov']]
2140        start = 0
2141        ix1 = 0
2142        for block in s.factor_blocks:
2143            k_params = block.k_factors * (block.k_factors + 1) // 2
2144            L = np.zeros((block.k_factors, block.k_factors),
2145                         dtype=params.dtype)
2146            L[np.tril_indices_from(L)] = factor_cov[start:start + k_params]
2147            start += k_params
2148            Q = L @ L.T
2149            ix2 = ix1 + block.k_factors
2150            self['state_cov', ix1:ix2, ix1:ix2] = Q
2151            ix1 = ix2
2152
2153        # Error AR(1)
2154        if self.idiosyncratic_ar1:
2155            alpha = np.diag(params[p['idiosyncratic_ar1']])
2156            self['transition', s['idio_ar_L1'], s['idio_ar_L1']] = alpha
2157
2158        # Error variances
2159        if self.idiosyncratic_ar1:
2160            self['state_cov', self.k_factors:, self.k_factors:] = (
2161                np.diag(params[p['idiosyncratic_var']]))
2162        else:
2163            idio_var = params[p['idiosyncratic_var']]
2164            self['obs_cov', o['M'], o['M']] = np.diag(idio_var[o['M']])
2165            self['state_cov', self.k_factors:, self.k_factors:] = (
2166                np.diag(idio_var[o['Q']]))
2167
2168    @property
2169    def loglike_constant(self):
2170        """
2171        Constant term in the joint log-likelihood function.
2172
2173        Useful in facilitating comparisons to other packages that exclude the
2174        constant from the log-likelihood computation.
2175        """
2176        return -0.5 * (1 - np.isnan(self.endog)).sum() * np.log(2 * np.pi)
2177
2178    def loading_constraints(self, i):
2179        r"""
2180        Matrix formulation of quarterly variables' factor loading constraints.
2181
2182        Parameters
2183        ----------
2184        i : int
2185            Index of the `endog` variable to compute constraints for.
2186
2187        Returns
2188        -------
2189        R : array (k_constraints, k_factors * 5)
2190        q : array (k_constraints,)
2191
2192        Notes
2193        -----
2194        If the factors were known, then the factor loadings for the ith
2195        quarterly variable would be computed by a linear regression of the form
2196
2197        y_i = A_i' f + B_i' L1.f + C_i' L2.f + D_i' L3.f + E_i' L4.f
2198
2199        where:
2200
2201        - f is (k_i x 1) and collects all of the factors that load on y_i
2202        - L{j}.f is (k_i x 1) and collects the jth lag of each factor
2203        - A_i, ..., E_i are (k_i x 1) and collect factor loadings
2204
2205        As the observed variable is quarterly while the factors are monthly, we
2206        want to restrict the estimated regression coefficients to be:
2207
2208        y_i = A_i f + 2 A_i L1.f + 3 A_i L2.f + 2 A_i L3.f + A_i L4.f
2209
2210        Stack the unconstrained coefficients: \Lambda_i = [A_i' B_i' ... E_i']'
2211
2212        Then the constraints can be written as follows, for l = 1, ..., k_i
2213
2214        - 2 A_{i,l} - B_{i,l} = 0
2215        - 3 A_{i,l} - C_{i,l} = 0
2216        - 2 A_{i,l} - D_{i,l} = 0
2217        - A_{i,l} - E_{i,l} = 0
2218
2219        So that k_constraints = 4 * k_i. In matrix form the constraints are:
2220
2221        .. math::
2222
2223            R \Lambda_i = q
2224
2225        where :math:`\Lambda_i` is shaped `(k_i * 5,)`, :math:`R` is shaped
2226        `(k_constraints, k_i * 5)`, and :math:`q` is shaped `(k_constraints,)`.
2227
2228
2229        For example, for the case that k_i = 2, we can write:
2230
2231        |  2 0   -1  0    0  0    0  0    0  0  |   | A_{i,1} |     | 0 |
2232        |  0 2    0 -1    0  0    0  0    0  0  |   | A_{i,2} |     | 0 |
2233        |  3 0    0  0   -1  0    0  0    0  0  |   | B_{i,1} |     | 0 |
2234        |  0 3    0  0    0 -1    0  0    0  0  |   | B_{i,2} |     | 0 |
2235        |  2 0    0  0    0  0   -1  0    0  0  |   | C_{i,1} |  =  | 0 |
2236        |  0 2    0  0    0  0    0 -1    0  0  |   | C_{i,2} |     | 0 |
2237        |  1 0    0  0    0  0    0  0   -1  0  |   | D_{i,1} |     | 0 |
2238        |  0 1    0  0    0  0    0  0    0 -1  |   | D_{i,2} |     | 0 |
2239                                                    | E_{i,1} |     | 0 |
2240                                                    | E_{i,2} |     | 0 |
2241
2242        """
2243        if i < self.k_endog_M:
2244            raise ValueError('No constraints for monthly variables.')
2245        if i not in self._loading_constraints:
2246            k_factors = self.endog_factor_map.iloc[i].sum()
2247
2248            R = np.zeros((k_factors * 4, k_factors * 5))
2249            q = np.zeros(R.shape[0])
2250
2251            # Let R = [R_1 R_2]
2252            # Then R_1 is multiples of the identity matrix
2253            multipliers = np.array([1, 2, 3, 2, 1])
2254            R[:, :k_factors] = np.reshape(
2255                (multipliers[1:] * np.eye(k_factors)[..., None]).T,
2256                (k_factors * 4, k_factors))
2257
2258            # And R_2 is the identity
2259            R[:, k_factors:] = np.diag([-1] * (k_factors * 4))
2260
2261            self._loading_constraints[i] = (R, q)
2262        return self._loading_constraints[i]
2263
2264    def fit(self, start_params=None, transformed=True, includes_fixed=False,
2265            cov_type='none', cov_kwds=None, method='em', maxiter=500,
2266            tolerance=1e-6, em_initialization=True, mstep_method=None,
2267            full_output=1, disp=False, callback=None, return_params=False,
2268            optim_score=None, optim_complex_step=None, optim_hessian=None,
2269            flags=None, low_memory=False, llf_decrease_action='revert',
2270            llf_decrease_tolerance=1e-4, **kwargs):
2271        """
2272        Fits the model by maximum likelihood via Kalman filter.
2273
2274        Parameters
2275        ----------
2276        start_params : array_like, optional
2277            Initial guess of the solution for the loglikelihood maximization.
2278            If None, the default is given by Model.start_params.
2279        transformed : bool, optional
2280            Whether or not `start_params` is already transformed. Default is
2281            True.
2282        includes_fixed : bool, optional
2283            If parameters were previously fixed with the `fix_params` method,
2284            this argument describes whether or not `start_params` also includes
2285            the fixed parameters, in addition to the free parameters. Default
2286            is False.
2287        cov_type : str, optional
2288            The `cov_type` keyword governs the method for calculating the
2289            covariance matrix of parameter estimates. Can be one of:
2290
2291            - 'opg' for the outer product of gradient estimator
2292            - 'oim' for the observed information matrix estimator, calculated
2293              using the method of Harvey (1989)
2294            - 'approx' for the observed information matrix estimator,
2295              calculated using a numerical approximation of the Hessian matrix.
2296            - 'robust' for an approximate (quasi-maximum likelihood) covariance
2297              matrix that may be valid even in the presence of some
2298              misspecifications. Intermediate calculations use the 'oim'
2299              method.
2300            - 'robust_approx' is the same as 'robust' except that the
2301              intermediate calculations use the 'approx' method.
2302            - 'none' for no covariance matrix calculation.
2303
2304            Default is 'none', since computing this matrix can be very slow
2305            when there are a large number of parameters.
2306        cov_kwds : dict or None, optional
2307            A dictionary of arguments affecting covariance matrix computation.
2308
2309            **opg, oim, approx, robust, robust_approx**
2310
2311            - 'approx_complex_step' : bool, optional - If True, numerical
2312              approximations are computed using complex-step methods. If False,
2313              numerical approximations are computed using finite difference
2314              methods. Default is True.
2315            - 'approx_centered' : bool, optional - If True, numerical
2316              approximations computed using finite difference methods use a
2317              centered approximation. Default is False.
2318        method : str, optional
2319            The `method` determines which solver from `scipy.optimize`
2320            is used, and it can be chosen from among the following strings:
2321
2322            - 'em' for the EM algorithm
2323            - 'newton' for Newton-Raphson
2324            - 'nm' for Nelder-Mead
2325            - 'bfgs' for Broyden-Fletcher-Goldfarb-Shanno (BFGS)
2326            - 'lbfgs' for limited-memory BFGS with optional box constraints
2327            - 'powell' for modified Powell's method
2328            - 'cg' for conjugate gradient
2329            - 'ncg' for Newton-conjugate gradient
2330            - 'basinhopping' for global basin-hopping solver
2331
2332            The explicit arguments in `fit` are passed to the solver,
2333            with the exception of the basin-hopping solver. Each
2334            solver has several optional arguments that are not the same across
2335            solvers. See the notes section below (or scipy.optimize) for the
2336            available arguments and for the list of explicit arguments that the
2337            basin-hopping solver supports.
2338        maxiter : int, optional
2339            The maximum number of iterations to perform.
2340        tolerance : float, optional
2341            Tolerance to use for convergence checking when using the EM
2342            algorithm. To set the tolerance for other methods, pass
2343            the optimizer-specific keyword argument(s).
2344        full_output : bool, optional
2345            Set to True to have all available output in the Results object's
2346            mle_retvals attribute. The output is dependent on the solver.
2347            See LikelihoodModelResults notes section for more information.
2348        disp : bool, optional
2349            Set to True to print convergence messages.
2350        callback : callable callback(xk), optional
2351            Called after each iteration, as callback(xk), where xk is the
2352            current parameter vector.
2353        return_params : bool, optional
2354            Whether or not to return only the array of maximizing parameters.
2355            Default is False.
2356        optim_score : {'harvey', 'approx'} or None, optional
2357            The method by which the score vector is calculated. 'harvey' uses
2358            the method from Harvey (1989), 'approx' uses either finite
2359            difference or complex step differentiation depending upon the
2360            value of `optim_complex_step`, and None uses the built-in gradient
2361            approximation of the optimizer. Default is None. This keyword is
2362            only relevant if the optimization method uses the score.
2363        optim_complex_step : bool, optional
2364            Whether or not to use complex step differentiation when
2365            approximating the score; if False, finite difference approximation
2366            is used. Default is True. This keyword is only relevant if
2367            `optim_score` is set to 'harvey' or 'approx'.
2368        optim_hessian : {'opg','oim','approx'}, optional
2369            The method by which the Hessian is numerically approximated. 'opg'
2370            uses outer product of gradients, 'oim' uses the information
2371            matrix formula from Harvey (1989), and 'approx' uses numerical
2372            approximation. This keyword is only relevant if the
2373            optimization method uses the Hessian matrix.
2374        low_memory : bool, optional
2375            If set to True, techniques are applied to substantially reduce
2376            memory usage. If used, some features of the results object will
2377            not be available (including smoothed results and in-sample
2378            prediction), although out-of-sample forecasting is possible.
2379            Note that this option is not available when using the EM algorithm
2380            (which is the default for this model). Default is False.
2381        llf_decrease_action : {'ignore', 'warn', 'revert'}, optional
2382            Action to take if the log-likelihood decreases in an EM iteration.
2383            'ignore' continues the iterations, 'warn' issues a warning but
2384            continues the iterations, while 'revert' ends the iterations and
2385            returns the result from the last good iteration. Default is 'warn'.
2386        llf_decrease_tolerance : float, optional
2387            Minimum size of the log-likelihood decrease required to trigger a
2388            warning or to end the EM iterations. Setting this value slightly
2389            larger than zero allows small decreases in the log-likelihood that
2390            may be caused by numerical issues. If set to zero, then any
2391            decrease will trigger the `llf_decrease_action`. Default is 1e-4.
2392        **kwargs
2393            Additional keyword arguments to pass to the optimizer.
2394
2395        Returns
2396        -------
2397        MLEResults
2398
2399        See Also
2400        --------
2401        statsmodels.base.model.LikelihoodModel.fit
2402        statsmodels.tsa.statespace.mlemodel.MLEResults
2403        """
2404        if method == 'em':
2405            return self.fit_em(
2406                start_params=start_params, transformed=transformed,
2407                cov_type=cov_type, cov_kwds=cov_kwds, maxiter=maxiter,
2408                tolerance=tolerance, em_initialization=em_initialization,
2409                mstep_method=mstep_method, full_output=full_output, disp=disp,
2410                return_params=return_params, low_memory=low_memory,
2411                llf_decrease_action=llf_decrease_action,
2412                llf_decrease_tolerance=llf_decrease_tolerance, **kwargs)
2413        else:
2414            return super().fit(
2415                start_params=start_params, transformed=transformed,
2416                includes_fixed=includes_fixed, cov_type=cov_type,
2417                cov_kwds=cov_kwds, method=method, maxiter=maxiter,
2418                full_output=full_output, disp=disp,
2419                callback=callback, return_params=return_params,
2420                optim_score=optim_score,
2421                optim_complex_step=optim_complex_step,
2422                optim_hessian=optim_hessian, flags=flags,
2423                low_memory=low_memory, **kwargs)
2424
2425    def fit_em(self, start_params=None, transformed=True, cov_type='none',
2426               cov_kwds=None, maxiter=500, tolerance=1e-6, disp=False,
2427               em_initialization=True, mstep_method=None, full_output=True,
2428               return_params=False, low_memory=False,
2429               llf_decrease_action='revert', llf_decrease_tolerance=1e-4):
2430        """
2431        Fits the model by maximum likelihood via the EM algorithm.
2432
2433        Parameters
2434        ----------
2435        start_params : array_like, optional
2436            Initial guess of the solution for the loglikelihood maximization.
2437            The default is to use `DynamicFactorMQ.start_params`.
2438        transformed : bool, optional
2439            Whether or not `start_params` is already transformed. Default is
2440            True.
2441        cov_type : str, optional
2442            The `cov_type` keyword governs the method for calculating the
2443            covariance matrix of parameter estimates. Can be one of:
2444
2445            - 'opg' for the outer product of gradient estimator
2446            - 'oim' for the observed information matrix estimator, calculated
2447              using the method of Harvey (1989)
2448            - 'approx' for the observed information matrix estimator,
2449              calculated using a numerical approximation of the Hessian matrix.
2450            - 'robust' for an approximate (quasi-maximum likelihood) covariance
2451              matrix that may be valid even in the presence of some
2452              misspecifications. Intermediate calculations use the 'oim'
2453              method.
2454            - 'robust_approx' is the same as 'robust' except that the
2455              intermediate calculations use the 'approx' method.
2456            - 'none' for no covariance matrix calculation.
2457
2458            Default is 'none', since computing this matrix can be very slow
2459            when there are a large number of parameters.
2460        cov_kwds : dict or None, optional
2461            A dictionary of arguments affecting covariance matrix computation.
2462
2463            **opg, oim, approx, robust, robust_approx**
2464
2465            - 'approx_complex_step' : bool, optional - If True, numerical
2466              approximations are computed using complex-step methods. If False,
2467              numerical approximations are computed using finite difference
2468              methods. Default is True.
2469            - 'approx_centered' : bool, optional - If True, numerical
2470              approximations computed using finite difference methods use a
2471              centered approximation. Default is False.
2472        maxiter : int, optional
2473            The maximum number of EM iterations to perform.
2474        tolerance : float, optional
2475            Parameter governing convergence of the EM algorithm. The
2476            `tolerance` is the minimum relative increase in the likelihood
2477            for which convergence will be declared. A smaller value for the
2478            `tolerance` will typically yield more precise parameter estimates,
2479            but will typically require more EM iterations. Default is 1e-6.
2480        disp : int or bool, optional
2481            Controls printing of EM iteration progress. If an integer, progress
2482            is printed at every `disp` iterations. A value of True is
2483            interpreted as the value of 1. Default is False (nothing will be
2484            printed).
2485        em_initialization : bool, optional
2486            Whether or not to also update the Kalman filter initialization
2487            using the EM algorithm. Default is True.
2488        mstep_method : {None, 'missing', 'nonmissing'}, optional
2489            The EM algorithm maximization step. If there are no NaN values
2490            in the dataset, this can be set to "nonmissing" (which is slightly
2491            faster) or "missing", otherwise it must be "missing". Default is
2492            "nonmissing" if there are no NaN values or "missing" if there are.
2493        full_output : bool, optional
2494            Set to True to have all available output from EM iterations in
2495            the Results object's mle_retvals attribute.
2496        return_params : bool, optional
2497            Whether or not to return only the array of maximizing parameters.
2498            Default is False.
2499        low_memory : bool, optional
2500            This option cannot be used with the EM algorithm and will raise an
2501            error if set to True. Default is False.
2502        llf_decrease_action : {'ignore', 'warn', 'revert'}, optional
2503            Action to take if the log-likelihood decreases in an EM iteration.
2504            'ignore' continues the iterations, 'warn' issues a warning but
2505            continues the iterations, while 'revert' ends the iterations and
2506            returns the result from the last good iteration. Default is 'warn'.
2507        llf_decrease_tolerance : float, optional
2508            Minimum size of the log-likelihood decrease required to trigger a
2509            warning or to end the EM iterations. Setting this value slightly
2510            larger than zero allows small decreases in the log-likelihood that
2511            may be caused by numerical issues. If set to zero, then any
2512            decrease will trigger the `llf_decrease_action`. Default is 1e-4.
2513
2514        Returns
2515        -------
2516        DynamicFactorMQResults
2517
2518        See Also
2519        --------
2520        statsmodels.tsa.statespace.mlemodel.MLEModel.fit
2521        statsmodels.tsa.statespace.mlemodel.MLEResults
2522        """
2523        if self._has_fixed_params:
2524            raise NotImplementedError('Cannot fit using the EM algorithm while'
2525                                      ' holding some parameters fixed.')
2526        if low_memory:
2527            raise ValueError('Cannot fit using the EM algorithm when using'
2528                             ' low_memory option.')
2529
2530        if start_params is None:
2531            start_params = self.start_params
2532            transformed = True
2533        else:
2534            start_params = np.array(start_params, ndmin=1)
2535
2536        if not transformed:
2537            start_params = self.transform_params(start_params)
2538
2539        llf_decrease_action = string_like(
2540            llf_decrease_action, 'llf_decrease_action',
2541            options=['ignore', 'warn', 'revert'])
2542
2543        disp = int(disp)
2544
2545        # Perform expectation-maximization
2546        s = self._s
2547        llf = []
2548        params = [start_params]
2549        init = None
2550        inits = [self.ssm.initialization]
2551        i = 0
2552        delta = 0
2553        terminate = False
2554        # init_stationary = None if em_initialization else True
2555        while i < maxiter and not terminate and (i < 1 or (delta > tolerance)):
2556            out = self._em_iteration(params[-1], init=init,
2557                                     mstep_method=mstep_method)
2558            new_llf = out[0].llf_obs.sum()
2559
2560            # If we are not using EM initialization, then we need to check for
2561            # non-stationary parameters
2562            if not em_initialization:
2563                self.update(out[1])
2564                switch_init = []
2565                T = self['transition']
2566                init = self.ssm.initialization
2567                iloc = np.arange(self.k_states)
2568
2569                # We may only have global initialization if we have no
2570                # quarterly variables and idiosyncratic_ar1=False
2571                if self.k_endog_Q == 0 and not self.idiosyncratic_ar1:
2572                    block = s.factor_blocks[0]
2573                    if init.initialization_type == 'stationary':
2574                        Tb = T[block['factors'], block['factors']]
2575                        if not np.all(np.linalg.eigvals(Tb) < (1 - 1e-10)):
2576                            init.set(block['factors'], 'diffuse')
2577                            switch_init.append(
2578                                'factor block:'
2579                                f' {tuple(block.factor_names)}')
2580                else:
2581                    # Factor blocks
2582                    for block in s.factor_blocks:
2583                        b = tuple(iloc[block['factors']])
2584                        init_type = init.blocks[b].initialization_type
2585                        if init_type == 'stationary':
2586                            Tb = T[block['factors'], block['factors']]
2587                            if not np.all(np.linalg.eigvals(Tb) < (1 - 1e-10)):
2588                                init.set(block['factors'], 'diffuse')
2589                                switch_init.append(
2590                                    'factor block:'
2591                                    f' {tuple(block.factor_names)}')
2592
2593                if self.idiosyncratic_ar1:
2594                    endog_names = self._get_endog_names(as_string=True)
2595                    # Monthly variables
2596                    for j in range(s['idio_ar_M'].start, s['idio_ar_M'].stop):
2597                        init_type = init.blocks[(j,)].initialization_type
2598                        if init_type == 'stationary':
2599                            if not np.abs(T[j, j]) < (1 - 1e-10):
2600                                init.set(j, 'diffuse')
2601                                name = endog_names[j - s['idio_ar_M'].start]
2602                                switch_init.append(
2603                                    'idiosyncratic AR(1) for monthly'
2604                                    f' variable: {name}')
2605
2606                    # Quarterly variables
2607                    if self.k_endog_Q > 0:
2608                        b = tuple(iloc[s['idio_ar_Q']])
2609                        init_type = init.blocks[b].initialization_type
2610                        if init_type == 'stationary':
2611                            Tb = T[s['idio_ar_Q'], s['idio_ar_Q']]
2612                            if not np.all(np.linalg.eigvals(Tb) < (1 - 1e-10)):
2613                                init.set(s['idio_ar_Q'], 'diffuse')
2614                                switch_init.append(
2615                                    'idiosyncratic AR(1) for the'
2616                                    ' block of quarterly variables')
2617
2618                if len(switch_init) > 0:
2619                    warn('Non-stationary parameters found at EM iteration'
2620                         f' {i + 1}, which is not compatible with'
2621                         ' stationary initialization. Initialization was'
2622                         ' switched to diffuse for the following: '
2623                         f' {switch_init}, and fitting was restarted.')
2624                    results = self.fit_em(
2625                        start_params=params[-1], transformed=transformed,
2626                        cov_type=cov_type, cov_kwds=cov_kwds,
2627                        maxiter=maxiter, tolerance=tolerance,
2628                        em_initialization=em_initialization,
2629                        mstep_method=mstep_method, full_output=full_output,
2630                        disp=disp, return_params=return_params,
2631                        low_memory=low_memory,
2632                        llf_decrease_action=llf_decrease_action,
2633                        llf_decrease_tolerance=llf_decrease_tolerance)
2634                    self.ssm.initialize(self._default_initialization())
2635                    return results
2636
2637            # Check for decrease in the log-likelihood
2638            # Note: allow a little numerical error before declaring a decrease
2639            llf_decrease = (
2640                i > 0 and (new_llf - llf[-1]) < -llf_decrease_tolerance)
2641
2642            if llf_decrease_action == 'revert' and llf_decrease:
2643                warn(f'Log-likelihood decreased at EM iteration {i + 1}.'
2644                     f' Reverting to the results from EM iteration {i}'
2645                     ' (prior to the decrease) and returning the solution.')
2646                # Terminated iteration
2647                i -= 1
2648                terminate = True
2649            else:
2650                if llf_decrease_action == 'warn' and llf_decrease:
2651                    warn(f'Log-likelihood decreased at EM iteration {i + 1},'
2652                         ' which can indicate numerical issues.')
2653                llf.append(new_llf)
2654                params.append(out[1])
2655                if em_initialization:
2656                    init = initialization.Initialization(
2657                        self.k_states, 'known',
2658                        constant=out[0].smoothed_state[..., 0],
2659                        stationary_cov=out[0].smoothed_state_cov[..., 0])
2660                    inits.append(init)
2661                if i > 0:
2662                    delta = (2 * np.abs(llf[-1] - llf[-2]) /
2663                             (np.abs(llf[-1]) + np.abs(llf[-2])))
2664                else:
2665                    delta = np.inf
2666
2667                # If `disp` is not False, display the first iteration
2668                if disp and i == 0:
2669                    print(f'EM start iterations, llf={llf[-1]:.5g}')
2670                # Print output every `disp` observations
2671                elif disp and ((i + 1) % disp) == 0:
2672                    print(f'EM iteration {i + 1}, llf={llf[-1]:.5g},'
2673                          f' convergence criterion={delta:.5g}')
2674
2675            # Advance the iteration counter
2676            i += 1
2677
2678        # Check for convergence
2679        not_converged = (i == maxiter and delta > tolerance)
2680
2681        # If no convergence without explicit termination, warn users
2682        if not_converged:
2683            warn(f'EM reached maximum number of iterations ({maxiter}),'
2684                 f' without achieving convergence: llf={llf[-1]:.5g},'
2685                 f' convergence criterion={delta:.5g}'
2686                 f' (while specified tolerance was {tolerance:.5g})')
2687
2688        # If `disp` is not False, display the final iteration
2689        if disp:
2690            if terminate:
2691                print(f'EM terminated at iteration {i}, llf={llf[-1]:.5g},'
2692                      f' convergence criterion={delta:.5g}'
2693                      f' (while specified tolerance was {tolerance:.5g})')
2694            elif not_converged:
2695                print(f'EM reached maximum number of iterations ({maxiter}),'
2696                      f' without achieving convergence: llf={llf[-1]:.5g},'
2697                      f' convergence criterion={delta:.5g}'
2698                      f' (while specified tolerance was {tolerance:.5g})')
2699            else:
2700                print(f'EM converged at iteration {i}, llf={llf[-1]:.5g},'
2701                      f' convergence criterion={delta:.5g}'
2702                      f' < tolerance={tolerance:.5g}')
2703
2704        # Just return the fitted parameters if requested
2705        if return_params:
2706            result = params[-1]
2707        # Otherwise construct the results class if desired
2708        else:
2709            if em_initialization:
2710                base_init = self.ssm.initialization
2711                self.ssm.initialization = init
2712            # Note that because we are using params[-1], we are actually using
2713            # the results from one additional iteration compared to the
2714            # iteration at which we declared convergence.
2715            result = self.smooth(params[-1], transformed=True,
2716                                 cov_type=cov_type, cov_kwds=cov_kwds)
2717            if em_initialization:
2718                self.ssm.initialization = base_init
2719
2720            # Save the output
2721            if full_output:
2722                llf.append(result.llf)
2723                em_retvals = Bunch(**{'params': np.array(params),
2724                                      'llf': np.array(llf),
2725                                      'iter': i,
2726                                      'inits': inits})
2727                em_settings = Bunch(**{'method': 'em',
2728                                       'tolerance': tolerance,
2729                                       'maxiter': maxiter})
2730            else:
2731                em_retvals = None
2732                em_settings = None
2733
2734            result._results.mle_retvals = em_retvals
2735            result._results.mle_settings = em_settings
2736
2737        return result
2738
2739    def _em_iteration(self, params0, init=None, mstep_method=None):
2740        """EM iteration."""
2741        # (E)xpectation step
2742        res = self._em_expectation_step(params0, init=init)
2743
2744        # (M)aximization step
2745        params1 = self._em_maximization_step(res, params0,
2746                                             mstep_method=mstep_method)
2747
2748        return res, params1
2749
2750    def _em_expectation_step(self, params0, init=None):
2751        """EM expectation step."""
2752        # (E)xpectation step
2753        self.update(params0)
2754        # Re-initialize state, if new initialization is given
2755        if init is not None:
2756            base_init = self.ssm.initialization
2757            self.ssm.initialization = init
2758        # Perform smoothing, only saving what is required
2759        res = self.ssm.smooth(
2760            SMOOTHER_STATE | SMOOTHER_STATE_COV | SMOOTHER_STATE_AUTOCOV,
2761            update_filter=False)
2762        res.llf_obs = np.array(
2763            self.ssm._kalman_filter.loglikelihood, copy=True)
2764        # Reset initialization
2765        if init is not None:
2766            self.ssm.initialization = base_init
2767
2768        return res
2769
2770    def _em_maximization_step(self, res, params0, mstep_method=None):
2771        """EM maximization step."""
2772        s = self._s
2773
2774        a = res.smoothed_state.T[..., None]
2775        cov_a = res.smoothed_state_cov.transpose(2, 0, 1)
2776        acov_a = res.smoothed_state_autocov.transpose(2, 0, 1)
2777
2778        # E[a_t a_t'], t = 0, ..., T
2779        Eaa = cov_a.copy() + np.matmul(a, a.transpose(0, 2, 1))
2780        # E[a_t a_{t-1}'], t = 1, ..., T
2781        Eaa1 = acov_a[:-1] + np.matmul(a[1:], a[:-1].transpose(0, 2, 1))
2782
2783        # Observation equation
2784        has_missing = np.any(res.nmissing)
2785        if mstep_method is None:
2786            mstep_method = 'missing' if has_missing else 'nonmissing'
2787        mstep_method = mstep_method.lower()
2788        if mstep_method == 'nonmissing' and has_missing:
2789            raise ValueError('Cannot use EM algorithm option'
2790                             ' `mstep_method="nonmissing"` with missing data.')
2791
2792        if mstep_method == 'nonmissing':
2793            func = self._em_maximization_obs_nonmissing
2794        elif mstep_method == 'missing':
2795            func = self._em_maximization_obs_missing
2796        else:
2797            raise ValueError('Invalid maximization step method: "%s".'
2798                             % mstep_method)
2799        # TODO: compute H is pretty slow
2800        Lambda, H = func(res, Eaa, a, compute_H=(not self.idiosyncratic_ar1))
2801
2802        # Factor VAR and covariance
2803        factor_ar = []
2804        factor_cov = []
2805        for b in s.factor_blocks:
2806            A = Eaa[:-1, b['factors_ar'], b['factors_ar']].sum(axis=0)
2807            B = Eaa1[:, b['factors_L1'], b['factors_ar']].sum(axis=0)
2808            C = Eaa[1:, b['factors_L1'], b['factors_L1']].sum(axis=0)
2809            nobs = Eaa.shape[0] - 1
2810
2811            # want: x = B A^{-1}, so solve: x A = B or solve: A' x' = B'
2812            try:
2813                f_A = cho_solve(cho_factor(A), B.T).T
2814            except LinAlgError:
2815                # Fall back to general solver if there are problems with
2816                # postive-definiteness
2817                f_A = np.linalg.solve(A, B.T).T
2818
2819            f_Q = (C - f_A @ B.T) / nobs
2820            factor_ar += f_A.ravel().tolist()
2821            factor_cov += (
2822                np.linalg.cholesky(f_Q)[np.tril_indices_from(f_Q)].tolist())
2823
2824        # Idiosyncratic AR(1) and variances
2825        if self.idiosyncratic_ar1:
2826            ix = s['idio_ar_L1']
2827
2828            Ad = Eaa[:-1, ix, ix].sum(axis=0).diagonal()
2829            Bd = Eaa1[:, ix, ix].sum(axis=0).diagonal()
2830            Cd = Eaa[1:, ix, ix].sum(axis=0).diagonal()
2831            nobs = Eaa.shape[0] - 1
2832
2833            alpha = Bd / Ad
2834            sigma2 = (Cd - alpha * Bd) / nobs
2835        else:
2836            ix = s['idio_ar_L1']
2837            C = Eaa[:, ix, ix].sum(axis=0)
2838            sigma2 = np.r_[H.diagonal()[self._o['M']],
2839                           C.diagonal() / Eaa.shape[0]]
2840
2841        # Save parameters
2842        params1 = np.zeros_like(params0)
2843        loadings = []
2844        for i in range(self.k_endog):
2845            iloc = self._s.endog_factor_iloc[i]
2846            factor_ix = s['factors_L1'][iloc]
2847            loadings += Lambda[i, factor_ix].tolist()
2848        params1[self._p['loadings']] = loadings
2849        params1[self._p['factor_ar']] = factor_ar
2850        params1[self._p['factor_cov']] = factor_cov
2851        if self.idiosyncratic_ar1:
2852            params1[self._p['idiosyncratic_ar1']] = alpha
2853        params1[self._p['idiosyncratic_var']] = sigma2
2854
2855        return params1
2856
2857    def _em_maximization_obs_nonmissing(self, res, Eaa, a, compute_H=False):
2858        """EM maximization step, observation equation without missing data."""
2859        s = self._s
2860        dtype = Eaa.dtype
2861
2862        # Observation equation (non-missing)
2863        # Note: we only compute loadings for monthly variables because
2864        # quarterly variables will always have missing entries, so we would
2865        # never choose this method in that case
2866        k = s.k_states_factors
2867        Lambda = np.zeros((self.k_endog, k), dtype=dtype)
2868        for i in range(self.k_endog):
2869            y = self.endog[:, i:i + 1]
2870            iloc = self._s.endog_factor_iloc[i]
2871            factor_ix = s['factors_L1'][iloc]
2872
2873            ix = (np.s_[:],) + np.ix_(factor_ix, factor_ix)
2874            A = Eaa[ix].sum(axis=0)
2875            B = y.T @ a[:, factor_ix, 0]
2876            if self.idiosyncratic_ar1:
2877                ix1 = s.k_states_factors + i
2878                ix2 = ix1 + 1
2879                B -= Eaa[:, ix1:ix2, factor_ix].sum(axis=0)
2880
2881            # want: x = B A^{-1}, so solve: x A = B or solve: A' x' = B'
2882            try:
2883                Lambda[i, factor_ix] = cho_solve(cho_factor(A), B.T).T
2884            except LinAlgError:
2885                # Fall back to general solver if there are problems with
2886                # postive-definiteness
2887                Lambda[i, factor_ix] = np.linalg.solve(A, B.T).T
2888
2889        # Compute new obs cov
2890        # Note: this is unnecessary if `idiosyncratic_ar1=True`.
2891        # This is written in a slightly more general way than
2892        # Banbura and Modugno (2014), equation (7); see instead equation (13)
2893        # of Wu et al. (1996)
2894        # "An algorithm for estimating parameters of state-space models"
2895        if compute_H:
2896            Z = self['design'].copy()
2897            Z[:, :k] = Lambda
2898            BL = self.endog.T @ a[..., 0] @ Z.T
2899            C = self.endog.T @ self.endog
2900
2901            H = (C + -BL - BL.T + Z @ Eaa.sum(axis=0) @ Z.T) / self.nobs
2902        else:
2903            H = np.zeros((self.k_endog, self.k_endog), dtype=dtype) * np.nan
2904
2905        return Lambda, H
2906
2907    def _em_maximization_obs_missing(self, res, Eaa, a, compute_H=False):
2908        """EM maximization step, observation equation with missing data."""
2909        s = self._s
2910        dtype = Eaa.dtype
2911
2912        # Observation equation (missing)
2913        k = s.k_states_factors
2914        Lambda = np.zeros((self.k_endog, k), dtype=dtype)
2915
2916        W = (1 - res.missing.T)
2917        mask = W.astype(bool)
2918
2919        # Compute design for monthly
2920        # Note: the relevant A changes for each i
2921        for i in range(self.k_endog_M):
2922            iloc = self._s.endog_factor_iloc[i]
2923            factor_ix = s['factors_L1'][iloc]
2924
2925            m = mask[:, i]
2926            yt = self.endog[m, i:i + 1]
2927
2928            ix = np.ix_(m, factor_ix, factor_ix)
2929            Ai = Eaa[ix].sum(axis=0)
2930            Bi = yt.T @ a[np.ix_(m, factor_ix)][..., 0]
2931            if self.idiosyncratic_ar1:
2932                ix1 = s.k_states_factors + i
2933                ix2 = ix1 + 1
2934                Bi -= Eaa[m, ix1:ix2][..., factor_ix].sum(axis=0)
2935            # want: x = B A^{-1}, so solve: x A = B or solve: A' x' = B'
2936            try:
2937                Lambda[i, factor_ix] = cho_solve(cho_factor(Ai), Bi.T).T
2938            except LinAlgError:
2939                # Fall back to general solver if there are problems with
2940                # postive-definiteness
2941                Lambda[i, factor_ix] = np.linalg.solve(Ai, Bi.T).T
2942
2943        # Compute unrestricted design for quarterly
2944        # See Banbura at al. (2011), where this is described in Appendix C,
2945        # between equations (13) and (14).
2946        if self.k_endog_Q > 0:
2947            # Note: the relevant A changes for each i
2948            multipliers = np.array([1, 2, 3, 2, 1])[:, None]
2949            for i in range(self.k_endog_M, self.k_endog):
2950                iloc = self._s.endog_factor_iloc[i]
2951                factor_ix = s['factors_L1_5_ix'][:, iloc].ravel().tolist()
2952
2953                R, _ = self.loading_constraints(i)
2954
2955                iQ = i - self.k_endog_M
2956                m = mask[:, i]
2957                yt = self.endog[m, i:i + 1]
2958                ix = np.ix_(m, factor_ix, factor_ix)
2959                Ai = Eaa[ix].sum(axis=0)
2960                BiQ = yt.T @ a[np.ix_(m, factor_ix)][..., 0]
2961                if self.idiosyncratic_ar1:
2962                    ix = (np.s_[:],) + np.ix_(s['idio_ar_Q_ix'][iQ], factor_ix)
2963                    Eepsf = Eaa[ix]
2964                    BiQ -= (multipliers * Eepsf[m].sum(axis=0)).sum(axis=0)
2965
2966                # Note that there was a typo in Banbura et al. (2011) for
2967                # the formula applying the restrictions. In their notation,
2968                # they show (C D C')^{-1} while it should be (C D^{-1} C')^{-1}
2969                # Note: in reality, this is:
2970                # unrestricted - Aii @ R.T @ RARi @ (R @ unrestricted - q)
2971                # where the restrictions are defined as: R @ unrestricted = q
2972                # However, here q = 0, so we can simplify.
2973                try:
2974                    L_and_lower = cho_factor(Ai)
2975                    # x = BQ A^{-1}, or x A = BQ, so solve A' x' = (BQ)'
2976                    unrestricted = cho_solve(L_and_lower, BiQ.T).T[0]
2977                    AiiRT = cho_solve(L_and_lower, R.T)
2978
2979                    L_and_lower = cho_factor(R @ AiiRT)
2980                    RAiiRTiR = cho_solve(L_and_lower, R)
2981                    restricted = unrestricted - AiiRT @ RAiiRTiR @ unrestricted
2982                except LinAlgError:
2983                    # Fall back to slower method if there are problems with
2984                    # postive-definiteness
2985                    Aii = np.linalg.inv(Ai)
2986                    unrestricted = (BiQ @ Aii)[0]
2987                    RARi = np.linalg.inv(R @ Aii @ R.T)
2988                    restricted = (unrestricted -
2989                                  Aii @ R.T @ RARi @ R @ unrestricted)
2990                Lambda[i, factor_ix] = restricted
2991
2992        # Compute new obs cov
2993        # Note: this is unnecessary if `idiosyncratic_ar1=True`.
2994        # See Banbura and Modugno (2014), equation (12)
2995        # This does not literally follow their formula, e.g. multiplying by the
2996        # W_t selection matrices, because those formulas require loops that are
2997        # relatively slow. The formulation here is vectorized.
2998        if compute_H:
2999            Z = self['design'].copy()
3000            Z[:, :Lambda.shape[1]] = Lambda
3001
3002            y = np.nan_to_num(self.endog)
3003            C = y.T @ y
3004            W = W[..., None]
3005            IW = 1 - W
3006
3007            WL = W * Z
3008            WLT = WL.transpose(0, 2, 1)
3009            BL = y[..., None] @ a.transpose(0, 2, 1) @ WLT
3010            A = Eaa
3011
3012            BLT = BL.transpose(0, 2, 1)
3013            IWT = IW.transpose(0, 2, 1)
3014
3015            H = (C + (-BL - BLT + WL @ A @ WLT +
3016                      IW * self['obs_cov'] * IWT).sum(axis=0)) / self.nobs
3017        else:
3018            H = np.zeros((self.k_endog, self.k_endog), dtype=dtype) * np.nan
3019
3020        return Lambda, H
3021
3022    def smooth(self, params, transformed=True, includes_fixed=False,
3023               complex_step=False, cov_type='none', cov_kwds=None,
3024               return_ssm=False, results_class=None,
3025               results_wrapper_class=None, **kwargs):
3026        """
3027        Kalman smoothing.
3028
3029        Parameters
3030        ----------
3031        params : array_like
3032            Array of parameters at which to evaluate the loglikelihood
3033            function.
3034        transformed : bool, optional
3035            Whether or not `params` is already transformed. Default is True.
3036        return_ssm : bool,optional
3037            Whether or not to return only the state space output or a full
3038            results object. Default is to return a full results object.
3039        cov_type : str, optional
3040            See `MLEResults.fit` for a description of covariance matrix types
3041            for results object. Default is None.
3042        cov_kwds : dict or None, optional
3043            See `MLEResults.get_robustcov_results` for a description required
3044            keywords for alternative covariance estimators
3045        **kwargs
3046            Additional keyword arguments to pass to the Kalman filter. See
3047            `KalmanFilter.filter` for more details.
3048        """
3049        return super().smooth(
3050            params, transformed=transformed, includes_fixed=includes_fixed,
3051            complex_step=complex_step, cov_type=cov_type, cov_kwds=cov_kwds,
3052            return_ssm=return_ssm, results_class=results_class,
3053            results_wrapper_class=results_wrapper_class, **kwargs)
3054
3055    def filter(self, params, transformed=True, includes_fixed=False,
3056               complex_step=False, cov_type='none', cov_kwds=None,
3057               return_ssm=False, results_class=None,
3058               results_wrapper_class=None, low_memory=False, **kwargs):
3059        """
3060        Kalman filtering.
3061
3062        Parameters
3063        ----------
3064        params : array_like
3065            Array of parameters at which to evaluate the loglikelihood
3066            function.
3067        transformed : bool, optional
3068            Whether or not `params` is already transformed. Default is True.
3069        return_ssm : bool,optional
3070            Whether or not to return only the state space output or a full
3071            results object. Default is to return a full results object.
3072        cov_type : str, optional
3073            See `MLEResults.fit` for a description of covariance matrix types
3074            for results object. Default is 'none'.
3075        cov_kwds : dict or None, optional
3076            See `MLEResults.get_robustcov_results` for a description required
3077            keywords for alternative covariance estimators
3078        low_memory : bool, optional
3079            If set to True, techniques are applied to substantially reduce
3080            memory usage. If used, some features of the results object will
3081            not be available (including in-sample prediction), although
3082            out-of-sample forecasting is possible. Default is False.
3083        **kwargs
3084            Additional keyword arguments to pass to the Kalman filter. See
3085            `KalmanFilter.filter` for more details.
3086        """
3087        return super().filter(
3088            params, transformed=transformed, includes_fixed=includes_fixed,
3089            complex_step=complex_step, cov_type=cov_type, cov_kwds=cov_kwds,
3090            return_ssm=return_ssm, results_class=results_class,
3091            results_wrapper_class=results_wrapper_class, **kwargs)
3092
3093    def simulate(self, params, nsimulations, measurement_shocks=None,
3094                 state_shocks=None, initial_state=None, anchor=None,
3095                 repetitions=None, exog=None, extend_model=None,
3096                 extend_kwargs=None, transformed=True, includes_fixed=False,
3097                 original_scale=True, **kwargs):
3098        r"""
3099        Simulate a new time series following the state space model.
3100
3101        Parameters
3102        ----------
3103        params : array_like
3104            Array of parameters to use in constructing the state space
3105            representation to use when simulating.
3106        nsimulations : int
3107            The number of observations to simulate. If the model is
3108            time-invariant this can be any number. If the model is
3109            time-varying, then this number must be less than or equal to the
3110            number of observations.
3111        measurement_shocks : array_like, optional
3112            If specified, these are the shocks to the measurement equation,
3113            :math:`\varepsilon_t`. If unspecified, these are automatically
3114            generated using a pseudo-random number generator. If specified,
3115            must be shaped `nsimulations` x `k_endog`, where `k_endog` is the
3116            same as in the state space model.
3117        state_shocks : array_like, optional
3118            If specified, these are the shocks to the state equation,
3119            :math:`\eta_t`. If unspecified, these are automatically
3120            generated using a pseudo-random number generator. If specified,
3121            must be shaped `nsimulations` x `k_posdef` where `k_posdef` is the
3122            same as in the state space model.
3123        initial_state : array_like, optional
3124            If specified, this is the initial state vector to use in
3125            simulation, which should be shaped (`k_states` x 1), where
3126            `k_states` is the same as in the state space model. If unspecified,
3127            but the model has been initialized, then that initialization is
3128            used. This must be specified if `anchor` is anything other than
3129            "start" or 0 (or else you can use the `simulate` method on a
3130            results object rather than on the model object).
3131        anchor : int, str, or datetime, optional
3132            First period for simulation. The simulation will be conditional on
3133            all existing datapoints prior to the `anchor`.  Type depends on the
3134            index of the given `endog` in the model. Two special cases are the
3135            strings 'start' and 'end'. `start` refers to beginning the
3136            simulation at the first period of the sample, and `end` refers to
3137            beginning the simulation at the first period after the sample.
3138            Integer values can run from 0 to `nobs`, or can be negative to
3139            apply negative indexing. Finally, if a date/time index was provided
3140            to the model, then this argument can be a date string to parse or a
3141            datetime type. Default is 'start'.
3142        repetitions : int, optional
3143            Number of simulated paths to generate. Default is 1 simulated path.
3144        exog : array_like, optional
3145            New observations of exogenous regressors, if applicable.
3146        transformed : bool, optional
3147            Whether or not `params` is already transformed. Default is
3148            True.
3149        includes_fixed : bool, optional
3150            If parameters were previously fixed with the `fix_params` method,
3151            this argument describes whether or not `params` also includes
3152            the fixed parameters, in addition to the free parameters. Default
3153            is False.
3154        original_scale : bool, optional
3155            If the model specification standardized the data, whether or not
3156            to return simulations in the original scale of the data (i.e.
3157            before it was standardized by the model). Default is True.
3158
3159        Returns
3160        -------
3161        simulated_obs : ndarray
3162            An array of simulated observations. If `repetitions=None`, then it
3163            will be shaped (nsimulations x k_endog) or (nsimulations,) if
3164            `k_endog=1`. Otherwise it will be shaped
3165            (nsimulations x k_endog x repetitions). If the model was given
3166            Pandas input then the output will be a Pandas object. If
3167            `k_endog > 1` and `repetitions` is not None, then the output will
3168            be a Pandas DataFrame that has a MultiIndex for the columns, with
3169            the first level containing the names of the `endog` variables and
3170            the second level containing the repetition number.
3171        """
3172        # Get usual simulations (in the possibly-standardized scale)
3173        sim = super().simulate(
3174            params, nsimulations, measurement_shocks=measurement_shocks,
3175            state_shocks=state_shocks, initial_state=initial_state,
3176            anchor=anchor, repetitions=repetitions, exog=exog,
3177            extend_model=extend_model, extend_kwargs=extend_kwargs,
3178            transformed=transformed, includes_fixed=includes_fixed, **kwargs)
3179
3180        # If applicable, convert predictions back to original space
3181        if self.standardize and original_scale:
3182            use_pandas = isinstance(self.data, PandasData)
3183            shape = sim.shape
3184
3185            if use_pandas:
3186                # pd.Series (k_endog=1, replications=None)
3187                if len(shape) == 1:
3188                    sim = sim * self._endog_std[0] + self._endog_mean[0]
3189                # pd.DataFrame (k_endog > 1, replications=None)
3190                # [or]
3191                # pd.DataFrame with MultiIndex (replications > 0)
3192                elif len(shape) == 2:
3193                    sim = (sim.multiply(self._endog_std, axis=1, level=0)
3194                              .add(self._endog_mean, axis=1, level=0))
3195            else:
3196                # 1-dim array (k_endog=1, replications=None)
3197                if len(shape) == 1:
3198                    sim = sim * self._endog_std + self._endog_mean
3199                # 2-dim array (k_endog > 1, replications=None)
3200                elif len(shape) == 2:
3201                    sim = sim * self._endog_std + self._endog_mean
3202                # 3-dim array with MultiIndex (replications > 0)
3203                else:
3204                    # Get arrays into the form that can be used for
3205                    # broadcasting
3206                    std = np.atleast_2d(self._endog_std)[..., None]
3207                    mean = np.atleast_2d(self._endog_mean)[..., None]
3208                    sim = sim * std + mean
3209
3210        return sim
3211
3212    def impulse_responses(self, params, steps=1, impulse=0,
3213                          orthogonalized=False, cumulative=False, anchor=None,
3214                          exog=None, extend_model=None, extend_kwargs=None,
3215                          transformed=True, includes_fixed=False,
3216                          original_scale=True, **kwargs):
3217        """
3218        Impulse response function.
3219
3220        Parameters
3221        ----------
3222        params : array_like
3223            Array of model parameters.
3224        steps : int, optional
3225            The number of steps for which impulse responses are calculated.
3226            Default is 1. Note that for time-invariant models, the initial
3227            impulse is not counted as a step, so if `steps=1`, the output will
3228            have 2 entries.
3229        impulse : int or array_like
3230            If an integer, the state innovation to pulse; must be between 0
3231            and `k_posdef-1`. Alternatively, a custom impulse vector may be
3232            provided; must be shaped `k_posdef x 1`.
3233        orthogonalized : bool, optional
3234            Whether or not to perform impulse using orthogonalized innovations.
3235            Note that this will also affect custum `impulse` vectors. Default
3236            is False.
3237        cumulative : bool, optional
3238            Whether or not to return cumulative impulse responses. Default is
3239            False.
3240        anchor : int, str, or datetime, optional
3241            Time point within the sample for the state innovation impulse. Type
3242            depends on the index of the given `endog` in the model. Two special
3243            cases are the strings 'start' and 'end', which refer to setting the
3244            impulse at the first and last points of the sample, respectively.
3245            Integer values can run from 0 to `nobs - 1`, or can be negative to
3246            apply negative indexing. Finally, if a date/time index was provided
3247            to the model, then this argument can be a date string to parse or a
3248            datetime type. Default is 'start'.
3249        exog : array_like, optional
3250            New observations of exogenous regressors for our-of-sample periods,
3251            if applicable.
3252        transformed : bool, optional
3253            Whether or not `params` is already transformed. Default is
3254            True.
3255        includes_fixed : bool, optional
3256            If parameters were previously fixed with the `fix_params` method,
3257            this argument describes whether or not `params` also includes
3258            the fixed parameters, in addition to the free parameters. Default
3259            is False.
3260        original_scale : bool, optional
3261            If the model specification standardized the data, whether or not
3262            to return impulse responses in the original scale of the data (i.e.
3263            before it was standardized by the model). Default is True.
3264        **kwargs
3265            If the model has time-varying design or transition matrices and the
3266            combination of `anchor` and `steps` implies creating impulse
3267            responses for the out-of-sample period, then these matrices must
3268            have updated values provided for the out-of-sample steps. For
3269            example, if `design` is a time-varying component, `nobs` is 10,
3270            `anchor=1`, and `steps` is 15, a (`k_endog` x `k_states` x 7)
3271            matrix must be provided with the new design matrix values.
3272
3273        Returns
3274        -------
3275        impulse_responses : ndarray
3276            Responses for each endogenous variable due to the impulse
3277            given by the `impulse` argument. For a time-invariant model, the
3278            impulse responses are given for `steps + 1` elements (this gives
3279            the "initial impulse" followed by `steps` responses for the
3280            important cases of VAR and SARIMAX models), while for time-varying
3281            models the impulse responses are only given for `steps` elements
3282            (to avoid having to unexpectedly provide updated time-varying
3283            matrices).
3284
3285        """
3286        # Get usual simulations (in the possibly-standardized scale)
3287        irfs = super().impulse_responses(
3288            params, steps=steps, impulse=impulse,
3289            orthogonalized=orthogonalized, cumulative=cumulative,
3290            anchor=anchor, exog=exog, extend_model=extend_model,
3291            extend_kwargs=extend_kwargs, transformed=transformed,
3292            includes_fixed=includes_fixed, original_scale=original_scale,
3293            **kwargs)
3294
3295        # If applicable, convert predictions back to original space
3296        if self.standardize and original_scale:
3297            use_pandas = isinstance(self.data, PandasData)
3298            shape = irfs.shape
3299
3300            if use_pandas:
3301                # pd.Series (k_endog=1, replications=None)
3302                if len(shape) == 1:
3303                    irfs = irfs * self._endog_std[0]
3304                # pd.DataFrame (k_endog > 1)
3305                # [or]
3306                # pd.DataFrame with MultiIndex (replications > 0)
3307                elif len(shape) == 2:
3308                    irfs = irfs.multiply(self._endog_std, axis=1, level=0)
3309            else:
3310                # 1-dim array (k_endog=1)
3311                if len(shape) == 1:
3312                    irfs = irfs * self._endog_std
3313                # 2-dim array (k_endog > 1)
3314                elif len(shape) == 2:
3315                    irfs = irfs * self._endog_std
3316
3317        return irfs
3318
3319
3320class DynamicFactorMQResults(mlemodel.MLEResults):
3321    """
3322    Results from fitting a dynamic factor model
3323    """
3324    def __init__(self, model, params, filter_results, cov_type=None, **kwargs):
3325        super(DynamicFactorMQResults, self).__init__(
3326            model, params, filter_results, cov_type, **kwargs)
3327
3328    @property
3329    def factors(self):
3330        """
3331        Estimates of unobserved factors.
3332
3333        Returns
3334        -------
3335        out : Bunch
3336            Has the following attributes shown in Notes.
3337
3338        Notes
3339        -----
3340        The output is a bunch of the following format:
3341
3342        - `filtered`: a time series array with the filtered estimate of
3343          the component
3344        - `filtered_cov`: a time series array with the filtered estimate of
3345          the variance/covariance of the component
3346        - `smoothed`: a time series array with the smoothed estimate of
3347          the component
3348        - `smoothed_cov`: a time series array with the smoothed estimate of
3349          the variance/covariance of the component
3350        - `offset`: an integer giving the offset in the state vector where
3351          this component begins
3352        """
3353        out = None
3354        if self.model.k_factors > 0:
3355            iloc = self.model._s.factors_L1
3356            ix = np.array(self.model.state_names)[iloc].tolist()
3357            out = Bunch(
3358                filtered=self.states.filtered.loc[:, ix],
3359                filtered_cov=self.states.filtered_cov.loc[np.s_[ix, :], ix],
3360                smoothed=None, smoothed_cov=None)
3361            if self.smoothed_state is not None:
3362                out.smoothed = self.states.smoothed.loc[:, ix]
3363            if self.smoothed_state_cov is not None:
3364                out.smoothed_cov = (
3365                    self.states.smoothed_cov.loc[np.s_[ix, :], ix])
3366        return out
3367
3368    def get_coefficients_of_determination(self, method='individual',
3369                                          which=None):
3370        """
3371        Get coefficients of determination (R-squared) for variables / factors.
3372
3373        Parameters
3374        ----------
3375        method : {'individual', 'joint', 'cumulative'}, optional
3376            The type of R-squared values to generate. "individual" plots
3377            the R-squared of each variable on each factor; "joint" plots the
3378            R-squared of each variable on each factor that it loads on;
3379            "cumulative" plots the successive R-squared values as each
3380            additional factor is added to the regression, for each variable.
3381            Default is 'individual'.
3382        which: {None, 'filtered', 'smoothed'}, optional
3383            Whether to compute R-squared values based on filtered or smoothed
3384            estimates of the factors. Default is 'smoothed' if smoothed results
3385            are available and 'filtered' otherwise.
3386
3387        Returns
3388        -------
3389        rsquared : pd.DataFrame or pd.Series
3390            The R-squared values from regressions of observed variables on
3391            one or more of the factors. If method='individual' or
3392            method='cumulative', this will be a Pandas DataFrame with observed
3393            variables as the index and factors as the columns . If
3394            method='joint', will be a Pandas Series with observed variables as
3395            the index.
3396
3397        See Also
3398        --------
3399        plot_coefficients_of_determination
3400        coefficients_of_determination
3401        """
3402        from statsmodels.tools import add_constant
3403
3404        method = string_like(method, 'method', options=['individual', 'joint',
3405                                                        'cumulative'])
3406        if which is None:
3407            which = 'filtered' if self.smoothed_state is None else 'smoothed'
3408
3409        k_endog = self.model.k_endog
3410        k_factors = self.model.k_factors
3411        ef_map = self.model._s.endog_factor_map
3412        endog_names = self.model.endog_names
3413        factor_names = self.model.factor_names
3414
3415        if method == 'individual':
3416            coefficients = np.zeros((k_endog, k_factors))
3417
3418            for i in range(k_factors):
3419                exog = add_constant(self.factors[which].iloc[:, i])
3420                for j in range(k_endog):
3421                    if ef_map.iloc[j, i]:
3422                        endog = self.filter_results.endog[j]
3423                        coefficients[j, i] = (
3424                            OLS(endog, exog, missing='drop').fit().rsquared)
3425                    else:
3426                        coefficients[j, i] = np.nan
3427
3428            coefficients = pd.DataFrame(coefficients, index=endog_names,
3429                                        columns=factor_names)
3430        elif method == 'joint':
3431            coefficients = np.zeros((k_endog,))
3432            exog = add_constant(self.factors[which])
3433            for j in range(k_endog):
3434                endog = self.filter_results.endog[j]
3435                ix = np.r_[True, ef_map.iloc[j]].tolist()
3436                X = exog.loc[:, ix]
3437                coefficients[j] = (
3438                    OLS(endog, X, missing='drop').fit().rsquared)
3439            coefficients = pd.Series(coefficients, index=endog_names)
3440        elif method == 'cumulative':
3441            coefficients = np.zeros((k_endog, k_factors))
3442            exog = add_constant(self.factors[which])
3443            for j in range(k_endog):
3444                endog = self.filter_results.endog[j]
3445
3446                for i in range(k_factors):
3447                    if self.model._s.endog_factor_map.iloc[j, i]:
3448                        ix = np.r_[True, ef_map.iloc[j, :i + 1],
3449                                   [False] * (k_factors - i - 1)]
3450                        X = exog.loc[:, ix.astype(bool).tolist()]
3451                        coefficients[j, i] = (
3452                            OLS(endog, X, missing='drop').fit().rsquared)
3453                    else:
3454                        coefficients[j, i] = np.nan
3455            coefficients = pd.DataFrame(coefficients, index=endog_names,
3456                                        columns=factor_names)
3457
3458        return coefficients
3459
3460    @cache_readonly
3461    def coefficients_of_determination(self):
3462        """
3463        Individual coefficients of determination (:math:`R^2`).
3464
3465        Coefficients of determination (:math:`R^2`) from regressions of
3466        endogenous variables on individual estimated factors.
3467
3468        Returns
3469        -------
3470        coefficients_of_determination : ndarray
3471            A `k_endog` x `k_factors` array, where
3472            `coefficients_of_determination[i, j]` represents the :math:`R^2`
3473            value from a regression of factor `j` and a constant on endogenous
3474            variable `i`.
3475
3476        Notes
3477        -----
3478        Although it can be difficult to interpret the estimated factor loadings
3479        and factors, it is often helpful to use the coefficients of
3480        determination from univariate regressions to assess the importance of
3481        each factor in explaining the variation in each endogenous variable.
3482
3483        In models with many variables and factors, this can sometimes lend
3484        interpretation to the factors (for example sometimes one factor will
3485        load primarily on real variables and another on nominal variables).
3486
3487        See Also
3488        --------
3489        get_coefficients_of_determination
3490        plot_coefficients_of_determination
3491        """
3492        return self.get_coefficients_of_determination(method='individual')
3493
3494    def plot_coefficients_of_determination(self, method='individual',
3495                                           which=None, endog_labels=None,
3496                                           fig=None, figsize=None):
3497        """
3498        Plot coefficients of determination (R-squared) for variables / factors.
3499
3500        Parameters
3501        ----------
3502        method : {'individual', 'joint', 'cumulative'}, optional
3503            The type of R-squared values to generate. "individual" plots
3504            the R-squared of each variable on each factor; "joint" plots the
3505            R-squared of each variable on each factor that it loads on;
3506            "cumulative" plots the successive R-squared values as each
3507            additional factor is added to the regression, for each variable.
3508            Default is 'individual'.
3509        which: {None, 'filtered', 'smoothed'}, optional
3510            Whether to compute R-squared values based on filtered or smoothed
3511            estimates of the factors. Default is 'smoothed' if smoothed results
3512            are available and 'filtered' otherwise.
3513        endog_labels : bool, optional
3514            Whether or not to label the endogenous variables along the x-axis
3515            of the plots. Default is to include labels if there are 5 or fewer
3516            endogenous variables.
3517        fig : Figure, optional
3518            If given, subplots are created in this figure instead of in a new
3519            figure. Note that the grid will be created in the provided
3520            figure using `fig.add_subplot()`.
3521        figsize : tuple, optional
3522            If a figure is created, this argument allows specifying a size.
3523            The tuple is (width, height).
3524
3525        Notes
3526        -----
3527        The endogenous variables are arranged along the x-axis according to
3528        their position in the model's `endog` array.
3529
3530        See Also
3531        --------
3532        get_coefficients_of_determination
3533        """
3534        from statsmodels.graphics.utils import _import_mpl, create_mpl_fig
3535        _import_mpl()
3536        fig = create_mpl_fig(fig, figsize)
3537
3538        method = string_like(method, 'method', options=['individual', 'joint',
3539                                                        'cumulative'])
3540
3541        # Should we label endogenous variables?
3542        if endog_labels is None:
3543            endog_labels = self.model.k_endog <= 5
3544
3545        # Plot the coefficients of determination
3546        rsquared = self.get_coefficients_of_determination(method=method,
3547                                                          which=which)
3548
3549        if method in ['individual', 'cumulative']:
3550            plot_idx = 1
3551            for factor_name, coeffs in rsquared.T.iterrows():
3552                # Create the new axis
3553                ax = fig.add_subplot(self.model.k_factors, 1, plot_idx)
3554                ax.set_ylim((0, 1))
3555                ax.set(title=f'{factor_name}', ylabel=r'$R^2$')
3556
3557                coeffs.plot(ax=ax, kind='bar')
3558                if plot_idx < len(rsquared.columns) or not endog_labels:
3559                    ax.xaxis.set_ticklabels([])
3560
3561                plot_idx += 1
3562        elif method == 'joint':
3563            ax = fig.add_subplot(1, 1, 1)
3564            ax.set_ylim((0, 1))
3565            ax.set(title=r'$R^2$ - regression on all loaded factors',
3566                   ylabel=r'$R^2$')
3567            rsquared.plot(ax=ax, kind='bar')
3568            if not endog_labels:
3569                ax.xaxis.set_ticklabels([])
3570
3571        return fig
3572
3573    def get_prediction(self, start=None, end=None, dynamic=False,
3574                       index=None, exog=None, extend_model=None,
3575                       extend_kwargs=None, original_scale=True, **kwargs):
3576        """
3577        In-sample prediction and out-of-sample forecasting.
3578
3579        Parameters
3580        ----------
3581        start : int, str, or datetime, optional
3582            Zero-indexed observation number at which to start forecasting,
3583            i.e., the first forecast is start. Can also be a date string to
3584            parse or a datetime type. Default is the the zeroth observation.
3585        end : int, str, or datetime, optional
3586            Zero-indexed observation number at which to end forecasting, i.e.,
3587            the last forecast is end. Can also be a date string to
3588            parse or a datetime type. However, if the dates index does not
3589            have a fixed frequency, end must be an integer index if you
3590            want out of sample prediction. Default is the last observation in
3591            the sample.
3592        dynamic : bool, int, str, or datetime, optional
3593            Integer offset relative to `start` at which to begin dynamic
3594            prediction. Can also be an absolute date string to parse or a
3595            datetime type (these are not interpreted as offsets).
3596            Prior to this observation, true endogenous values will be used for
3597            prediction; starting with this observation and continuing through
3598            the end of prediction, forecasted endogenous values will be used
3599            instead.
3600        original_scale : bool, optional
3601            If the model specification standardized the data, whether or not
3602            to return predictions in the original scale of the data (i.e.
3603            before it was standardized by the model). Default is True.
3604        **kwargs
3605            Additional arguments may required for forecasting beyond the end
3606            of the sample. See `FilterResults.predict` for more details.
3607
3608        Returns
3609        -------
3610        forecast : ndarray
3611            Array of out of in-sample predictions and / or out-of-sample
3612            forecasts. An (npredict x k_endog) array.
3613        """
3614        # Get usual predictions (in the possibly-standardized scale)
3615        res = super().get_prediction(start=start, end=end, dynamic=dynamic,
3616                                     index=index, exog=exog,
3617                                     extend_model=extend_model,
3618                                     extend_kwargs=extend_kwargs, **kwargs)
3619
3620        # If applicable, convert predictions back to original space
3621        if self.model.standardize and original_scale:
3622            prediction_results = res.prediction_results
3623            k_endog, nobs = prediction_results.endog.shape
3624
3625            mean = np.array(self.model._endog_mean)
3626            std = np.array(self.model._endog_std)
3627
3628            if self.model.k_endog > 1:
3629                mean = mean[None, :]
3630                std = std[None, :]
3631
3632            if not prediction_results.results.memory_no_forecast_mean:
3633                res._results._predicted_mean = (
3634                    res._results._predicted_mean * std + mean)
3635
3636            if not prediction_results.results.memory_no_forecast_cov:
3637                if k_endog == 1:
3638                    res._results._var_pred_mean *= std**2
3639                else:
3640                    res._results._var_pred_mean = (
3641                        std * res._results._var_pred_mean * std.T)
3642
3643        return res
3644
3645    def news(self, comparison, impact_date=None, impacted_variable=None,
3646             start=None, end=None, periods=None, exog=None,
3647             comparison_type=None, return_raw=False, tolerance=1e-10,
3648             endog_quarterly=None, original_scale=True, **kwargs):
3649        """
3650        Compute impacts from updated data (news and revisions).
3651
3652        Parameters
3653        ----------
3654        comparison : array_like or MLEResults
3655            An updated dataset with updated and/or revised data from which the
3656            news can be computed, or an updated or previous results object
3657            to use in computing the news.
3658        impact_date : int, str, or datetime, optional
3659            A single specific period of impacts from news and revisions to
3660            compute. Can also be a date string to parse or a datetime type.
3661            This argument cannot be used in combination with `start`, `end`, or
3662            `periods`. Default is the first out-of-sample observation.
3663        impacted_variable : str, list, array, or slice, optional
3664            Observation variable label or slice of labels specifying that only
3665            specific impacted variables should be shown in the News output. The
3666            impacted variable(s) describe the variables that were *affected* by
3667            the news. If you do not know the labels for the variables, check
3668            the `endog_names` attribute of the model instance.
3669        start : int, str, or datetime, optional
3670            The first period of impacts from news and revisions to compute.
3671            Can also be a date string to parse or a datetime type. Default is
3672            the first out-of-sample observation.
3673        end : int, str, or datetime, optional
3674            The last period of impacts from news and revisions to compute.
3675            Can also be a date string to parse or a datetime type. Default is
3676            the first out-of-sample observation.
3677        periods : int, optional
3678            The number of periods of impacts from news and revisions to
3679            compute.
3680        exog : array_like, optional
3681            Array of exogenous regressors for the out-of-sample period, if
3682            applicable.
3683        comparison_type : {None, 'previous', 'updated'}
3684            This denotes whether the `comparison` argument represents a
3685            *previous* results object or dataset or an *updated* results object
3686            or dataset. If not specified, then an attempt is made to determine
3687            the comparison type.
3688        return_raw : bool, optional
3689            Whether or not to return only the specific output or a full
3690            results object. Default is to return a full results object.
3691        tolerance : float, optional
3692            The numerical threshold for determining zero impact. Default is
3693            that any impact less than 1e-10 is assumed to be zero.
3694        endog_quarterly : array_like, optional
3695            New observations of quarterly variables, if `comparison` was
3696            provided as an updated monthly dataset. If this argument is
3697            provided, it must be a Pandas Series or DataFrame with a
3698            DatetimeIndex or PeriodIndex at the quarterly frequency.
3699
3700        References
3701        ----------
3702        .. [1] Bańbura, Marta, and Michele Modugno.
3703               "Maximum likelihood estimation of factor models on datasets with
3704               arbitrary pattern of missing data."
3705               Journal of Applied Econometrics 29, no. 1 (2014): 133-160.
3706        .. [2] Bańbura, Marta, Domenico Giannone, and Lucrezia Reichlin.
3707               "Nowcasting."
3708               The Oxford Handbook of Economic Forecasting. July 8, 2011.
3709        .. [3] Bańbura, Marta, Domenico Giannone, Michele Modugno, and Lucrezia
3710               Reichlin.
3711               "Now-casting and the real-time data flow."
3712               In Handbook of economic forecasting, vol. 2, pp. 195-237.
3713               Elsevier, 2013.
3714        """
3715        news_results = super().news(
3716            comparison, impact_date=impact_date,
3717            impacted_variable=impacted_variable, start=start, end=end,
3718            periods=periods, exog=exog, comparison_type=comparison_type,
3719            return_raw=return_raw, tolerance=tolerance,
3720            endog_quarterly=endog_quarterly, **kwargs)
3721
3722        # If we have standardized the data, we may want to report the news in
3723        # the original scale. If so, we need to modify the data to "undo" the
3724        # standardization.
3725        if not return_raw and self.model.standardize and original_scale:
3726            endog_mean = self.model._endog_mean
3727            endog_std = self.model._endog_std
3728
3729            # Don't need to add in the mean for the impacts, since they are
3730            # the difference of two forecasts
3731            news_results.total_impacts = (
3732                news_results.total_impacts * endog_std)
3733            news_results.update_impacts = (
3734                news_results.update_impacts * endog_std)
3735            if news_results.revision_impacts is not None:
3736                news_results.revision_impacts = (
3737                    news_results.revision_impacts * endog_std)
3738
3739            # Update forecasts
3740            for name in ['prev_impacted_forecasts', 'news', 'update_realized',
3741                         'update_forecasts', 'post_impacted_forecasts']:
3742                dta = getattr(news_results, name)
3743
3744                # for pd.Series, dta.multiply(...) removes the name attribute;
3745                # save it now so that we can add it back in
3746                orig_name = None
3747                if hasattr(dta, 'name'):
3748                    orig_name = dta.name
3749
3750                dta = dta.multiply(endog_std, level=1)
3751
3752                # add back in the name attribute if it was removed
3753                if orig_name is not None:
3754                    dta.name = orig_name
3755
3756                if name != 'news':
3757                    dta = dta.add(endog_mean, level=1)
3758                setattr(news_results, name, dta)
3759
3760            # For the weights: rows correspond to update (date, variable) and
3761            # columns correspond to the impacted variable.
3762            # 1. Because we have modified the updates (realized, forecasts, and
3763            #    forecast errors) to be in the scale of the original updated
3764            #    variable, we need to essentially reverse that change for each
3765            #    row of the weights by dividing by the standard deviation of
3766            #    that row's updated variable
3767            # 2. Because we want the impacts to be in the scale of the original
3768            #    impacted variable, we need to multiply each column by the
3769            #    standard deviation of that column's impacted variable
3770            news_results.weights = (
3771                news_results.weights.divide(endog_std, axis=0, level=1)
3772                                    .multiply(endog_std, axis=1, level=1))
3773
3774        return news_results
3775
3776    def append(self, endog, endog_quarterly=None, refit=False, fit_kwargs=None,
3777               copy_initialization=True, retain_standardization=True,
3778               **kwargs):
3779        """
3780        Recreate the results object with new data appended to original data.
3781
3782        Creates a new result object applied to a dataset that is created by
3783        appending new data to the end of the model's original data. The new
3784        results can then be used for analysis or forecasting.
3785
3786        Parameters
3787        ----------
3788        endog : array_like
3789            New observations from the modeled time-series process.
3790        endog_quarterly : array_like, optional
3791            New observations of quarterly variables. If provided, must be a
3792            Pandas Series or DataFrame with a DatetimeIndex or PeriodIndex at
3793            the quarterly frequency.
3794        refit : bool, optional
3795            Whether to re-fit the parameters, based on the combined dataset.
3796            Default is False (so parameters from the current results object
3797            are used to create the new results object).
3798        fit_kwargs : dict, optional
3799            Keyword arguments to pass to `fit` (if `refit=True`) or `filter` /
3800            `smooth`.
3801        copy_initialization : bool, optional
3802            Whether or not to copy the initialization from the current results
3803            set to the new model. Default is True.
3804        retain_standardization : bool, optional
3805            Whether or not to use the mean and standard deviations that were
3806            used to standardize the data in the current model in the new model.
3807            Default is True.
3808        **kwargs
3809            Keyword arguments may be used to modify model specification
3810            arguments when created the new model object.
3811
3812        Returns
3813        -------
3814        results
3815            Updated Results object, that includes results from both the
3816            original dataset and the new dataset.
3817
3818        Notes
3819        -----
3820        The `endog` and `exog` arguments to this method must be formatted in
3821        the same way (e.g. Pandas Series versus Numpy array) as were the
3822        `endog` and `exog` arrays passed to the original model.
3823
3824        The `endog` (and, if applicable, `endog_quarterly`) arguments to this
3825        method should consist of new observations that occurred directly after
3826        the last element of `endog`. For any other kind of dataset, see the
3827        `apply` method.
3828
3829        This method will apply filtering to all of the original data as well
3830        as to the new data. To apply filtering only to the new data (which
3831        can be much faster if the original dataset is large), see the `extend`
3832        method.
3833
3834        See Also
3835        --------
3836        extend
3837        apply
3838        """
3839        # Construct the combined dataset, if necessary
3840        endog, k_endog_monthly = DynamicFactorMQ.construct_endog(
3841            endog, endog_quarterly)
3842
3843        # Check for compatible dimensions
3844        k_endog = endog.shape[1] if len(endog.shape) == 2 else 1
3845        if (k_endog_monthly != self.model.k_endog_M or
3846                k_endog != self.model.k_endog):
3847            raise ValueError('Cannot append data of a different dimension to'
3848                             ' a model.')
3849
3850        kwargs['k_endog_monthly'] = k_endog_monthly
3851
3852        return super().append(
3853            endog, refit=refit, fit_kwargs=fit_kwargs,
3854            copy_initialization=copy_initialization,
3855            retain_standardization=retain_standardization, **kwargs)
3856
3857    def extend(self, endog, endog_quarterly=None, fit_kwargs=None,
3858               retain_standardization=True, **kwargs):
3859        """
3860        Recreate the results object for new data that extends original data.
3861
3862        Creates a new result object applied to a new dataset that is assumed to
3863        follow directly from the end of the model's original data. The new
3864        results can then be used for analysis or forecasting.
3865
3866        Parameters
3867        ----------
3868        endog : array_like
3869            New observations from the modeled time-series process.
3870        endog_quarterly : array_like, optional
3871            New observations of quarterly variables. If provided, must be a
3872            Pandas Series or DataFrame with a DatetimeIndex or PeriodIndex at
3873            the quarterly frequency.
3874        fit_kwargs : dict, optional
3875            Keyword arguments to pass to `filter` or `smooth`.
3876        retain_standardization : bool, optional
3877            Whether or not to use the mean and standard deviations that were
3878            used to standardize the data in the current model in the new model.
3879            Default is True.
3880        **kwargs
3881            Keyword arguments may be used to modify model specification
3882            arguments when created the new model object.
3883
3884        Returns
3885        -------
3886        results
3887            Updated Results object, that includes results only for the new
3888            dataset.
3889
3890        See Also
3891        --------
3892        append
3893        apply
3894
3895        Notes
3896        -----
3897        The `endog` argument to this method should consist of new observations
3898        that occurred directly after the last element of the model's original
3899        `endog` array. For any other kind of dataset, see the `apply` method.
3900
3901        This method will apply filtering only to the new data provided by the
3902        `endog` argument, which can be much faster than re-filtering the entire
3903        dataset. However, the returned results object will only have results
3904        for the new data. To retrieve results for both the new data and the
3905        original data, see the `append` method.
3906        """
3907        # Construct the combined dataset, if necessary
3908        endog, k_endog_monthly = DynamicFactorMQ.construct_endog(
3909            endog, endog_quarterly)
3910
3911        # Check for compatible dimensions
3912        k_endog = endog.shape[1] if len(endog.shape) == 2 else 1
3913        if (k_endog_monthly != self.model.k_endog_M or
3914                k_endog != self.model.k_endog):
3915            raise ValueError('Cannot append data of a different dimension to'
3916                             ' a model.')
3917
3918        kwargs['k_endog_monthly'] = k_endog_monthly
3919        return super().extend(
3920            endog, fit_kwargs=fit_kwargs,
3921            retain_standardization=retain_standardization, **kwargs)
3922
3923    def apply(self, endog, k_endog_monthly=None, endog_quarterly=None,
3924              refit=False, fit_kwargs=None, copy_initialization=False,
3925              retain_standardization=True, **kwargs):
3926        """
3927        Apply the fitted parameters to new data unrelated to the original data.
3928
3929        Creates a new result object using the current fitted parameters,
3930        applied to a completely new dataset that is assumed to be unrelated to
3931        the model's original data. The new results can then be used for
3932        analysis or forecasting.
3933
3934        Parameters
3935        ----------
3936        endog : array_like
3937            New observations from the modeled time-series process.
3938        k_endog_monthly : int, optional
3939            If specifying a monthly/quarterly mixed frequency model in which
3940            the provided `endog` dataset contains both the monthly and
3941            quarterly data, this variable should be used to indicate how many
3942            of the variables are monthly.
3943        endog_quarterly : array_like, optional
3944            New observations of quarterly variables. If provided, must be a
3945            Pandas Series or DataFrame with a DatetimeIndex or PeriodIndex at
3946            the quarterly frequency.
3947        refit : bool, optional
3948            Whether to re-fit the parameters, using the new dataset.
3949            Default is False (so parameters from the current results object
3950            are used to create the new results object).
3951        fit_kwargs : dict, optional
3952            Keyword arguments to pass to `fit` (if `refit=True`) or `filter` /
3953            `smooth`.
3954        copy_initialization : bool, optional
3955            Whether or not to copy the initialization from the current results
3956            set to the new model. Default is False.
3957        retain_standardization : bool, optional
3958            Whether or not to use the mean and standard deviations that were
3959            used to standardize the data in the current model in the new model.
3960            Default is True.
3961        **kwargs
3962            Keyword arguments may be used to modify model specification
3963            arguments when created the new model object.
3964
3965        Returns
3966        -------
3967        results
3968            Updated Results object, that includes results only for the new
3969            dataset.
3970
3971        See Also
3972        --------
3973        statsmodels.tsa.statespace.mlemodel.MLEResults.append
3974        statsmodels.tsa.statespace.mlemodel.MLEResults.apply
3975
3976        Notes
3977        -----
3978        The `endog` argument to this method should consist of new observations
3979        that are not necessarily related to the original model's `endog`
3980        dataset. For observations that continue that original dataset by follow
3981        directly after its last element, see the `append` and `extend` methods.
3982        """
3983        mod = self.model.clone(endog, k_endog_monthly=k_endog_monthly,
3984                               endog_quarterly=endog_quarterly,
3985                               retain_standardization=retain_standardization,
3986                               **kwargs)
3987        if copy_initialization:
3988            res = self.filter_results
3989            init = initialization.Initialization(
3990                self.model.k_states, 'known', constant=res.initial_state,
3991                stationary_cov=res.initial_state_cov)
3992            mod.ssm.initialization = init
3993
3994        res = self._apply(mod, refit=refit, fit_kwargs=fit_kwargs, **kwargs)
3995
3996        return res
3997
3998    def summary(self, alpha=.05, start=None, title=None, model_name=None,
3999                display_params=True, display_diagnostics=False,
4000                display_params_as_list=False, truncate_endog_names=None,
4001                display_max_endog=3):
4002        """
4003        Summarize the Model.
4004
4005        Parameters
4006        ----------
4007        alpha : float, optional
4008            Significance level for the confidence intervals. Default is 0.05.
4009        start : int, optional
4010            Integer of the start observation. Default is 0.
4011        title : str, optional
4012            The title used for the summary table.
4013        model_name : str, optional
4014            The name of the model used. Default is to use model class name.
4015
4016        Returns
4017        -------
4018        summary : Summary instance
4019            This holds the summary table and text, which can be printed or
4020            converted to various output formats.
4021
4022        See Also
4023        --------
4024        statsmodels.iolib.summary.Summary
4025        """
4026        mod = self.model
4027
4028        # Default title / model name
4029        if title is None:
4030            title = 'Dynamic Factor Results'
4031        if model_name is None:
4032            model_name = self.model._model_name
4033
4034        # Get endog names
4035        endog_names = self.model._get_endog_names(
4036            truncate=truncate_endog_names)
4037
4038        # Get extra elements for top summary table
4039        extra_top_left = None
4040        extra_top_right = []
4041        mle_retvals = getattr(self, 'mle_retvals', None)
4042        mle_settings = getattr(self, 'mle_settings', None)
4043        if mle_settings is not None and mle_settings.method == 'em':
4044            extra_top_right += [('EM Iterations', [f'{mle_retvals.iter}'])]
4045
4046        # Get the basic summary tables
4047        summary = super().summary(
4048            alpha=alpha, start=start, title=title, model_name=model_name,
4049            display_params=(display_params and display_params_as_list),
4050            display_diagnostics=display_diagnostics,
4051            truncate_endog_names=truncate_endog_names,
4052            display_max_endog=display_max_endog,
4053            extra_top_left=extra_top_left, extra_top_right=extra_top_right)
4054
4055        # Get tables of parameters
4056        table_ix = 1
4057        if not display_params_as_list:
4058
4059            # Observation equation table
4060            data = pd.DataFrame(
4061                self.filter_results.design[:, mod._s['factors_L1'], 0],
4062                index=endog_names, columns=mod.factor_names)
4063            data = data.applymap(lambda s: '%.2f' % s)
4064
4065            # Idiosyncratic terms
4066            # data['   '] = '   '
4067            k_idio = 1
4068            if mod.idiosyncratic_ar1:
4069                data['   idiosyncratic: AR(1)'] = (
4070                    self.params[mod._p['idiosyncratic_ar1']])
4071                k_idio += 1
4072            data['var.'] = self.params[mod._p['idiosyncratic_var']]
4073            data.iloc[:, -k_idio:] = data.iloc[:, -k_idio:].applymap(
4074                lambda s: '%.2f' % s)
4075
4076            data.index.name = 'Factor loadings:'
4077
4078            # Clear entries for non-loading factors
4079            base_iloc = np.arange(mod.k_factors)
4080            for i in range(mod.k_endog):
4081                iloc = [j for j in base_iloc
4082                        if j not in mod._s.endog_factor_iloc[i]]
4083                data.iloc[i, iloc] = '.'
4084
4085            data = data.reset_index()
4086
4087            # Build the table
4088            params_data = data.values
4089            params_header = data.columns.tolist()
4090            params_stubs = None
4091
4092            title = 'Observation equation:'
4093            table = SimpleTable(
4094                params_data, params_header, params_stubs,
4095                txt_fmt=fmt_params, title=title)
4096            summary.tables.insert(table_ix, table)
4097            table_ix += 1
4098
4099            # Factor transitions
4100            ix1 = 0
4101            ix2 = 0
4102            for i in range(len(mod._s.factor_blocks)):
4103                block = mod._s.factor_blocks[i]
4104                ix2 += block.k_factors
4105
4106                T = self.filter_results.transition
4107                lag_names = []
4108                for j in range(block.factor_order):
4109                    lag_names += [f'L{j + 1}.{name}'
4110                                  for name in block.factor_names]
4111                data = pd.DataFrame(T[block.factors_L1, block.factors_ar, 0],
4112                                    index=block.factor_names,
4113                                    columns=lag_names)
4114                data.index.name = ''
4115                data = data.applymap(lambda s: '%.2f' % s)
4116
4117                Q = self.filter_results.state_cov
4118                # data[' '] = ''
4119                if block.k_factors == 1:
4120                    data['   error variance'] = Q[ix1, ix1]
4121                else:
4122                    data['   error covariance'] = block.factor_names
4123                    for j in range(block.k_factors):
4124                        data[block.factor_names[j]] = Q[ix1:ix2, ix1 + j]
4125                data.iloc[:, -block.k_factors:] = (
4126                    data.iloc[:, -block.k_factors:].applymap(
4127                        lambda s: '%.2f' % s))
4128
4129                data = data.reset_index()
4130
4131                params_data = data.values
4132                params_header = data.columns.tolist()
4133                params_stubs = None
4134
4135                title = f'Transition: Factor block {i}'
4136                table = SimpleTable(
4137                    params_data, params_header, params_stubs,
4138                    txt_fmt=fmt_params, title=title)
4139                summary.tables.insert(table_ix, table)
4140                table_ix += 1
4141
4142                ix1 = ix2
4143
4144        return summary
4145