1import os
2import numpy as np
3import pandas as pd
4from scipy.io import matlab
5
6import pytest
7from numpy.testing import assert_allclose
8
9from statsmodels.tsa.statespace import initialization, dynamic_factor_mq
10
11# Load dataset
12current_path = os.path.dirname(os.path.abspath(__file__))
13results_path = os.path.join(current_path, 'results', 'frbny_nowcast')
14data_path = os.path.join(results_path, 'Nowcasting', 'data', 'US')
15us_data = pd.read_csv(os.path.join(data_path, '2016-06-29.csv'))
16us_data.index = pd.PeriodIndex(us_data.Date.tolist(), freq='M')
17del us_data['Date']
18us_data_update = pd.read_csv(os.path.join(data_path, '2016-07-29.csv'))
19us_data_update.index = pd.PeriodIndex(us_data_update.Date.tolist(), freq='M')
20del us_data_update['Date']
21
22# Some definitions to re-use
23BLOCK_FACTORS_KP1 = {
24    'CPIAUCSL': ['global', 'test1'],
25    'UNRATE': ['global', 'test2'],
26    'PAYEMS': ['global', 'test2'],
27    'RSAFS': ['global', 'test1', 'test2'],
28    'TTLCONS': ['global', 'test1'],
29    'TCU': ['global', 'test2'],
30    'GDPC1': ['global', 'test1', 'test2'],
31    'ULCNFB': ['global'],
32}
33BLOCK_FACTOR_ORDERS_KP1 = {
34    'global': 1,
35    'test1': 1,
36    'test2': 1
37}
38BLOCK_FACTOR_ORDERS_KP2 = {
39    'global': 2,
40    'test1': 2,
41    'test2': 2
42}
43BLOCK_FACTOR_MULTIPLICITIES_KP2 = {
44    'global': 2,
45    'test1': 2,
46    'test2': 2
47}
48
49
50@pytest.fixture(scope="module")
51def matlab_results():
52    # Get estimation output from FRBNY programs
53    results = {}
54
55    # DFM results with a single block of factors
56    for run in ['111', '112', '11F', '221', '222', '22F']:
57        res = matlab.loadmat(os.path.join(results_path, f'test_dfm_{run}.mat'))
58
59        # The FRBNY version orders the idiosyncratic AR(1) factors differently,
60        # so we need to re-order the initial state mean and covariance matrix
61        k_factors = res['Spec']['k'][0, 0][0, 0]
62        factor_orders = res['Spec']['p'][0, 0][0, 0]
63        _factor_orders = max(5, factor_orders)
64
65        idio = k_factors * _factor_orders + 3
66        ix = np.r_[np.arange(idio),
67                   idio + np.arange(10).reshape(2, 5).ravel(order='F')]
68        initial_state = res['Res']['Z_0'][0, 0][:, 0][ix]
69        initial_state_cov = res['Res']['V_0'][0, 0][ix][:, ix]
70
71        # In the 2-factor case, we want both factors in a single block
72        if k_factors == 2:
73            factor_orders = {('0', '1'): factor_orders}
74
75        results[run] = {
76            'k_endog_M': 3,
77            'factors': k_factors,
78            'factor_orders': factor_orders,
79            'factor_multiplicities': None,
80            'params': res['params'][:, 0],
81            'llf': res['Res']['loglik'][0, 0][0, 0],
82            'initial_state': initial_state,
83            'initial_state_cov': initial_state_cov,
84            'smoothed_forecasts': res['Res']['x_sm'][0, 0]}
85
86    # News output with a single block of factors
87    for run in ['112', '222']:
88        res = matlab.loadmat(os.path.join(results_path,
89                                          f'test_news_{run}.mat'))
90
91        # The FRBNY version orders the idiosyncratic AR(1) factors differently,
92        # so we need to re-order the initial state mean and covariance matrix
93        k_factors = res['Spec']['k'][0, 0][0, 0]
94        factor_orders = res['Spec']['p'][0, 0][0, 0]
95        _factor_orders = max(5, factor_orders)
96
97        idio = k_factors * _factor_orders + 3
98        ix = np.r_[np.arange(idio),
99                   idio + np.arange(10).reshape(2, 5).ravel(order='F')]
100        initial_state = res['Res']['Z_0'][0, 0][:, 0][ix]
101        initial_state_cov = res['Res']['V_0'][0, 0][ix][:, ix]
102
103        # In the 2-factor case, we want both factors in a single block
104        if k_factors == 2:
105            factor_orders = {('0', '1'): factor_orders}
106
107        results[f'news_{run}'] = {
108            'k_endog_M': 3,
109            'factors': k_factors,
110            'factor_orders': factor_orders,
111            'factor_multiplicities': None,
112            'params': res['params'][:, 0],
113            'initial_state': initial_state,
114            'initial_state_cov': initial_state_cov,
115            'revision_impacts': res['Res']['impact_revisions'][0, 0],
116            'weight': res['Res']['weight'],
117            'news_table': res['Res']['news_table'][0, 0]}
118
119    # DFM results with three blocks of factors
120    for run in ['111', '112', '221', '222']:
121        res = matlab.loadmat(os.path.join(results_path,
122                                          f'test_dfm_blocks_{run}.mat'))
123
124        # The FRBNY version orders the idiosyncratic AR(1) factors differently,
125        # so we need to re-order the initial state mean and covariance matrix
126        k_factors = res['Spec']['k'][0, 0][0, 0]
127        factor_order = res['Spec']['p'][0, 0][0, 0]
128        _factor_order = max(5, factor_order)
129
130        idio = 3 * k_factors * _factor_order + 6
131        ix = np.r_[np.arange(idio),
132                   idio + np.arange(10).reshape(2, 5).ravel(order='F')]
133        initial_state = res['Res']['Z_0'][0, 0][:, 0][ix]
134        initial_state_cov = res['Res']['V_0'][0, 0][ix][:, ix]
135
136        # Setup factors / blocks
137        if k_factors == 1:
138            factors = BLOCK_FACTORS_KP1.copy()
139            factor_orders = BLOCK_FACTOR_ORDERS_KP1.copy()
140            factor_multiplicities = None
141        else:
142            factors = BLOCK_FACTORS_KP1.copy()
143            factor_orders = BLOCK_FACTOR_ORDERS_KP2.copy()
144            factor_multiplicities = BLOCK_FACTOR_MULTIPLICITIES_KP2.copy()
145
146        results[f'block_{run}'] = {
147            'k_endog_M': 6,
148            'factors': factors,
149            'factor_orders': factor_orders,
150            'factor_multiplicities': factor_multiplicities,
151            'params': res['params'][:, 0],
152            'llf': res['Res']['loglik'][0, 0][0, 0],
153            'initial_state': initial_state,
154            'initial_state_cov': initial_state_cov,
155            'smoothed_forecasts': res['Res']['x_sm'][0, 0]}
156
157    # News output with three blocks of factors
158    for run in ['112', '222']:
159        res = matlab.loadmat(os.path.join(results_path,
160                                          f'test_news_blocks_{run}.mat'))
161
162        # The FRBNY version orders the idiosyncratic AR(1) factors differently,
163        # so we need to re-order the initial state mean and covariance matrix
164        k_factors = res['Spec']['k'][0, 0][0, 0]
165        factor_order = res['Spec']['p'][0, 0][0, 0]
166        _factor_order = max(5, factor_order)
167
168        idio = 3 * k_factors * _factor_order + 6
169        ix = np.r_[np.arange(idio),
170                   idio + np.arange(10).reshape(2, 5).ravel(order='F')]
171        initial_state = res['Res']['Z_0'][0, 0][:, 0][ix]
172        initial_state_cov = res['Res']['V_0'][0, 0][ix][:, ix]
173
174        # Setup factors / blocks
175        if k_factors == 1:
176            factors = BLOCK_FACTORS_KP1.copy()
177            factor_orders = BLOCK_FACTOR_ORDERS_KP1.copy()
178            factor_multiplicities = None
179        else:
180            factors = BLOCK_FACTORS_KP1.copy()
181            factor_orders = BLOCK_FACTOR_ORDERS_KP2.copy()
182            factor_multiplicities = BLOCK_FACTOR_MULTIPLICITIES_KP2.copy()
183
184        results[f'news_block_{run}'] = {
185            'k_endog_M': 6,
186            'factors': factors,
187            'factor_orders': factor_orders,
188            'factor_multiplicities': factor_multiplicities,
189            'params': res['params'][:, 0],
190            'initial_state': initial_state,
191            'initial_state_cov': initial_state_cov,
192            'revision_impacts': res['Res']['impact_revisions'][0, 0],
193            'weight': res['Res']['weight'],
194            'news_table': res['Res']['news_table'][0, 0]}
195
196    # Construct the test dataset
197    def get_data(us_data, mean_M=None, std_M=None, mean_Q=None, std_Q=None):
198        dta_M = us_data[['CPIAUCSL', 'UNRATE', 'PAYEMS', 'RSAFS', 'TTLCONS',
199                         'TCU']].copy()
200        dta_Q = us_data[['GDPC1', 'ULCNFB']].copy().resample('Q').last()
201
202        dta_M['CPIAUCSL'] = (dta_M['CPIAUCSL'] /
203                             dta_M['CPIAUCSL'].shift(1) - 1) * 100
204        dta_M['UNRATE'] = dta_M['UNRATE'].diff()
205        dta_M['PAYEMS'] = dta_M['PAYEMS'].diff()
206        dta_M['TCU'] = dta_M['TCU'].diff()
207        dta_M['RSAFS'] = (dta_M['RSAFS'] /
208                          dta_M['RSAFS'].shift(1) - 1) * 100
209        dta_M['TTLCONS'] = (dta_M['TTLCONS'] /
210                            dta_M['TTLCONS'].shift(1) - 1) * 100
211        dta_Q = ((dta_Q / dta_Q.shift(1))**4 - 1) * 100
212
213        start = '2000'
214        dta_M = dta_M.loc[start:]
215        dta_Q = dta_Q.loc[start:]
216
217        first_ix = dta_M.first_valid_index()
218        last_ix = dta_M.last_valid_index()
219        dta_M = dta_M.loc[first_ix:last_ix]
220
221        first_ix = dta_Q.first_valid_index()
222        last_ix = dta_Q.last_valid_index()
223        dta_Q = dta_Q.loc[first_ix:last_ix]
224
225        return dta_M, dta_Q
226
227    # Usual test dataset
228    endog_M, endog_Q = get_data(us_data)
229    # Updated test dataset (for computing the news)
230    updated_M, updated_Q = get_data(us_data_update)
231
232    return endog_M, endog_Q, results, updated_M, updated_Q
233
234
235@pytest.mark.parametrize("run", ['111', '112', '11F', '221', '222', '22F',
236                                 'block_111', 'block_112', 'block_221',
237                                 'block_222'])
238def test_known(matlab_results, run):
239    endog_M, endog_Q = matlab_results[:2]
240    results = matlab_results[2][run]
241
242    # Construct the model
243    mod = dynamic_factor_mq.DynamicFactorMQ(
244            endog_M.iloc[:, :results['k_endog_M']], endog_quarterly=endog_Q,
245            factors=results['factors'],
246            factor_orders=results['factor_orders'],
247            factor_multiplicities=results['factor_multiplicities'],
248            idiosyncratic_ar1=True, init_t0=True, obs_cov_diag=True,
249            standardize=True)
250
251    mod.initialize_known(results['initial_state'],
252                         results['initial_state_cov'])
253    res = mod.smooth(results['params'], cov_type='none')
254    assert_allclose(res.llf - mod.loglike_constant, results['llf'])
255    assert_allclose(res.filter_results.smoothed_forecasts.T[1:],
256                    results['smoothed_forecasts'][:-1])
257    assert_allclose(res.forecast(1, original_scale=False).iloc[0],
258                    results['smoothed_forecasts'][-1])
259
260
261@pytest.mark.parametrize("run", ['11', '22', 'block_11', 'block_22'])
262def test_emstep1(matlab_results, run):
263    # Test that our EM step gets params2 from params1
264    # Uses our default method for the observation equation, which is an
265    # optimized version of the method presented in Bańbura and Modugno (2014)
266    # (e.g. our version doesn't require the loop over T or the Kronecker
267    # product)
268    endog_M, endog_Q = matlab_results[:2]
269    results1 = matlab_results[2][f'{run}1']
270    results2 = matlab_results[2][f'{run}2']
271
272    # Construct the model
273    mod = dynamic_factor_mq.DynamicFactorMQ(
274            endog_M.iloc[:, :results1['k_endog_M']], endog_quarterly=endog_Q,
275            factors=results1['factors'],
276            factor_orders=results1['factor_orders'],
277            factor_multiplicities=results1['factor_multiplicities'],
278            idiosyncratic_ar1=True, init_t0=True, obs_cov_diag=True,
279            standardize=True)
280
281    init = initialization.Initialization(
282        mod.k_states, 'known', constant=results1['initial_state'],
283        stationary_cov=results1['initial_state_cov'])
284    res2, params2 = mod._em_iteration(results1['params'], init=init,
285                                      mstep_method='missing')
286
287    # Test parameters
288    true2 = results2['params']
289    assert_allclose(params2[mod._p['loadings']], true2[mod._p['loadings']])
290    assert_allclose(params2[mod._p['factor_ar']], true2[mod._p['factor_ar']])
291    assert_allclose(params2[mod._p['factor_cov']], true2[mod._p['factor_cov']])
292    assert_allclose(params2[mod._p['idiosyncratic_ar1']],
293                    true2[mod._p['idiosyncratic_ar1']])
294    assert_allclose(params2[mod._p['idiosyncratic_var']],
295                    true2[mod._p['idiosyncratic_var']])
296
297
298@pytest.mark.parametrize(
299    "k_factors,factor_orders,factor_multiplicities,idiosyncratic_ar1",
300    [(1, 1, 1, True), (3, 1, 1, True), (1, 6, 1, True), (3, 6, 1, True),
301     (1, 1, 1, False), (3, 1, 1, False), (1, 6, 1, False), (3, 6, 1, False),
302     (BLOCK_FACTORS_KP1.copy(), BLOCK_FACTOR_ORDERS_KP1.copy(), 1, True),
303     (BLOCK_FACTORS_KP1.copy(), BLOCK_FACTOR_ORDERS_KP1.copy(), 1, False),
304     (BLOCK_FACTORS_KP1.copy(), BLOCK_FACTOR_ORDERS_KP2.copy(),
305        BLOCK_FACTOR_MULTIPLICITIES_KP2, True),
306     (BLOCK_FACTORS_KP1.copy(), BLOCK_FACTOR_ORDERS_KP2.copy(),
307        BLOCK_FACTOR_MULTIPLICITIES_KP2, False)])
308@pytest.mark.filterwarnings('ignore::UserWarning')
309def test_emstep_methods_missing(matlab_results, k_factors, factor_orders,
310                                factor_multiplicities, idiosyncratic_ar1):
311    # Test that in the case of missing data, the direct and optimized EM step
312    # methods for the observation equation give identical results across a
313    # variety of parameterizations
314    endog_M = matlab_results[0].iloc[:, :10]
315    endog_Q = matlab_results[1].iloc[:, :10]
316
317    # Construct the model
318    mod = dynamic_factor_mq.DynamicFactorMQ(
319            endog_M, endog_quarterly=endog_Q, factors=k_factors,
320            factor_orders=factor_orders,
321            factor_multiplicities=factor_multiplicities,
322            idiosyncratic_ar1=idiosyncratic_ar1, standardize=True)
323    mod.ssm.filter_univariate = True
324
325    params0 = mod.start_params
326    _, params1 = mod._em_iteration(params0, mstep_method='missing')
327
328    # Now double-check the observation equation M step for identical H and
329    # Lambda directly
330    mod.update(params1)
331    res = mod.ssm.smooth()
332
333    a = res.smoothed_state.T[..., None]
334    cov_a = res.smoothed_state_cov.transpose(2, 0, 1)
335    Eaa = cov_a + np.matmul(a, a.transpose(0, 2, 1))
336
337    Lambda, H = mod._em_maximization_obs_missing(res, Eaa, a, compute_H=True)
338
339
340@pytest.mark.parametrize(
341    "k_factors,factor_orders,factor_multiplicities,idiosyncratic_ar1",
342    [(1, 1, 1, True), (3, 1, 1, True), (1, 6, 1, True),
343     (3, {('0', '1', '2'): 6}, 1, True),
344     (1, 1, 1, False), (3, 1, 1, False), (1, 6, 1, False),
345     (3, {('0', '1', '2'): 6}, 1, False),
346     (BLOCK_FACTORS_KP1.copy(), BLOCK_FACTOR_ORDERS_KP1.copy(), 1, True),
347     (BLOCK_FACTORS_KP1.copy(), BLOCK_FACTOR_ORDERS_KP1.copy(), 1, False),
348     (BLOCK_FACTORS_KP1.copy(), BLOCK_FACTOR_ORDERS_KP2.copy(),
349        BLOCK_FACTOR_MULTIPLICITIES_KP2, True),
350     (BLOCK_FACTORS_KP1.copy(), BLOCK_FACTOR_ORDERS_KP2.copy(),
351        BLOCK_FACTOR_MULTIPLICITIES_KP2, False)])
352@pytest.mark.filterwarnings('ignore::UserWarning')
353def test_emstep_methods_nonmissing(matlab_results, k_factors, factor_orders,
354                                   factor_multiplicities, idiosyncratic_ar1):
355    # Test that in the case of non-missing data, our three EM step methods for
356    # the observation equation (nonmissing, missing_direct, missing) give
357    # identical results across a variety of parameterizations
358    # Note that including quarterly series will always imply missing values,
359    # so we have to only provide monthly series
360    dta_M = matlab_results[0].iloc[:, :8]
361    dta_M = (dta_M - dta_M.mean()) / dta_M.std()
362    endog_M = dta_M.interpolate().fillna(method='backfill')
363
364    # Remove the quarterly endog->factor maps
365    if isinstance(k_factors, dict):
366        if 'GDPC1' in k_factors:
367            del k_factors['GDPC1']
368        if 'ULCNFB' in k_factors:
369            del k_factors['ULCNFB']
370
371    # Construct the model
372    mod = dynamic_factor_mq.DynamicFactorMQ(
373        endog_M, factors=k_factors, factor_orders=factor_orders,
374        factor_multiplicities=factor_multiplicities,
375        idiosyncratic_ar1=idiosyncratic_ar1)
376    mod.ssm.filter_univariate = True
377
378    params0 = mod.start_params
379    _, params1 = mod._em_iteration(params0, mstep_method='missing')
380    _, params1_nonmissing = mod._em_iteration(
381        params0, mstep_method='nonmissing')
382
383    assert_allclose(params1_nonmissing, params1, atol=1e-13)
384
385    # Now double-check the observation equation M step for identical H and
386    # Lambda directly
387    mod.update(params1)
388    res = mod.ssm.smooth()
389
390    a = res.smoothed_state.T[..., None]
391    cov_a = res.smoothed_state_cov.transpose(2, 0, 1)
392    Eaa = cov_a + np.matmul(a, a.transpose(0, 2, 1))
393
394    Lambda, H = mod._em_maximization_obs_missing(res, Eaa, a, compute_H=True)
395    Lambda_nonmissing, H_nonmissing = mod._em_maximization_obs_nonmissing(
396        res, Eaa, a, compute_H=True)
397
398    assert_allclose(Lambda_nonmissing, Lambda, atol=1e-13)
399    assert_allclose(H_nonmissing, H, atol=1e-13)
400
401
402@pytest.mark.parametrize("run", ['news_112', 'news_222', 'news_block_112',
403                                 'news_block_222'])
404def test_news(matlab_results, run):
405    endog_M, endog_Q = matlab_results[:2]
406    results = matlab_results[2][run]
407    updated_M, updated_Q = matlab_results[-2:]
408
409    # Construct the base model
410    mod1 = dynamic_factor_mq.DynamicFactorMQ(
411        endog_M.iloc[:, :results['k_endog_M']], endog_quarterly=endog_Q,
412        factors=results['factors'],
413        factor_orders=results['factor_orders'],
414        factor_multiplicities=results['factor_multiplicities'],
415        idiosyncratic_ar1=True, init_t0=True, obs_cov_diag=True,
416        standardize=True)
417
418    mod1.initialize_known(results['initial_state'],
419                          results['initial_state_cov'])
420    res1 = mod1.smooth(results['params'], cov_type='none')
421
422    # Construct the updated model
423    res2 = res1.apply(updated_M.iloc[:, :results['k_endog_M']],
424                      endog_quarterly=updated_Q, retain_standardization=True)
425
426    # Compute the news
427    news = res2.news(res1, impact_date='2016-09', comparison_type='previous')
428
429    print(news.revision_impacts.loc['2016-09', 'GDPC1'])
430    assert_allclose(news.revision_impacts.loc['2016-09', 'GDPC1'],
431                    results['revision_impacts'])
432
433    columns = ['forecast (prev)', 'observed', 'weight', 'impact']
434    actual = news.details_by_impact.loc['2016-09', 'GDPC1'][columns]
435    assert_allclose(actual.loc[('2016-06', 'CPIAUCSL')],
436                    results['news_table'][0])
437    assert_allclose(actual.loc[('2016-06', 'UNRATE')],
438                    results['news_table'][1])
439    assert_allclose(actual.loc[('2016-06', 'PAYEMS')],
440                    results['news_table'][2])
441    if mod1.k_endog_M == 6:
442        i = 6
443        assert_allclose(actual.loc[('2016-06', 'RSAFS')],
444                        results['news_table'][3])
445        assert_allclose(actual.loc[('2016-05', 'TTLCONS')],
446                        results['news_table'][4])
447        assert_allclose(actual.loc[('2016-06', 'TCU')],
448                        results['news_table'][5])
449    else:
450        i = 3
451    assert_allclose(actual.loc[('2016-06', 'GDPC1')],
452                    results['news_table'][i])
453