1import os 2import numpy as np 3import pandas as pd 4from scipy.io import matlab 5 6import pytest 7from numpy.testing import assert_allclose 8 9from statsmodels.tsa.statespace import initialization, dynamic_factor_mq 10 11# Load dataset 12current_path = os.path.dirname(os.path.abspath(__file__)) 13results_path = os.path.join(current_path, 'results', 'frbny_nowcast') 14data_path = os.path.join(results_path, 'Nowcasting', 'data', 'US') 15us_data = pd.read_csv(os.path.join(data_path, '2016-06-29.csv')) 16us_data.index = pd.PeriodIndex(us_data.Date.tolist(), freq='M') 17del us_data['Date'] 18us_data_update = pd.read_csv(os.path.join(data_path, '2016-07-29.csv')) 19us_data_update.index = pd.PeriodIndex(us_data_update.Date.tolist(), freq='M') 20del us_data_update['Date'] 21 22# Some definitions to re-use 23BLOCK_FACTORS_KP1 = { 24 'CPIAUCSL': ['global', 'test1'], 25 'UNRATE': ['global', 'test2'], 26 'PAYEMS': ['global', 'test2'], 27 'RSAFS': ['global', 'test1', 'test2'], 28 'TTLCONS': ['global', 'test1'], 29 'TCU': ['global', 'test2'], 30 'GDPC1': ['global', 'test1', 'test2'], 31 'ULCNFB': ['global'], 32} 33BLOCK_FACTOR_ORDERS_KP1 = { 34 'global': 1, 35 'test1': 1, 36 'test2': 1 37} 38BLOCK_FACTOR_ORDERS_KP2 = { 39 'global': 2, 40 'test1': 2, 41 'test2': 2 42} 43BLOCK_FACTOR_MULTIPLICITIES_KP2 = { 44 'global': 2, 45 'test1': 2, 46 'test2': 2 47} 48 49 50@pytest.fixture(scope="module") 51def matlab_results(): 52 # Get estimation output from FRBNY programs 53 results = {} 54 55 # DFM results with a single block of factors 56 for run in ['111', '112', '11F', '221', '222', '22F']: 57 res = matlab.loadmat(os.path.join(results_path, f'test_dfm_{run}.mat')) 58 59 # The FRBNY version orders the idiosyncratic AR(1) factors differently, 60 # so we need to re-order the initial state mean and covariance matrix 61 k_factors = res['Spec']['k'][0, 0][0, 0] 62 factor_orders = res['Spec']['p'][0, 0][0, 0] 63 _factor_orders = max(5, factor_orders) 64 65 idio = k_factors * _factor_orders + 3 66 ix = np.r_[np.arange(idio), 67 idio + np.arange(10).reshape(2, 5).ravel(order='F')] 68 initial_state = res['Res']['Z_0'][0, 0][:, 0][ix] 69 initial_state_cov = res['Res']['V_0'][0, 0][ix][:, ix] 70 71 # In the 2-factor case, we want both factors in a single block 72 if k_factors == 2: 73 factor_orders = {('0', '1'): factor_orders} 74 75 results[run] = { 76 'k_endog_M': 3, 77 'factors': k_factors, 78 'factor_orders': factor_orders, 79 'factor_multiplicities': None, 80 'params': res['params'][:, 0], 81 'llf': res['Res']['loglik'][0, 0][0, 0], 82 'initial_state': initial_state, 83 'initial_state_cov': initial_state_cov, 84 'smoothed_forecasts': res['Res']['x_sm'][0, 0]} 85 86 # News output with a single block of factors 87 for run in ['112', '222']: 88 res = matlab.loadmat(os.path.join(results_path, 89 f'test_news_{run}.mat')) 90 91 # The FRBNY version orders the idiosyncratic AR(1) factors differently, 92 # so we need to re-order the initial state mean and covariance matrix 93 k_factors = res['Spec']['k'][0, 0][0, 0] 94 factor_orders = res['Spec']['p'][0, 0][0, 0] 95 _factor_orders = max(5, factor_orders) 96 97 idio = k_factors * _factor_orders + 3 98 ix = np.r_[np.arange(idio), 99 idio + np.arange(10).reshape(2, 5).ravel(order='F')] 100 initial_state = res['Res']['Z_0'][0, 0][:, 0][ix] 101 initial_state_cov = res['Res']['V_0'][0, 0][ix][:, ix] 102 103 # In the 2-factor case, we want both factors in a single block 104 if k_factors == 2: 105 factor_orders = {('0', '1'): factor_orders} 106 107 results[f'news_{run}'] = { 108 'k_endog_M': 3, 109 'factors': k_factors, 110 'factor_orders': factor_orders, 111 'factor_multiplicities': None, 112 'params': res['params'][:, 0], 113 'initial_state': initial_state, 114 'initial_state_cov': initial_state_cov, 115 'revision_impacts': res['Res']['impact_revisions'][0, 0], 116 'weight': res['Res']['weight'], 117 'news_table': res['Res']['news_table'][0, 0]} 118 119 # DFM results with three blocks of factors 120 for run in ['111', '112', '221', '222']: 121 res = matlab.loadmat(os.path.join(results_path, 122 f'test_dfm_blocks_{run}.mat')) 123 124 # The FRBNY version orders the idiosyncratic AR(1) factors differently, 125 # so we need to re-order the initial state mean and covariance matrix 126 k_factors = res['Spec']['k'][0, 0][0, 0] 127 factor_order = res['Spec']['p'][0, 0][0, 0] 128 _factor_order = max(5, factor_order) 129 130 idio = 3 * k_factors * _factor_order + 6 131 ix = np.r_[np.arange(idio), 132 idio + np.arange(10).reshape(2, 5).ravel(order='F')] 133 initial_state = res['Res']['Z_0'][0, 0][:, 0][ix] 134 initial_state_cov = res['Res']['V_0'][0, 0][ix][:, ix] 135 136 # Setup factors / blocks 137 if k_factors == 1: 138 factors = BLOCK_FACTORS_KP1.copy() 139 factor_orders = BLOCK_FACTOR_ORDERS_KP1.copy() 140 factor_multiplicities = None 141 else: 142 factors = BLOCK_FACTORS_KP1.copy() 143 factor_orders = BLOCK_FACTOR_ORDERS_KP2.copy() 144 factor_multiplicities = BLOCK_FACTOR_MULTIPLICITIES_KP2.copy() 145 146 results[f'block_{run}'] = { 147 'k_endog_M': 6, 148 'factors': factors, 149 'factor_orders': factor_orders, 150 'factor_multiplicities': factor_multiplicities, 151 'params': res['params'][:, 0], 152 'llf': res['Res']['loglik'][0, 0][0, 0], 153 'initial_state': initial_state, 154 'initial_state_cov': initial_state_cov, 155 'smoothed_forecasts': res['Res']['x_sm'][0, 0]} 156 157 # News output with three blocks of factors 158 for run in ['112', '222']: 159 res = matlab.loadmat(os.path.join(results_path, 160 f'test_news_blocks_{run}.mat')) 161 162 # The FRBNY version orders the idiosyncratic AR(1) factors differently, 163 # so we need to re-order the initial state mean and covariance matrix 164 k_factors = res['Spec']['k'][0, 0][0, 0] 165 factor_order = res['Spec']['p'][0, 0][0, 0] 166 _factor_order = max(5, factor_order) 167 168 idio = 3 * k_factors * _factor_order + 6 169 ix = np.r_[np.arange(idio), 170 idio + np.arange(10).reshape(2, 5).ravel(order='F')] 171 initial_state = res['Res']['Z_0'][0, 0][:, 0][ix] 172 initial_state_cov = res['Res']['V_0'][0, 0][ix][:, ix] 173 174 # Setup factors / blocks 175 if k_factors == 1: 176 factors = BLOCK_FACTORS_KP1.copy() 177 factor_orders = BLOCK_FACTOR_ORDERS_KP1.copy() 178 factor_multiplicities = None 179 else: 180 factors = BLOCK_FACTORS_KP1.copy() 181 factor_orders = BLOCK_FACTOR_ORDERS_KP2.copy() 182 factor_multiplicities = BLOCK_FACTOR_MULTIPLICITIES_KP2.copy() 183 184 results[f'news_block_{run}'] = { 185 'k_endog_M': 6, 186 'factors': factors, 187 'factor_orders': factor_orders, 188 'factor_multiplicities': factor_multiplicities, 189 'params': res['params'][:, 0], 190 'initial_state': initial_state, 191 'initial_state_cov': initial_state_cov, 192 'revision_impacts': res['Res']['impact_revisions'][0, 0], 193 'weight': res['Res']['weight'], 194 'news_table': res['Res']['news_table'][0, 0]} 195 196 # Construct the test dataset 197 def get_data(us_data, mean_M=None, std_M=None, mean_Q=None, std_Q=None): 198 dta_M = us_data[['CPIAUCSL', 'UNRATE', 'PAYEMS', 'RSAFS', 'TTLCONS', 199 'TCU']].copy() 200 dta_Q = us_data[['GDPC1', 'ULCNFB']].copy().resample('Q').last() 201 202 dta_M['CPIAUCSL'] = (dta_M['CPIAUCSL'] / 203 dta_M['CPIAUCSL'].shift(1) - 1) * 100 204 dta_M['UNRATE'] = dta_M['UNRATE'].diff() 205 dta_M['PAYEMS'] = dta_M['PAYEMS'].diff() 206 dta_M['TCU'] = dta_M['TCU'].diff() 207 dta_M['RSAFS'] = (dta_M['RSAFS'] / 208 dta_M['RSAFS'].shift(1) - 1) * 100 209 dta_M['TTLCONS'] = (dta_M['TTLCONS'] / 210 dta_M['TTLCONS'].shift(1) - 1) * 100 211 dta_Q = ((dta_Q / dta_Q.shift(1))**4 - 1) * 100 212 213 start = '2000' 214 dta_M = dta_M.loc[start:] 215 dta_Q = dta_Q.loc[start:] 216 217 first_ix = dta_M.first_valid_index() 218 last_ix = dta_M.last_valid_index() 219 dta_M = dta_M.loc[first_ix:last_ix] 220 221 first_ix = dta_Q.first_valid_index() 222 last_ix = dta_Q.last_valid_index() 223 dta_Q = dta_Q.loc[first_ix:last_ix] 224 225 return dta_M, dta_Q 226 227 # Usual test dataset 228 endog_M, endog_Q = get_data(us_data) 229 # Updated test dataset (for computing the news) 230 updated_M, updated_Q = get_data(us_data_update) 231 232 return endog_M, endog_Q, results, updated_M, updated_Q 233 234 235@pytest.mark.parametrize("run", ['111', '112', '11F', '221', '222', '22F', 236 'block_111', 'block_112', 'block_221', 237 'block_222']) 238def test_known(matlab_results, run): 239 endog_M, endog_Q = matlab_results[:2] 240 results = matlab_results[2][run] 241 242 # Construct the model 243 mod = dynamic_factor_mq.DynamicFactorMQ( 244 endog_M.iloc[:, :results['k_endog_M']], endog_quarterly=endog_Q, 245 factors=results['factors'], 246 factor_orders=results['factor_orders'], 247 factor_multiplicities=results['factor_multiplicities'], 248 idiosyncratic_ar1=True, init_t0=True, obs_cov_diag=True, 249 standardize=True) 250 251 mod.initialize_known(results['initial_state'], 252 results['initial_state_cov']) 253 res = mod.smooth(results['params'], cov_type='none') 254 assert_allclose(res.llf - mod.loglike_constant, results['llf']) 255 assert_allclose(res.filter_results.smoothed_forecasts.T[1:], 256 results['smoothed_forecasts'][:-1]) 257 assert_allclose(res.forecast(1, original_scale=False).iloc[0], 258 results['smoothed_forecasts'][-1]) 259 260 261@pytest.mark.parametrize("run", ['11', '22', 'block_11', 'block_22']) 262def test_emstep1(matlab_results, run): 263 # Test that our EM step gets params2 from params1 264 # Uses our default method for the observation equation, which is an 265 # optimized version of the method presented in Bańbura and Modugno (2014) 266 # (e.g. our version doesn't require the loop over T or the Kronecker 267 # product) 268 endog_M, endog_Q = matlab_results[:2] 269 results1 = matlab_results[2][f'{run}1'] 270 results2 = matlab_results[2][f'{run}2'] 271 272 # Construct the model 273 mod = dynamic_factor_mq.DynamicFactorMQ( 274 endog_M.iloc[:, :results1['k_endog_M']], endog_quarterly=endog_Q, 275 factors=results1['factors'], 276 factor_orders=results1['factor_orders'], 277 factor_multiplicities=results1['factor_multiplicities'], 278 idiosyncratic_ar1=True, init_t0=True, obs_cov_diag=True, 279 standardize=True) 280 281 init = initialization.Initialization( 282 mod.k_states, 'known', constant=results1['initial_state'], 283 stationary_cov=results1['initial_state_cov']) 284 res2, params2 = mod._em_iteration(results1['params'], init=init, 285 mstep_method='missing') 286 287 # Test parameters 288 true2 = results2['params'] 289 assert_allclose(params2[mod._p['loadings']], true2[mod._p['loadings']]) 290 assert_allclose(params2[mod._p['factor_ar']], true2[mod._p['factor_ar']]) 291 assert_allclose(params2[mod._p['factor_cov']], true2[mod._p['factor_cov']]) 292 assert_allclose(params2[mod._p['idiosyncratic_ar1']], 293 true2[mod._p['idiosyncratic_ar1']]) 294 assert_allclose(params2[mod._p['idiosyncratic_var']], 295 true2[mod._p['idiosyncratic_var']]) 296 297 298@pytest.mark.parametrize( 299 "k_factors,factor_orders,factor_multiplicities,idiosyncratic_ar1", 300 [(1, 1, 1, True), (3, 1, 1, True), (1, 6, 1, True), (3, 6, 1, True), 301 (1, 1, 1, False), (3, 1, 1, False), (1, 6, 1, False), (3, 6, 1, False), 302 (BLOCK_FACTORS_KP1.copy(), BLOCK_FACTOR_ORDERS_KP1.copy(), 1, True), 303 (BLOCK_FACTORS_KP1.copy(), BLOCK_FACTOR_ORDERS_KP1.copy(), 1, False), 304 (BLOCK_FACTORS_KP1.copy(), BLOCK_FACTOR_ORDERS_KP2.copy(), 305 BLOCK_FACTOR_MULTIPLICITIES_KP2, True), 306 (BLOCK_FACTORS_KP1.copy(), BLOCK_FACTOR_ORDERS_KP2.copy(), 307 BLOCK_FACTOR_MULTIPLICITIES_KP2, False)]) 308@pytest.mark.filterwarnings('ignore::UserWarning') 309def test_emstep_methods_missing(matlab_results, k_factors, factor_orders, 310 factor_multiplicities, idiosyncratic_ar1): 311 # Test that in the case of missing data, the direct and optimized EM step 312 # methods for the observation equation give identical results across a 313 # variety of parameterizations 314 endog_M = matlab_results[0].iloc[:, :10] 315 endog_Q = matlab_results[1].iloc[:, :10] 316 317 # Construct the model 318 mod = dynamic_factor_mq.DynamicFactorMQ( 319 endog_M, endog_quarterly=endog_Q, factors=k_factors, 320 factor_orders=factor_orders, 321 factor_multiplicities=factor_multiplicities, 322 idiosyncratic_ar1=idiosyncratic_ar1, standardize=True) 323 mod.ssm.filter_univariate = True 324 325 params0 = mod.start_params 326 _, params1 = mod._em_iteration(params0, mstep_method='missing') 327 328 # Now double-check the observation equation M step for identical H and 329 # Lambda directly 330 mod.update(params1) 331 res = mod.ssm.smooth() 332 333 a = res.smoothed_state.T[..., None] 334 cov_a = res.smoothed_state_cov.transpose(2, 0, 1) 335 Eaa = cov_a + np.matmul(a, a.transpose(0, 2, 1)) 336 337 Lambda, H = mod._em_maximization_obs_missing(res, Eaa, a, compute_H=True) 338 339 340@pytest.mark.parametrize( 341 "k_factors,factor_orders,factor_multiplicities,idiosyncratic_ar1", 342 [(1, 1, 1, True), (3, 1, 1, True), (1, 6, 1, True), 343 (3, {('0', '1', '2'): 6}, 1, True), 344 (1, 1, 1, False), (3, 1, 1, False), (1, 6, 1, False), 345 (3, {('0', '1', '2'): 6}, 1, False), 346 (BLOCK_FACTORS_KP1.copy(), BLOCK_FACTOR_ORDERS_KP1.copy(), 1, True), 347 (BLOCK_FACTORS_KP1.copy(), BLOCK_FACTOR_ORDERS_KP1.copy(), 1, False), 348 (BLOCK_FACTORS_KP1.copy(), BLOCK_FACTOR_ORDERS_KP2.copy(), 349 BLOCK_FACTOR_MULTIPLICITIES_KP2, True), 350 (BLOCK_FACTORS_KP1.copy(), BLOCK_FACTOR_ORDERS_KP2.copy(), 351 BLOCK_FACTOR_MULTIPLICITIES_KP2, False)]) 352@pytest.mark.filterwarnings('ignore::UserWarning') 353def test_emstep_methods_nonmissing(matlab_results, k_factors, factor_orders, 354 factor_multiplicities, idiosyncratic_ar1): 355 # Test that in the case of non-missing data, our three EM step methods for 356 # the observation equation (nonmissing, missing_direct, missing) give 357 # identical results across a variety of parameterizations 358 # Note that including quarterly series will always imply missing values, 359 # so we have to only provide monthly series 360 dta_M = matlab_results[0].iloc[:, :8] 361 dta_M = (dta_M - dta_M.mean()) / dta_M.std() 362 endog_M = dta_M.interpolate().fillna(method='backfill') 363 364 # Remove the quarterly endog->factor maps 365 if isinstance(k_factors, dict): 366 if 'GDPC1' in k_factors: 367 del k_factors['GDPC1'] 368 if 'ULCNFB' in k_factors: 369 del k_factors['ULCNFB'] 370 371 # Construct the model 372 mod = dynamic_factor_mq.DynamicFactorMQ( 373 endog_M, factors=k_factors, factor_orders=factor_orders, 374 factor_multiplicities=factor_multiplicities, 375 idiosyncratic_ar1=idiosyncratic_ar1) 376 mod.ssm.filter_univariate = True 377 378 params0 = mod.start_params 379 _, params1 = mod._em_iteration(params0, mstep_method='missing') 380 _, params1_nonmissing = mod._em_iteration( 381 params0, mstep_method='nonmissing') 382 383 assert_allclose(params1_nonmissing, params1, atol=1e-13) 384 385 # Now double-check the observation equation M step for identical H and 386 # Lambda directly 387 mod.update(params1) 388 res = mod.ssm.smooth() 389 390 a = res.smoothed_state.T[..., None] 391 cov_a = res.smoothed_state_cov.transpose(2, 0, 1) 392 Eaa = cov_a + np.matmul(a, a.transpose(0, 2, 1)) 393 394 Lambda, H = mod._em_maximization_obs_missing(res, Eaa, a, compute_H=True) 395 Lambda_nonmissing, H_nonmissing = mod._em_maximization_obs_nonmissing( 396 res, Eaa, a, compute_H=True) 397 398 assert_allclose(Lambda_nonmissing, Lambda, atol=1e-13) 399 assert_allclose(H_nonmissing, H, atol=1e-13) 400 401 402@pytest.mark.parametrize("run", ['news_112', 'news_222', 'news_block_112', 403 'news_block_222']) 404def test_news(matlab_results, run): 405 endog_M, endog_Q = matlab_results[:2] 406 results = matlab_results[2][run] 407 updated_M, updated_Q = matlab_results[-2:] 408 409 # Construct the base model 410 mod1 = dynamic_factor_mq.DynamicFactorMQ( 411 endog_M.iloc[:, :results['k_endog_M']], endog_quarterly=endog_Q, 412 factors=results['factors'], 413 factor_orders=results['factor_orders'], 414 factor_multiplicities=results['factor_multiplicities'], 415 idiosyncratic_ar1=True, init_t0=True, obs_cov_diag=True, 416 standardize=True) 417 418 mod1.initialize_known(results['initial_state'], 419 results['initial_state_cov']) 420 res1 = mod1.smooth(results['params'], cov_type='none') 421 422 # Construct the updated model 423 res2 = res1.apply(updated_M.iloc[:, :results['k_endog_M']], 424 endog_quarterly=updated_Q, retain_standardization=True) 425 426 # Compute the news 427 news = res2.news(res1, impact_date='2016-09', comparison_type='previous') 428 429 print(news.revision_impacts.loc['2016-09', 'GDPC1']) 430 assert_allclose(news.revision_impacts.loc['2016-09', 'GDPC1'], 431 results['revision_impacts']) 432 433 columns = ['forecast (prev)', 'observed', 'weight', 'impact'] 434 actual = news.details_by_impact.loc['2016-09', 'GDPC1'][columns] 435 assert_allclose(actual.loc[('2016-06', 'CPIAUCSL')], 436 results['news_table'][0]) 437 assert_allclose(actual.loc[('2016-06', 'UNRATE')], 438 results['news_table'][1]) 439 assert_allclose(actual.loc[('2016-06', 'PAYEMS')], 440 results['news_table'][2]) 441 if mod1.k_endog_M == 6: 442 i = 6 443 assert_allclose(actual.loc[('2016-06', 'RSAFS')], 444 results['news_table'][3]) 445 assert_allclose(actual.loc[('2016-05', 'TTLCONS')], 446 results['news_table'][4]) 447 assert_allclose(actual.loc[('2016-06', 'TCU')], 448 results['news_table'][5]) 449 else: 450 i = 3 451 assert_allclose(actual.loc[('2016-06', 'GDPC1')], 452 results['news_table'][i]) 453