1function Res = dfm(X,Spec,threshold,max_iter)
2%DFM()    Runs the dynamic factor model
3%
4%  Syntax:
5%    Res = DFM(X,Par)
6%
7%  Description:
8%   DFM() inputs the organized and transformed data X and parameter structure Par.
9%   Then, the function outputs dynamic factor model structure Res and data
10%   summary statistics (mean and standard deviation).
11%
12%  Input arguments:
13%    X: Kalman-smoothed data where missing values are replaced by their expectation
14%    Par: A structure containing the following parameters:
15%      Par.blocks: Block loadings.
16%      Par.nQ: Number of quarterly series
17%      Par.p: Number of lags in transition matrix
18%      Par.r: Number of common factors for each block
19%
20% Output Arguments:
21%
22%   Res - structure of model results with the following fields
23%       . X_sm | Kalman-smoothed data where missing values are replaced by their expectation
24%       . Z | Smoothed states. Rows give time, and columns are organized according to Res.C.
25%       . C | Observation matrix. The rows correspond
26%          to each series, and the columns are organized as shown below:
27%         - 1-20: These columns give the factor loadings. For example, 1-5
28%              give loadings for the first block and are organized in
29%              reverse-chronological order (f^G_t, f^G_t-1, f^G_t-2, f^G_t-3,
30%              f^G_t-4). Columns 6-10, 11-15, and 16-20 give loadings for
31%              the second, third, and fourth blocks respectively.
32%       .R: Covariance for observation matrix residuals
33%       .A: Transition matrix. This is a square matrix that follows the
34%      same organization scheme as Res.C's columns. Identity matrices are
35%      used to account for matching terms on the left and righthand side.
36%      For example, we place an I4 matrix to account for matching
37%      (f_t-1; f_t-2; f_t-3; f_t-4) terms.
38%       .Q: Covariance for transition equation residuals.
39%       .Mx: Series mean
40%       .Wx: Series standard deviation
41%       .Z_0: Initial value of state
42%       .V_0: Initial value of covariance matrix
43%       .r: Number of common factors for each block
44%       .p: Number of lags in transition equation
45%
46% References:
47%
48%   Marta Banbura, Domenico Giannone and Lucrezia Reichlin
49%   Nowcasting (2010)
50%   Michael P. Clements and David F. Hendry, editors,
51%   Oxford Handbook on Economic Forecasting.
52
53%% Store model parameters ------------------------------------------------
54
55
56% DFM input specifications: See documentation for details
57Par.blocks = Spec.Blocks;                  % Block loading structure
58Par.nQ = sum(strcmp('q',Spec.Frequency));  % Number of quarterly series
59Par.p = Spec.p;                                 % Number of lags in autoregressive of factor (same for all factors)
60Par.r = ones(1,size(Spec.Blocks,2)) * Spec.k;       % Number of common factors for each block
61
62%Par.r(1) =2;
63% Display blocks
64try
65    fprintf('\n\n\n');
66    disp('Table 3: Block Loading Structure')
67    disp(array2table(Spec.Blocks,...
68         'RowNames', strrep(Spec.SeriesName,' ','_'),...
69         'VariableNames',Spec.BlockNames));
70    fprintf('\n\n\n');
71catch
72end
73
74fprintf('Estimating the dynamic factor model (DFM) ... \n\n');
75
76[T,N] = size(X);
77r = Par.r;
78p = Par.p;
79nQ = Par.nQ;
80blocks = Par.blocks;
81
82i_idio = logical([ones(N-nQ,1);zeros(nQ,1)]);
83
84%R*Lambda = q; Constraints on the loadings of the quartrly variables
85
86R_mat = [  2 -1  0  0  0;...
87           3  0 -1  0  0;...
88           2  0  0 -1  0;...
89           1  0  0  0 -1];
90
91q = zeros(4,1);
92
93if(nargin < 3)
94    threshold = 1e-5;  % EM loop threshold (default value)
95end
96
97if(nargin < 4)
98    max_iter = 5000;  % EM loop maximum number of iterations
99end
100
101%% Prepare data -----------------------------------------------------------
102
103Mx = mean(X,'omitnan');
104Wx = std(X,'omitnan');
105xNaN = (X-repmat(Mx,T,1))./repmat(Wx,T,1);  % Standardize series
106
107%% Initial Conditions -----------------------------------------------------
108
109optNaN.method = 2; % Remove leading and closing zeros
110optNaN.k = 3;      % Setting for filter(): See remNaN_spline
111
112[A, C, Q, R, Z_0, V_0] = InitCond(xNaN,r,p,blocks,optNaN,R_mat,q,nQ,i_idio);
113
114% Initialize EM loop values
115previous_loglik = -inf;
116num_iter = 0;
117LL = -inf;
118converged = 0;
119
120% y for the estimation is WITH missing data
121y = xNaN';
122
123%% EM LOOP ----------------------------------------------------------------
124
125%The model can be written as
126%y = C*Z + e;
127%Z = A*Z(-1) + v
128%where y is NxT, Z is (pr)xT, etc
129
130% Remove the leading and ending nans
131optNaN.method = 3;
132y_est = remNaNs_spline(xNaN,optNaN)';
133
134while (num_iter < max_iter) & ~converged % Loop until converges or max iter.
135
136    [C_new, R_new, A_new, Q_new, Z_0, V_0, loglik] = ...  % Applying EM algorithm
137        EMstep(y_est, A, C, Q, R, Z_0, V_0, r,p,R_mat,q,nQ,i_idio,blocks);
138
139    C = C_new;
140    R = R_new;
141    A = A_new;
142    Q = Q_new;
143
144    if num_iter > 2  % Checking convergence
145        [converged, decrease(num_iter + 1)] = ...
146            em_converged(loglik, previous_loglik, threshold, 1);
147    end
148
149    if (mod(num_iter,10) == 0) && (num_iter > 0)  % Print updates to command window
150        disp(['Now running the ',num2str(num_iter),...
151              'th iteration of max ', num2str(max_iter)]);
152        disp(['  Loglik','   (% Change)'])
153        disp([num2str(loglik),'   (', sprintf('%6.2f',100*((loglik-previous_loglik)/previous_loglik)) '%)'])
154    end
155
156
157    LL = [LL loglik];
158    previous_loglik = loglik;
159    num_iter =  num_iter + 1;
160
161end
162
163if(num_iter < max_iter)
164    disp(['Successful: Convergence at ', num2str(num_iter), ' iterations'])
165else
166   disp('Stopped because maximum iterations reached')
167end
168
169% Final run of the Kalman filter
170[Zsmooth, Vsmooth, VVsmooth, loglik] = runKF(y, A, C, Q, R, Z_0, V_0);
171Zsmooth = Zsmooth';
172
173x_sm = Zsmooth(2:end,:) * C';  % Get smoothed X
174
175
176%%  Loading the structure with the results --------------------------------
177Res.x_sm = x_sm;
178Res.X_sm = repmat(Wx,T,1) .* x_sm + repmat(Mx,T,1);  % Unstandardized, smoothed
179Res.Z = Zsmooth(2:end,:);
180Res.C = C;
181Res.R = R;
182Res.A = A;
183Res.Q = Q;
184Res.Mx = Mx;
185Res.Wx = Wx;
186Res.Z_0 = Z_0;
187Res.V_0 = V_0;
188Res.r = r;
189Res.p = p;
190Res.loglik = loglik;
191Res.Vsmooth = Vsmooth;
192Res.VVsmooth = VVsmooth;
193
194%% Display output
195% Table with names and factor loadings
196
197nQ       = Par.nQ;                      % Number of quarterly series
198nM       = size(Spec.SeriesID,1) - nQ;  % Number monthly series
199nLags    = max(Par.p, 5);               % 5 comes from monthly-quarterly aggregation
200nFactors = sum(Par.r);
201
202fprintf('\n\n\n');
203
204try
205disp('Table 4: Factor Loadings for Monthly Series');
206disp(array2table(Res.C(1:nM, 1:5:nFactors*5),...  % Only select lag(0) terms
207     'RowNames', strrep(Spec.SeriesName(1:nM),' ','_'), ...
208     'VariableNames', Spec.BlockNames));
209fprintf('\n\n\n');
210disp('Table 5: Quarterly Loadings Sample (Global Factor)')
211disp(array2table(Res.C(end-nQ+1:end, 1:5), ...  % Select only quarterly series
212     'RowNames', strrep(Spec.SeriesName(end-nQ+1:end),' ','_'), ...
213     'VariableNames', {'f1_lag0', 'f1_lag1', 'f1_lag2', 'f1_lag3', 'f1_lag4'}));
214fprintf('\n\n\n');
215catch
216end
217
218% Table with AR model on factors (factors with AR parameter and variance of residuals)
219
220A_terms = diag(Res.A);  % Transition equation terms
221Q_terms = diag(Res.Q);  % Covariance matrix terms
222
223try
224disp('Table 6: Autoregressive Coefficients on Factors')
225disp(table(A_terms(1:5:nFactors*5), ...  % Only select lag(0) terms
226           Q_terms(1:5:nFactors*5), ...
227           'VariableNames', {'AR_Coefficient', 'Variance_Residual'}, ...
228           'RowNames',      strrep(Spec.BlockNames,' ','_')));
229fprintf('\n\n\n');
230catch
231end
232
233% Table with AR model idiosyncratic errors (factors with AR parameter and variance of residuals)
234try
235disp('Table 7: Autoregressive Coefficients on Idiosyncratic Component')
236disp(table(A_terms([nFactors*5+1:nFactors*5+nM nFactors*5+nM+1:5:end]),...  % 21:50 give monthly, 51:5:61 give quarterly
237           Q_terms([nFactors*5+1:nFactors*5+nM nFactors*5+nM+1:5:end]), ...
238           'VariableNames', {'AR_Coefficient', 'Variance_Residual'}, ...
239           'RowNames', strrep(Spec.SeriesName,' ','_')));
240catch
241end
242
243end
244
245
246
247%% PROCEDURES -------------------------------------------------------------
248% Note: Kalman filter (runKF()) is in the 'functions' folder
249
250function  [C_new, R_new, A_new, Q_new, Z_0, V_0, loglik] = EMstep(y, A, C, Q, R, Z_0, V_0, r,p,R_mat,q,nQ,i_idio,blocks)
251%EMstep    Applies EM algorithm for parameter reestimation
252%
253%  Syntax:
254%    [C_new, R_new, A_new, Q_new, Z_0, V_0, loglik]
255%    = EMstep(y, A, C, Q, R, Z_0, V_0, r, p, R_mat, q, nQ, i_idio, blocks)
256%
257%  Description:
258%    EMstep reestimates parameters based on the Estimation Maximization (EM)
259%    algorithm. This is a two-step procedure:
260%    (1) E-step: the expectation of the log-likelihood is calculated using
261%        previous parameter estimates.
262%    (2) M-step: Parameters are re-estimated through the maximisation of
263%        the log-likelihood (maximize result from (1)).
264%
265%    See "Maximum likelihood estimation of factor models on data sets with
266%    arbitrary pattern of missing data" for details about parameter
267%    derivation (Banbura & Modugno, 2010). This procedure is in much the
268%    same spirit.
269%
270%  Input:
271%    y:      Series data
272%    A:      Transition matrix
273%    C:      Observation matrix
274%    Q:      Covariance for transition equation residuals
275%    R:      Covariance for observation matrix residuals
276%    Z_0:    Initial values of factors
277%    V_0:    Initial value of factor covariance matrix
278%    r:      Number of common factors for each block (e.g. vector [1 1 1 1])
279%    p:      Number of lags in transition equation
280%    R_mat:  Estimation structure for quarterly variables (i.e. "tent")
281%    q:      Constraints on loadings
282%    nQ:     Number of quarterly series
283%    i_idio: Indices for monthly variables
284%    blocks: Block structure for each series (i.e. for a series, the structure
285%            [1 0 0 1] indicates loadings on the first and fourth factors)
286%
287%  Output:
288%    C_new: Updated observation matrix
289%    R_new: Updated covariance matrix for residuals of observation matrix
290%    A_new: Updated transition matrix
291%    Q_new: Updated covariance matrix for residuals for transition matrix
292%    Z_0:   Initial value of state
293%    V_0:   Initial value of covariance matrix
294%    loglik: Log likelihood
295%
296% References:
297%   "Maximum likelihood estimation of factor models on data sets with
298%   arbitrary pattern of missing data" by Banbura & Modugno (2010).
299%   Abbreviated as BM2010
300%
301%
302
303%% Initialize preliminary values
304
305% Store series/model values
306[n, T] = size(y);
307nM = n - nQ;  % Number of monthly series
308pC = size(R_mat,2);
309ppC = max(p,pC);
310num_blocks = size(blocks,2);  % Number of blocks
311
312%% ESTIMATION STEP: Compute the (expected) sufficient statistics for a single
313%Kalman filter sequence
314
315% Running the Kalman filter and smoother with current parameters
316% Note that log-liklihood is NOT re-estimated after the runKF step: This
317% effectively gives the previous iteration's log-likelihood
318% For more information on output, see runKF
319[Zsmooth, Vsmooth, VVsmooth, loglik] = runKF(y, A, C, Q, R, Z_0, V_0);
320
321
322%% MAXIMIZATION STEP (TRANSITION EQUATION)
323% See (Banbura & Modugno, 2010) for details.
324
325% Initialize output
326A_new = A;
327Q_new = Q;
328V_0_new = V_0;
329
330%%% 2A. UPDATE FACTOR PARAMETERS INDIVIDUALLY ----------------------------
331
332for i = 1:num_blocks  % Loop for each block: factors are uncorrelated
333
334    % SETUP INDEXING
335    r_i = r(i);  % r_i = 1 if block is loaded
336    rp = r_i*p;
337    rp1 = sum(r(1:i-1))*ppC;
338    b_subset = rp1+1:rp1+rp;  % Subset blocks: Helps for subsetting Zsmooth, Vsmooth
339    t_start = rp1+1;          % Transition matrix factor idx start
340    t_end = rp1+r_i*ppC;      % Transition matrix factor idx end
341
342
343
344    % ESTIMATE FACTOR PORTION OF Q, A
345    % Note: EZZ, EZZ_BB, EZZ_FB are parts of equations 6 and 8 in BM 2010
346
347    % E[f_t*f_t' | Omega_T]
348    EZZ = Zsmooth(b_subset, 2:end) * Zsmooth(b_subset, 2:end)'...
349        +sum(Vsmooth(b_subset, b_subset, 2:end) ,3);
350
351    % E[f_{t-1}*f_{t-1}' | Omega_T]
352    EZZ_BB = Zsmooth(b_subset, 1:end-1)*Zsmooth(b_subset, 1:end-1)'...
353            +sum(Vsmooth(b_subset, b_subset, 1:end-1), 3);
354
355    % E[f_t*f_{t-1}' | Omega_T]
356    EZZ_FB = Zsmooth(b_subset, 2:end)*Zsmooth(b_subset, 1:end-1)'...
357        +sum(VVsmooth(b_subset, b_subset, :), 3);
358
359    % Select transition matrix/covariance matrix for block i
360    A_i = A(t_start:t_end, t_start:t_end);
361    Q_i = Q(t_start:t_end, t_start:t_end);
362
363    % Equation 6: Estimate VAR(p) for factor
364    A_i(1:r_i,1:rp) = EZZ_FB(1:r_i,1:rp) * inv(EZZ_BB(1:rp,1:rp));
365
366    % Equation 8: Covariance matrix of residuals of VAR
367    Q_i(1:r_i,1:r_i) = (EZZ(1:r_i,1:r_i) - A_i(1:r_i,1:rp)* EZZ_FB(1:r_i,1:rp)') / T;
368
369    % Place updated results in output matrix
370    A_new(t_start:t_end, t_start:t_end) = A_i;
371    Q_new(t_start:t_end, t_start:t_end) = Q_i;
372    V_0_new(t_start:t_end, t_start:t_end) =...
373        Vsmooth(t_start:t_end, t_start:t_end,1);
374end
375
376%%% 2B. UPDATING PARAMETERS FOR IDIOSYNCRATIC COMPONENT ------------------
377
378rp1 = sum(r)*ppC;           % Col size of factor portion
379niM = sum(i_idio(1:nM));    % Number of monthly values
380t_start = rp1+1;            % Start of idiosyncratic component index
381i_subset = t_start:rp1+niM; % Gives indices for monthly idiosyncratic component values
382
383
384% Below 3 estimate the idiosyncratic component (for eqns 6, 8 BM 2010)
385
386% E[f_t*f_t' | \Omega_T]
387EZZ = diag(diag(Zsmooth(t_start:end, 2:end) * Zsmooth(t_start:end, 2:end)'))...
388    + diag(diag(sum(Vsmooth(t_start:end, t_start:end, 2:end), 3)));
389
390% E[f_{t-1}*f_{t-1}' | \Omega_T]
391EZZ_BB = diag(diag(Zsmooth(t_start:end, 1:end-1)* Zsmooth(t_start:end, 1:end-1)'))...
392       + diag(diag(sum(Vsmooth(t_start:end, t_start:end, 1:end-1), 3)));
393
394% E[f_t*f_{t-1}' | \Omega_T]
395EZZ_FB = diag(diag(Zsmooth(t_start:end, 2:end)*Zsmooth(t_start:end, 1:end-1)'))...
396       + diag(diag(sum(VVsmooth(t_start:end, t_start:end, :), 3)));
397
398A_i = EZZ_FB * diag(1./diag((EZZ_BB)));  % Equation 6
399Q_i = (EZZ - A_i*EZZ_FB') / T;           % Equation 8
400
401% Place updated results in output matrix
402A_new(i_subset, i_subset) = A_i(1:niM,1:niM);
403Q_new(i_subset, i_subset) = Q_i(1:niM,1:niM);
404V_0_new(i_subset, i_subset) = diag(diag(Vsmooth(i_subset, i_subset, 1)));
405
406
407%% 3 MAXIMIZATION STEP (observation equation)
408
409%%% INITIALIZATION AND SETUP ----------------------------------------------
410Z_0 = Zsmooth(:,1); %zeros(size(Zsmooth,1),1); %
411V_0 = Vsmooth(:,:,1);
412
413% Set missing data series values to 0
414nanY = isnan(y);
415y(nanY) = 0;
416
417% LOADINGS
418C_new = C;
419
420% Blocks
421bl = unique(blocks,'rows');  % Gives unique loadings
422n_bl = size(bl,1);           % Number of unique loadings
423
424% Initialize indices: These later help with subsetting
425bl_idxM = [];  % Indicator for monthly factor loadings
426bl_idxQ = [];  % Indicator for quarterly factor loadings
427R_con = [];    % Block diagonal matrix giving monthly-quarterly aggreg scheme
428q_con = [];
429
430% Loop through each block
431for i = 1:num_blocks
432    bl_idxQ = [bl_idxQ repmat(bl(:,i),1,r(i)*ppC)];
433    bl_idxM = [bl_idxM repmat(bl(:,i),1,r(i)) zeros(n_bl,r(i)*(ppC-1))];
434    R_con = blkdiag(R_con, kron(R_mat,eye(r(i))));
435    q_con = [q_con;zeros(r(i)*size(R_mat,1),1)];
436end
437
438% Indicator for monthly/quarterly blocks in observation matrix
439bl_idxM = logical(bl_idxM);
440bl_idxQ = logical(bl_idxQ);
441
442i_idio_M = i_idio(1:nM);            % Gives 1 for monthly series
443n_idio_M = length(find(i_idio_M));  % Number of monthly series
444c_i_idio = cumsum(i_idio);          % Cumulative number of monthly series
445
446for i = 1:n_bl  % Loop through unique loadings (e.g. [1 0 0 0], [1 1 0 0])
447
448    bl_i = bl(i,:);
449    rs = sum(r(logical(bl_i)));                    % Total num of blocks loaded
450    idx_i = find(ismember(blocks, bl_i, 'rows'));  % Indices for bl_i
451    idx_iM = idx_i(idx_i<nM+1);                    % Only monthly
452    n_i = length(idx_iM);                          % Number of monthly series
453
454    % Initialize sums in equation 13 of BGR 2010
455    denom = zeros(n_i*rs,n_i*rs);
456    nom = zeros(n_i,rs);
457
458    % Stores monthly indicies. These are done for input robustness
459    i_idio_i = i_idio_M(idx_iM);
460    i_idio_ii = c_i_idio(idx_iM);
461    i_idio_ii = i_idio_ii(i_idio_i);
462
463    %%% UPDATE MONTHLY VARIABLES: Loop through each period ----------------
464    for t = 1:T
465        Wt = diag(~nanY(idx_iM, t));  % Gives selection matrix (1 for nonmissing values)
466
467        denom = denom +...  % E[f_t*t_t' | Omega_T]
468                kron(Zsmooth(bl_idxM(i, :), t+1) * Zsmooth(bl_idxM(i, :), t+1)' + ...
469                Vsmooth(bl_idxM(i, :), bl_idxM(i, :), t+1), Wt);
470
471        nom = nom + ...  E[y_t*f_t' | \Omega_T]
472              y(idx_iM, t) * Zsmooth(bl_idxM(i, :), t+1)' - ...
473              Wt(:, i_idio_i) * (Zsmooth(rp1 + i_idio_ii, t+1) * ...
474              Zsmooth(bl_idxM(i, :), t+1)' + ...
475              Vsmooth(rp1 + i_idio_ii, bl_idxM(i, :), t+1));
476    end
477
478    vec_C = inv(denom)*nom(:);  % Eqn 13 BGR 2010
479
480    % Place updated monthly results in output matrix
481    C_new(idx_iM,bl_idxM(i,:)) = reshape(vec_C, n_i, rs);
482
483   %%% UPDATE QUARTERLY VARIABLES -----------------------------------------
484
485   idx_iQ = idx_i(idx_i > nM);  % Index for quarterly series
486   rps = rs * ppC;
487
488   % Monthly-quarterly aggregation scheme
489   R_con_i = R_con(:,bl_idxQ(i,:));
490   q_con_i = q_con;
491
492   no_c = ~(any(R_con_i,2));
493   R_con_i(no_c,:) = [];
494   q_con_i(no_c,:) = [];
495
496   % Loop through quarterly series in loading. This parallels monthly code
497   for j = idx_iQ'
498       % Initialization
499       denom = zeros(rps,rps);
500       nom = zeros(1,rps);
501       nom2 = zeros(1,rps);
502
503       idx_jQ = j-nM;  % Ordinal position of quarterly variable
504       % Loc of factor structure corresponding to quarterly var residuals
505       i_idio_jQ = (rp1 + n_idio_M + 5*(idx_jQ-1)+1:rp1+ n_idio_M + 5*idx_jQ);
506
507       % Place quarterly values in output matrix
508       V_0_new(i_idio_jQ, i_idio_jQ) = Vsmooth(i_idio_jQ, i_idio_jQ,1);
509       A_new(i_idio_jQ(1), i_idio_jQ(1)) = A_i(i_idio_jQ(1)-rp1, i_idio_jQ(1)-rp1);
510       Q_new(i_idio_jQ(1), i_idio_jQ(1)) = Q_i(i_idio_jQ(1)-rp1, i_idio_jQ(1)-rp1);
511
512       for t=1:T
513           Wt = diag(~nanY(j,t));  % Selection matrix for quarterly values
514
515           % Intermediate steps in BGR equation 13
516           denom = denom + ...
517                   kron(Zsmooth(bl_idxQ(i,:), t+1) * Zsmooth(bl_idxQ(i,:), t+1)'...
518                 + Vsmooth(bl_idxQ(i,:), bl_idxQ(i,:), t+1), Wt);
519           nom = nom + y(j, t)*Zsmooth(bl_idxQ(i,:), t+1)';
520           nom = nom -...
521                Wt * ([1 2 3 2 1] * Zsmooth(i_idio_jQ,t+1) * ...
522                Zsmooth(bl_idxQ(i,:),t+1)'+...
523                [1 2 3 2 1]*Vsmooth(i_idio_jQ,bl_idxQ(i,:),t+1));
524           nom2 = nom2 + Wt * ([1 2 3 2 1] * Zsmooth(i_idio_jQ,t+1) * ...
525                Zsmooth(bl_idxQ(i,:),t+1)'+...
526                [1 2 3 2 1] * Vsmooth(i_idio_jQ,bl_idxQ(i,:),t+1));
527       end
528
529        C_i = inv(denom) * nom';
530        C_i_constr = C_i - ...  % BGR equation 13
531                     inv(denom) * R_con_i'*inv(R_con_i*inv(denom)*R_con_i') * (R_con_i*C_i-q_con_i);
532
533        % Place updated values in output structure
534        C_new(j,bl_idxQ(i,:)) = C_i_constr;
535   end
536end
537
538%%% 3B. UPDATE COVARIANCE OF RESIDUALS FOR OBSERVATION EQUATION -----------
539% Initialize covariance of residuals of observation equation
540R_new = zeros(n,n);
541for t=1:T
542    Wt = diag(~nanY(:,t));  % Selection matrix
543    R_new = R_new + (y(:,t) - ...  % BGR equation 15
544            Wt * C_new * Zsmooth(:, t+1)) * (y(:,t) - Wt*C_new*Zsmooth(:,t+1))'...
545           + Wt*C_new*Vsmooth(:,:,t+1)*C_new'*Wt + (eye(n)-Wt)*R*(eye(n)-Wt);
546end
547
548
549R_new = R_new/T;
550RR = diag(R_new); %RR(RR<1e-2) = 1e-2;
551RR(i_idio_M) = 1e-04;  % Ensure non-zero measurement error. See Doz, Giannone, Reichlin (2012) for reference.
552RR(nM+1:end) = 1e-04;
553R_new = diag(RR);
554
555end
556
557
558
559%--------------------------------------------------------------------------
560
561function [converged, decrease] = em_converged(loglik, previous_loglik, threshold, check_decreased)
562%em_converged    checks whether EM algorithm has converged
563%
564%  Syntax:
565%    [converged, decrease] = em_converged(loglik, previous_loglik, threshold, check_increased)
566%
567%  Description:
568%    em_converged() checks whether EM has converged. Convergence occurs if
569%    the slope of the log-likelihood function falls below 'threshold'(i.e.
570%    f(t) - f(t-1)| / avg < threshold) where avg = (|f(t)| + |f(t-1)|)/2
571%    and f(t) is log lik at iteration t. 'threshold' defaults to 1e-4.
572%
573%    This stopping criterion is from Numerical Recipes in C (pg. 423).
574%    With MAP estimation (using priors), the likelihood can decrease
575%    even if the mode of the posterior increases.
576%
577%  Input arguments:
578%    loglik: Log-likelihood from current EM iteration
579%    previous_loglik: Log-likelihood from previous EM iteration
580%    threshold: Convergence threshhold. The default is 1e-4.
581%    check_decreased: Returns text output if log-likelihood decreases.
582%
583%  Output:
584%    converged (numeric): Returns 1 if convergence criteria satisfied, and 0 otherwise.
585%    decrease (numeric): Returns 1 if loglikelihood decreased.
586
587%% Instantiate variables
588
589% Threshhold arguments: Checks default behavior
590if nargin < 3, threshold = 1e-4; end
591if nargin < 4, check_decreased = 1; end
592
593% Initialize output
594converged = 0;
595decrease = 0;
596
597%% Check if log-likelihood decreases (optional)
598
599if check_decreased
600    if loglik - previous_loglik < -1e-3 % allow for a little imprecision
601        fprintf(1, '******likelihood decreased from %6.4f to %6.4f!\n', previous_loglik, loglik);
602        decrease = 1;
603    end
604end
605
606%% Check convergence criteria
607
608delta_loglik = abs(loglik - previous_loglik);  % Difference in loglik
609avg_loglik = (abs(loglik) + abs(previous_loglik) + eps)/2;
610
611if (delta_loglik / avg_loglik) < threshold,
612    converged = 1;  % Check convergence
613end
614
615end
616
617%--------------------------------------------------------------------------
618
619%InitCond()      Calculates initial conditions for parameter estimation
620%
621%  Description:
622%    Given standardized data and model information, InitCond() creates
623%    initial parameter estimates. These are intial inputs in the EM
624%    algorithm, which re-estimates these parameters using Kalman filtering
625%    techniques.
626%
627%Inputs:
628%  - x:      Standardized data
629%  - r:      Number of common factors for each block
630%  - p:      Number of lags in transition equation
631%  - blocks: Gives series loadings
632%  - optNaN: Option for missing values in spline. See remNaNs_spline() for details.
633%  - Rcon:   Incorporates estimation for quarterly series (i.e. "tent structure")
634%  - q:      Constraints on loadings for quarterly variables
635%  - NQ:     Number of quarterly variables
636%  - i_idio: Logical. Gives index for monthly variables (1) and quarterly (0)
637%
638%Output:
639%  - A:   Transition matrix
640%  - C:   Observation matrix
641%  - Q:   Covariance for transition equation residuals
642%  - R:   Covariance for observation equation residuals
643%  - Z_0: Initial value of state
644%  - V_0: Initial value of covariance matrix
645
646function [ A, C, Q, R, Z_0, V_0] = InitCond(x,r,p,blocks,optNaN,Rcon,q,nQ,i_idio)
647
648pC = size(Rcon,2);  % Gives 'tent' structure size (quarterly to monthly)
649ppC = max(p,pC);
650n_b = size(blocks,2);  % Number of blocks
651
652OPTS.disp=0;  % Turns off diagnostic information for eigenvalue computation
653[xBal,indNaN] = remNaNs_spline(x,optNaN);  % Spline without NaNs
654
655[T,N] = size(xBal);  % Time T series number N
656nM = N-nQ;           % Number of monthly series
657
658xNaN = xBal;
659xNaN(indNaN) = nan;  % Set missing values equal to NaNs
660res = xBal;          % Spline output equal to res: Later this is used for residuals
661resNaN = xNaN;       % Later used for residuals
662
663% Initialize model coefficient output
664C = [];
665A = [];
666Q = [];
667V_0 = [];
668
669% Set the first observations as NaNs: For quarterly-monthly aggreg. scheme
670indNaN(1:pC-1,:) = true;
671
672for i = 1:n_b  % Loop for each block
673
674    r_i = r(i);  % r_i = 1 when block is loaded
675
676    %% Observation equation -----------------------------------------------
677
678    C_i = zeros(N,r_i*ppC);     % Initialize state variable matrix helper
679    idx_i = find(blocks(:,i));  % Returns series index loading block i
680    idx_iM = idx_i(idx_i<nM+1); % Monthly series indicies for loaded blocks
681    idx_iQ = idx_i(idx_i>nM);   % Quarterly series indicies for loaded blocks
682
683
684
685    % Returns eigenvector v w/largest eigenvalue d
686    [v, d] = eigs(cov(res(:,idx_iM)), r_i, 'lm');
687
688    % Flip sign for cleaner output. Gives equivalent results without this section
689    if(sum(v) < 0)
690        v = -v;
691    end
692
693    % For monthly series with loaded blocks (rows), replace with eigenvector
694    % This gives the loading
695    C_i(idx_iM,1:r_i) = v;
696    f = res(:,idx_iM)*v;  % Data projection for eigenvector direction
697    F = [];
698
699    % Lag matrix using loading. This is later used for quarterly series
700    for kk = 0:max(p+1,pC)-1
701        F = [F f(pC-kk:end-kk,:)];
702    end
703
704    Rcon_i = kron(Rcon,eye(r_i));  % Quarterly-monthly aggregation scheme
705    q_i = kron(q,zeros(r_i,1));
706
707    % Produces projected data with lag structure (so pC-1 fewer entries)
708    ff = F(:, 1:r_i*pC);
709
710    for j = idx_iQ'      % Loop for quarterly variables
711
712        % For series j, values are dropped to accommodate lag structure
713        xx_j = resNaN(pC:end,j);
714
715        if sum(~isnan(xx_j)) < size(ff,2)+2
716            xx_j = res(pC:end,j);  % Replaces xx_j with spline if too many NaNs
717
718        end
719
720        ff_j = ff(~isnan(xx_j),:);
721        xx_j = xx_j(~isnan(xx_j));
722
723        iff_j = inv(ff_j'*ff_j);
724        Cc = iff_j*ff_j'*xx_j;  % Least squares
725
726        % Spline data monthly to quarterly conversion
727        Cc = Cc - iff_j*Rcon_i'*inv(Rcon_i*iff_j*Rcon_i')*(Rcon_i*Cc-q_i);
728
729        C_i(j,1:pC*r_i)=Cc';  % Place in output matrix
730    end
731
732    ff = [zeros(pC-1,pC*r_i);ff];  % Zeros in first pC-1 entries (replace dropped from lag)
733
734    % Residual calculations
735    res = res - ff*C_i';
736    resNaN = res;
737    resNaN(indNaN) = nan;
738
739    C = [C C_i];  % Combine past loadings together
740
741
742    %% Transition equation ------------------------------------------------
743
744    z = F(:,1:r_i);            % Projected data (no lag)
745    Z = F(:,r_i+1:r_i*(p+1));  % Data with lag 1
746
747    A_i = zeros(r_i*ppC,r_i*ppC)';  % Initialize transition matrix
748
749    A_temp = inv(Z'*Z)*Z'*z;  % OLS: gives coefficient value AR(p) process
750    A_i(1:r_i,1:r_i*p) = A_temp';
751    A_i(r_i+1:end,1:r_i*(ppC-1)) = eye(r_i*(ppC-1));
752
753    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
754    Q_i = zeros(ppC*r_i,ppC*r_i);
755    e = z  - Z*A_temp;         % VAR residuals
756    Q_i(1:r_i,1:r_i) = cov(e); % VAR covariance matrix
757
758    initV_i = reshape(inv(eye((r_i*ppC)^2)-kron(A_i,A_i))*Q_i(:),r_i*ppC,r_i*ppC);
759
760    % Gives top left block for the transition matrix
761    A = blkdiag(A,A_i);
762    Q = blkdiag(Q,Q_i);
763    V_0 = blkdiag(V_0,initV_i);
764end
765
766eyeN = eye(N);  % Used inside observation matrix
767eyeN(:,~i_idio) = [];
768
769C=[C eyeN];
770C = [C [zeros(nM,5*nQ); kron(eye(nQ),[1 2 3 2 1])]];  % Monthly-quarterly agreggation scheme
771R = diag(var(resNaN,'omitnan'));  % Initialize covariance matrix for transition matrix
772
773
774ii_idio = find(i_idio);    % Indicies for monthly variables
775n_idio = length(ii_idio);  % Number of monthly variables
776BM = zeros(n_idio);        % Initialize monthly transition matrix values
777SM = zeros(n_idio);        % Initialize monthly residual covariance matrix values
778
779
780for i = 1:n_idio;  % Loop for monthly variables
781    % Set observation equation residual covariance matrix diagonal
782    R(ii_idio(i),ii_idio(i)) = 1e-04;
783
784    % Subsetting series residuals for series i
785    res_i = resNaN(:,ii_idio(i));
786
787    % Returns number of leading/ending zeros
788    leadZero = max( find( (1:T)' == cumsum(isnan(res_i)) ) );
789    endZero  = max( find( (1:T)' == cumsum(isnan(res_i(end:-1:1))) ) );
790
791    % Truncate leading and ending zeros
792    res_i = res(:,ii_idio(i));
793    res_i(end-endZero + 1:end) = [];
794    res_i(1:leadZero) = [];
795
796    % Linear regression: AR 1 process for monthly series residuals
797    BM(i,i) = inv(res_i(1:end-1)'*res_i(1:end-1))*res_i(1:end-1)'*res_i(2:end,:);
798    SM(i,i) = cov(res_i(2:end)-res_i(1:end-1)*BM(i,i));  % Residual covariance matrix
799
800end
801
802Rdiag = diag(R);
803sig_e = Rdiag(nM+1:N)/19;
804Rdiag(nM+1:N) = 1e-04;
805R = diag(Rdiag);  % Covariance for obs matrix residuals
806
807% For BQ, SQ
808rho0 = 0.1;
809temp = zeros(5);
810temp(1,1) = 1;
811
812% Blocks for covariance matrices
813SQ = kron(diag((1-rho0^2)*sig_e),temp);
814BQ = kron(eye(nQ),[[rho0 zeros(1,4)];[eye(4),zeros(4,1)]]);
815initViQ = reshape(inv(eye((5*nQ)^2)-kron(BQ,BQ))*SQ(:),5*nQ,5*nQ);
816initViM = diag(1./diag(eye(size(BM,1))-BM.^2)).*SM;
817
818% Output
819A = blkdiag(A, BM, BQ);                % Observation matrix
820Q = blkdiag(Q, SM, SQ);                % Residual covariance matrix (transition)
821Z_0 = zeros(size(A,1),1);              % States
822V_0 = blkdiag(V_0, initViM, initViQ);  % Covariance of states
823
824end
825
826
827
828function [zsmooth, Vsmooth, VVsmooth, loglik] = runKF(Y, A, C, Q, R, Z_0, V_0);
829%runKF()    Applies Kalman filter and fixed-interval smoother
830%
831%  Syntax:
832%    [zsmooth, Vsmooth, VVsmooth, loglik] = runKF(Y, A, C, Q, R, Z_0, V_0)
833%
834%  Description:
835%    runKF() applies a Kalman filter and fixed-interval smoother. The
836%    script uses the following model:
837%           Y_t = C_t Z_t + e_t for e_t ~ N(0, R)
838%           Z_t = A Z_{t-1} + mu_t for mu_t ~ N(0, Q)
839
840%  Throughout this file:
841%    'm' denotes the number of elements in the state vector Z_t.
842%    'k' denotes the number of elements (observed variables) in Y_t.
843%    'nobs' denotes the number of time periods for which data are observed.
844%
845%  Input parameters:
846%    Y: k-by-nobs matrix of input data
847%    A: m-by-m transition matrix
848%    C: k-by-m observation matrix
849%    Q: m-by-m covariance matrix for transition equation residuals (mu_t)
850%    R: k-by-k covariance for observation matrix residuals (e_t)
851%    Z_0: 1-by-m vector, initial value of state
852%    V_0: m-by-m matrix, initial value of state covariance matrix
853%
854%  Output parameters:
855%    zsmooth: k-by-(nobs+1) matrix, smoothed factor estimates
856%             (i.e. zsmooth(:,t+1) = Z_t|T)
857%    Vsmooth: k-by-k-by-(nobs+1) array, smoothed factor covariance matrices
858%             (i.e. Vsmooth(:,:,t+1) = Cov(Z_t|T))
859%    VVsmooth: k-by-k-by-nobs array, lag 1 factor covariance matrices
860%              (i.e. Cov(Z_t,Z_t-1|T))
861%    loglik: scalar, log-likelihood
862%
863%  References:
864%  - QuantEcon's "A First Look at the Kalman Filter"
865%  - Adapted from replication files for:
866%    "Nowcasting", 2010, (by Marta Banbura, Domenico Giannone and Lucrezia
867%    Reichlin), in Michael P. Clements and David F. Hendry, editors, Oxford
868%    Handbook on Economic Forecasting.
869%
870% The software can be freely used in applications.
871% Users are kindly requested to add acknowledgements to published work and
872% to cite the above reference in any resulting publications
873
874S = SKF(Y, A, C, Q, R, Z_0, V_0);  % Kalman filter
875S = FIS(A, S);                     % Fixed interval smoother
876
877% Organize output
878zsmooth = S.ZmT;
879Vsmooth = S.VmT;
880VVsmooth = S.VmT_1;
881loglik = S.loglik;
882
883end
884
885%______________________________________________________________________
886function S = SKF(Y, A, C, Q, R, Z_0, V_0)
887%SKF    Applies Kalman filter
888%
889%  Syntax:
890%    S = SKF(Y, A, C, Q, R, Z_0, V_0)
891%
892%  Description:
893%    SKF() applies the Kalman filter
894
895%  Input parameters:
896%    Y: k-by-nobs matrix of input data
897%    A: m-by-m transition matrix
898%    C: k-by-m observation matrix
899%    Q: m-by-m covariance matrix for transition equation residuals (mu_t)
900%    R: k-by-k covariance for observation matrix residuals (e_t)
901%    Z_0: 1-by-m vector, initial value of state
902%    V_0: m-by-m matrix, initial value of state covariance matrix
903%
904%  Output parameters:
905%    S.Zm: m-by-nobs matrix, prior/predicted factor state vector
906%          (S.Zm(:,t) = Z_t|t-1)
907%    S.ZmU: m-by-(nobs+1) matrix, posterior/updated state vector
908%           (S.Zm(t+1) = Z_t|t)
909%    S.Vm: m-by-m-by-nobs array, prior/predicted covariance of factor
910%          state vector (S.Vm(:,:,t) = V_t|t-1)
911%    S.VmU: m-by-m-by-(nobs+1) array, posterior/updated covariance of
912%           factor state vector (S.VmU(:,:,t+1) = V_t|t)
913%    S.loglik: scalar, value of likelihood function
914%    S.k_t: k-by-m Kalman gain
915
916%% INITIALIZE OUTPUT VALUES ---------------------------------------------
917  % Output structure & dimensions of state space matrix
918  [~, m] = size(C);
919
920  % Outputs time for data matrix. "number of observations"
921  nobs  = size(Y,2);
922
923  % Instantiate output
924  S.Zm  = nan(m, nobs);       % Z_t | t-1 (prior)
925  S.Vm  = nan(m, m, nobs);    % V_t | t-1 (prior)
926  S.ZmU = nan(m, nobs+1);     % Z_t | t (posterior/updated)
927  S.VmU = nan(m, m, nobs+1);  % V_t | t (posterior/updated)
928  S.loglik = 0;
929
930%% SET INITIAL VALUES ----------------------------------------------------
931  Zu = Z_0;  % Z_0|0 (In below loop, Zu gives Z_t | t)
932  Vu = V_0;  % V_0|0 (In below loop, Vu guvse V_t | t)
933
934  % Store initial values
935  S.ZmU(:,1)    = Zu;
936  S.VmU(:,:,1)  = Vu;
937
938%% KALMAN FILTER PROCEDURE ----------------------------------------------
939  for t = 1:nobs
940      %%% CALCULATING PRIOR DISTIBUTION----------------------------------
941
942      % Use transition eqn to create prior estimate for factor
943      % i.e. Z = Z_t|t-1
944      Z   = A * Zu;
945
946      % Prior covariance matrix of Z (i.e. V = V_t|t-1)
947      %   Var(Z) = Var(A*Z + u_t) = Var(A*Z) + Var(\epsilon) =
948      %   A*Vu*A' + Q
949      V   = A * Vu* A' + Q;
950      V   =  0.5 * (V+V');  % Trick to make symmetric
951
952      %%% CALCULATING POSTERIOR DISTRIBUTION ----------------------------
953
954      % Removes missing series: These are removed from Y, C, and R
955      [Y_t, C_t, R_t, ~] = MissData(Y(:,t), C, R);
956
957      % Check if y_t contains no data. If so, replace Zu and Vu with prior.
958      if isempty(Y_t)
959          Zu = Z;
960          Vu = V;
961      else
962          % Steps for variance and population regression coefficients:
963          % Var(c_t*Z_t + e_t) = c_t Var(A) c_t' + Var(u) = c_t*V *c_t' + R
964          VC  = V * C_t';
965          iF  = inv(C_t * VC + R_t);
966
967          % Matrix of population regression coefficients (QuantEcon eqn #4)
968          VCF = VC*iF;
969
970          % Gives difference between actual and predicted observation
971          % matrix values
972          innov  = Y_t - C_t*Z;
973
974          % Update estimate of factor values (posterior)
975          Zu  = Z  + VCF * innov;
976
977          % Update covariance matrix (posterior) for time t
978          Vu  = V  - VCF * VC';
979          Vu   =  0.5 * (Vu+Vu'); % Approximation trick to make symmetric
980
981          % Update log likelihood
982          % t
983          % llf = -0.5 * size(Y_t, 1) * log(2 * pi) - 0.5*(log(det(iF))  - innov'*iF*innov)
984          % fprintf('----------')
985          S.loglik = S.loglik + 0.5*(log(det(iF))  - innov'*iF*innov);
986      end
987
988      %%% STORE OUTPUT----------------------------------------------------
989
990      % Store covariance and observation values for t-1 (priors)
991      S.Zm(:,t)   = Z;
992      S.Vm(:,:,t) = V;
993
994      % Store covariance and state values for t (posteriors)
995      % i.e. Zu = Z_t|t   & Vu = V_t|t
996      S.ZmU(:,t+1)    = Zu;
997      S.VmU(:,:,t+1)  = Vu;
998  end
999
1000  % Store Kalman gain k_t
1001  if isempty(Y_t)
1002      S.k_t = zeros(m,m);
1003  else
1004      S.k_t = VCF * C_t;
1005  end
1006
1007end
1008
1009
1010%______________________________________________________________________
1011function S = FIS(A, S)
1012%FIS()    Applies fixed-interval smoother
1013%
1014%  Syntax:
1015%    S = FIS(A, S)
1016%
1017%  Description:
1018%    SKF() applies a fixed-interval smoother, and is used in conjunction
1019%    with SKF(). See  page 154 of 'Forecasting, structural time series models
1020%    and the Kalman filter' for more details (Harvey, 1990).
1021%
1022%  Input parameters:
1023%    A: m-by-m transition matrix
1024%    S: structure returned by SKF()
1025%
1026%  Output parameters:
1027%    S: FIS() adds the following smoothed estimates to the S structure:
1028%    - S.ZmT: m-by-(nobs+1) matrix, smoothed states
1029%             (S.ZmT(:,t+1) = Z_t|T)
1030%    - S.VmT: m-by-m-by-(nobs+1) array, smoothed factor covariance
1031%             matrices (S.VmT(:,:,t+1) = V_t|T = Cov(Z_t|T))
1032%    - S.VmT_1: m-by-m-by-nobs array, smoothed lag 1 factor covariance
1033%               matrices (S.VmT_1(:,:,t) = Cov(Z_t Z_t-1|T))
1034%
1035%  Model:
1036%   Y_t = C_t Z_t + e_t for e_t ~ N(0, R)
1037%   Z_t = A Z_{t-1} + mu_t for mu_t ~ N(0, Q)
1038
1039%% ORGANIZE INPUT ---------------------------------------------------------
1040
1041% Initialize output matrices
1042  [m, nobs] = size(S.Zm);
1043  S.ZmT = zeros(m,nobs+1);
1044  S.VmT = zeros(m,m,nobs+1);
1045
1046  % Fill the final period of ZmT, VmT with SKF() posterior values
1047  S.ZmT(:,nobs+1)   = squeeze(S.ZmU(:, nobs+1));
1048  S.VmT(:,:,nobs+1) = squeeze(S.VmU(:,:, nobs+1));
1049
1050  % Initialize VmT_1 lag 1 covariance matrix for final period
1051  S.VmT_1(:,:,nobs) = (eye(m)-S.k_t) *A*squeeze(S.VmU(:,:,nobs));
1052
1053  % Used for recursion process. See companion file for details
1054  J_2 = squeeze(S.VmU(:,:,nobs)) * A' * pinv(squeeze(S.Vm(:,:,nobs)));
1055
1056  %% RUN SMOOTHING ALGORITHM ----------------------------------------------
1057
1058  % Loop through time reverse-chronologically (starting at final period nobs)
1059    for t = nobs:-1:1
1060
1061        % Store posterior and prior factor covariance values
1062        VmU = squeeze(S.VmU(:,:,t));
1063        Vm1 = squeeze(S.Vm(:,:,t));
1064
1065        % Store previous period smoothed factor covariance and lag-1 covariance
1066        V_T = squeeze(S.VmT(:,:,t+1));
1067        V_T1 = squeeze(S.VmT_1(:,:,t));
1068
1069        J_1 = J_2;
1070
1071        % Update smoothed factor estimate
1072        S.ZmT(:,t) = S.ZmU(:,t) + J_1 * (S.ZmT(:,t+1) - A * S.ZmU(:,t)) ;
1073
1074        % Update smoothed factor covariance matrix
1075        S.VmT(:,:,t) = VmU + J_1 * (V_T - Vm1) * J_1';
1076
1077        if t>1
1078            % Update weight
1079            J_2 = squeeze(S.VmU(:, :, t-1)) * A' * pinv(squeeze(S.Vm(:,:,t-1)));
1080
1081            % Update lag 1 factor covariance matrix
1082            S.VmT_1(:,:,t-1) = VmU * J_2'+J_1 * (V_T1 - A * VmU) * J_2';
1083        end
1084    end
1085
1086end
1087
1088
1089function [y,C,R,L]  = MissData(y,C,R)
1090% Syntax:
1091% Description:
1092%   Eliminates the rows in y & matrices C, R that correspond to missing
1093%   data (NaN) in y
1094%
1095% Input:
1096%   y: Vector of observations at time t
1097%   C: Observation matrix
1098%   R: Covariance for observation matrix residuals
1099%
1100% Output:
1101%   y: Vector of observations at time t (reduced)
1102%   C: Observation matrix (reduced)
1103%   R: Covariance for observation matrix residuals
1104%   L: Used to restore standard dimensions(n x #) where # is the nr of
1105%      available data in y
1106
1107  % Returns 1 for nonmissing series
1108  ix = ~isnan(y);
1109
1110  % Index for columns with nonmissing variables
1111  e  = eye(size(y,1));
1112  L  = e(:,ix);
1113
1114  % Removes missing series
1115  y  = y(ix);
1116
1117  % Removes missing series from observation matrix
1118  C  =  C(ix,:);
1119
1120  % Removes missing series from transition matrix
1121  R  =  R(ix,ix);
1122
1123end
1124
1125