1%%%  Replication files for:
2%%%  ""Nowcasting", 2010, (by Marta Banbura, Domenico Giannone and Lucrezia Reichlin),
3%%% in Michael P. Clements and David F. Hendry, editors, Oxford Handbook on Economic Forecasting.
4%%%
5%%% The software can be freely used in applications.
6%%% Users are kindly requested to add acknowledgements to published work and
7%%% to cite the above reference in any resulting publications
8%
9%Description:
10%
11%remNaNs    Treats NaNs in dataset for use in DFM.
12%
13%  Syntax:
14%    [X,indNaN] = remNaNs(X,options)
15%
16%  Description:
17%    remNaNs() processes NaNs in a data matrix X according to 5 cases (see
18%    below for details). These are useful for running functions in the
19%    'DFM.m' file that do not take missing value inputs.
20%
21%  Input parameters:
22%    X (T x n): Input data where T gives time and n gives the series.
23%    options: A structure with two elements:
24%      options.method (numeric):
25%      - 1: Replaces all missing values using filter().
26%      - 2: Replaces missing values after removing trailing and leading
27%           zeros (a row is 'missing' if >80% is NaN)
28%      - 3: Only removes rows with leading and closing zeros
29%      - 4: Replaces missing values after removing trailing and leading
30%           zeros (a row is 'missing' if all are NaN)
31%      - 5: Replaces missing values with spline() then runs filter().
32%
33%      options.k (numeric): remNaNs() relies on MATLAB's filter function
34%      for the 1-D filter. k controls the rational transfer function
35%      argument's numerator (the denominator is set to 1). More
36%      specifically, the numerator takes the form 'ones(2*k+1,1)/(2*k+1)'
37%      For additional help, see MATLAB's documentation for filter().
38%
39%  Output parameters:
40%    X: Outputted data.
41%    indNaN: A matrix indicating the location for missing values (1 for NaN).
42
43function [X,indNaN] = remNaNs_spline(X,options)
44[T,N]=size(X);    % Gives dimensions for data input
45k=options.k;      % Inputted options
46indNaN=isnan(X);  % Returns location of NaNs
47
48switch options.method
49    case 1  % replace all the missing values
50        for i = 1:N  % loop through columns
51            x = X(:,i);
52            x(indNaN(:,i))= nan_median(x);  % Replace missing values series median
53            x_MA =filter (ones(2*k+1,1)/(2*k+1), 1, [x(1)*ones(k,1);x;x(end)*ones(k,1)]);
54            x_MA=x_MA(2*k+1:end);  % Match dimensions
55            % Replace missing observations with filtered values
56            x(indNaN(:,i))=x_MA(indNaN(:,i));
57            X(:,i)=x;  % Replace vector
58        end
59    case 2 %replace missing values after removing leading and closing zeros
60
61        % Returns row sum for NaN values. Marks true for rows with more
62        % than 80% NaN
63        rem1=(sum(indNaN,2)>N*0.8);
64        nanLead =(cumsum(rem1)==(1:T)');
65        nanEnd =(cumsum(rem1(end:-1:1))==(1:T)');
66        nanEnd = nanEnd(end:-1:1);  % Reverses nanEnd
67        nanLE = (nanLead | nanEnd);
68
69        % Subsets X for for
70        X(nanLE,:) = [];
71        indNaN = isnan(X);  % Index for missing values
72        % Loop for each series
73        for i = 1:N
74            x = X(:,i);
75            isnanx = isnan(x);
76            t1 = min(find(~isnanx));  % First non-NaN entry
77            t2 = max(find(~isnanx));  % Last non-NaN entry
78            % Interpolates without NaN entries in beginning and end
79            x(t1:t2) = spline(find(~isnanx),x(~isnanx),(t1:t2)');
80            isnanx = isnan(x);
81            % replace NaN observations with median
82            x(isnanx) = median(x,'omitnan');
83            % Apply filter
84            x_MA = filter (ones(2*k+1,1)/(2*k+1),1,[x(1)*ones(k,1);x;x(end)*ones(k,1)]);
85            x_MA = x_MA(2*k+1:end);
86            % Replace nanx with filtered observations
87            x(isnanx) = x_MA(isnanx);
88            X(:,i) = x;
89        end
90    case 3 %only remove rows with leading and closing zeros
91        rem1=(sum(indNaN,2)==N);
92        nanLead=(cumsum(rem1)==(1:T)');
93        nanEnd=(cumsum(rem1(end:-1:1))==(1:T)');
94        nanEnd=nanEnd(end:-1:1);
95        nanLE=(nanLead | nanEnd);
96        X(nanLE,:)=[];
97        indNaN = isnan(X);
98    case 4  %remove rows with leading and closing zeros & replace missing values
99        rem1=(sum(indNaN,2)==N);
100        nanLead=(cumsum(rem1)==(1:T)');
101        nanEnd=(cumsum(rem1(end:-1:1))==(1:T)');
102        nanEnd=nanEnd(end:-1:1);
103        nanLE=(nanLead | nanEnd);
104        X(nanLE,:)=[];
105        indNaN=isnan(X);
106        for i = 1:N
107            x = X(:,i);
108            isnanx = isnan(x);
109            t1 = min(find(~isnanx));
110            t2 = max(find(~isnanx));
111            x(t1:t2) = spline(find(~isnanx),x(~isnanx),(t1:t2)');
112            isnanx = isnan(x);
113            x(isnanx)=nan_median(x);
114            x_MA =filter (ones(2*k+1,1)/(2*k+1),1,[x(1)*ones(k,1);x;x(end)*ones(k,1)]);
115            x_MA=x_MA(2*k+1:end);
116            x(isnanx)=x_MA(isnanx);
117            X(:,i)=x;
118        end
119    case 5 %replace missing values
120        indNaN=isnan(X);
121        for i = 1:N
122            x = X(:,i);
123            isnanx = isnan(x);
124            t1 = min(find(~isnanx));
125            t2 = max(find(~isnanx));
126            x(t1:t2) = spline(find(~isnanx),x(~isnanx),(t1:t2)');
127            isnanx = isnan(x);
128            x(isnanx)=nan_median(x);
129            x_MA =filter (ones(2*k+1,1)/(2*k+1),1,[x(1)*ones(k,1);x;x(end)*ones(k,1)]);
130            x_MA=x_MA(2*k+1:end);
131            x(isnanx)=x_MA(isnanx);
132            X(:,i)=x;
133        end
134end
135