1
2hegy.test <- function(x, deterministic = c(1,0,0),
3  lag.method = c("fixed", "AIC", "BIC", "AICc"), maxlag = 0,
4  pvalue = c("RS", "bootstrap", "raw"),
5  #args related to response surface method
6  rs.nobsreg = 15,
7  #args related to bootstrap method
8  boot.args = list(seed = 123,
9    #lag.method = c("fixed", "AIC", "BIC", "AICc"), maxlag = 0,
10    lag.method = lag.method[1], maxlag = maxlag,
11    byseason = FALSE, nb = 1000, BTdim = c(100, 10), debug.tid = -1))
12{
13  F.test.stat <- function(id)
14  {
15    if (length(id) == S)
16    {
17      if (is.null(xreg))
18      {
19        #model with no HEGY regressors and deterministic=c(0,0,0)
20        rss1 <- sum(dx^2)
21        dfs1 <- length(dx)
22        return(((rss1 - rss2) / (dfs1 - dfs2)) / (rss2 / dfs2))
23      } else
24        fit1 <- lm(dx ~ 0 + xreg)
25    } else
26      fit1 <- lm(dx ~ 0 + cbind(xreg, ypi[,-id]))
27    rss1 <- deviance(fit1)
28    dfs1 <- df.residual(fit1)
29    ((rss1 - rss2) / (dfs1 - dfs2)) / (rss2 / dfs2)
30  }
31
32  xreg <- NULL
33  stopifnot(is.ts(x))
34  n <- length(x)
35  S <- frequency(x)
36  if (S < 2)
37    stop("the time series is not seasonal")
38  isSeven <- (S %% 2) == 0
39  isSevenp2 <- 2 + isSeven
40  dx <- diff(x, S)
41
42  #xreg <- if (is.matrix(xreg)) xreg[-seq_len(S),] else xreg[-seq_len(S)]
43  if (!is.null(xreg) && NROW(xreg) != length(dx))
44      stop("the number of rows in argument ", sQuote("xreg"),
45        " does not match the length of ", sQuote("diff(x, frequency(x))"))
46
47  data.name <- deparse(substitute(x))
48  lag.method <- match.arg(lag.method)
49  isNullxreg <- is.null(xreg)
50  pvalue <- match.arg(pvalue)
51
52  if (lag.method == "AICc" && pvalue == "RS")
53  {
54    ##NOTE
55    #response surfaces are not provided by the reference paper DE14
56    #they are available for the Hannan-Quinn criterion, see add this option here
57    lag.method <- "AIC"
58    warning("argument ", sQuote("lag.method"), " was changed to ", sQuote("AIC"))
59  }
60
61  if (pvalue == "bootstrap")
62  {
63    #if (!isNullxreg)
64    #  warning("argument ", sQuote("xreg"), " was set to NULL")
65
66    #if (!is.null(boot.args$lag.method))
67    #boot.args$lag.method <- boot.args$lag.method[1]
68
69    # default arguments
70    #bargs <- list(seed = 123, lag.method = "fixed",
71    #  maxlag = 0, byseason = FALSE, nb = 1000, BTdim = c(100, 10), debug.tid = -1)
72    bargs <- list(seed = 123, lag.method = lag.method[1],
73      maxlag = maxlag, byseason = FALSE, nb = 1000, BTdim = c(100, 10), debug.tid = -1)
74
75    nms1 <- names(boot.args)
76    notinldef <- !(nms1 %in% names(bargs))
77    wno <- which(notinldef)
78
79    if (any(notinldef))
80      warning("the following elements in ", sQuote("boot.args"), " were omitted: ",
81      paste(sQuote(nms1[wno]), collapse = ", "), ".", call. = FALSE)
82
83    if (length(boot.args) > 0)
84    {
85      if (any(notinldef)) {
86        bargs[nms1[which(!notinldef)]] <- boot.args[-wno]
87      } else
88        bargs[nms1] <- boot.args
89    }
90  }
91
92  # storage matrix
93
94  stats <- matrix(nrow = floor(S/2) + 3, ncol = 2)
95  id <- seq.int(isSevenp2, S, 2)
96  rownames(stats) <- c("t_1", if (isSeven) "t_2" else NULL,
97  paste("F", apply(cbind(id, id + 1), 1, paste, collapse=":"), sep = "_"),
98  paste0("F_2:", S), paste0("F_1:", S))
99  colnames(stats) <- c("statistic", "p-value")
100
101  # deterministic terms
102
103  if (all(deterministic == c(0,0,1)))
104    deterministic <- c(1,0,1)
105
106  if (deterministic[1] == 1) {
107    strdet <- "constant"
108    xreg <- cbind(xreg, c = rep(1, n - S))
109  } else strdet <- ""
110
111  ##NOTE
112  #in principle it is not a good idea to define the intercept in "xreg" and
113  #use lm(x ~ 0 + xreg) because stats::summary.lm uses attr(z$terms, "intercept")
114  #to compute the R-squared and the F statistic includes the intercept (not the usual test),
115  #but here the R-squared and the F-statistic are not used
116
117  if (deterministic[2] == 1) {
118    xreg <- cbind(xreg, trend = seq_along(dx))
119    strdet <- paste(strdet, "+ trend")
120  }
121
122  if (deterministic[3] == 1)
123  {
124    SD <- do.call("rbind", replicate(ceiling(length(dx)/S), diag(S), simplify = FALSE))
125    SD <- ts(SD, frequency = S, start = start(dx))
126    # ignore warning "'end' value not changed"
127    SD <- suppressWarnings(window(SD, start = start(dx), end = end(dx)))
128    colnames(SD) <- paste0("SD", seq_len(S))
129    if (deterministic[1] == 1)
130      SD <- SD[,-1]
131    xreg <- cbind(xreg, SD)
132    strdet <- paste0(strdet, ifelse(deterministic[1] == 1, " + ", ""), "seasonal dummies")
133  }
134
135  if (strdet == "")
136    strdet <- "none"
137
138  # lags of the dependent variable
139
140  if (maxlag > 0)
141  {
142    dxlags <- dx
143    for (i in seq_len(maxlag))
144      dxlags <- cbind(dxlags, lag(dx, -i))
145    dxlags <- window(dxlags, end = end(dx))[,-1]
146    if (is.null(dim(dxlags)))
147      dxlags <- as.matrix(dxlags)
148    #NOTE this way would be a bit faster
149    #sapply(seq_len(maxlag), function(x) c(rep(NA,length.out=x), c(1:10)[1:(10-x)]))
150    colnames(dxlags) <- paste("Lag", seq_len(maxlag), sep ="")
151
152    if (lag.method == "fixed")
153    {
154      #NOTE 'data.frame' keeps column names, 'cbind' doesn't;
155      #'data.matrix' is required in order to be accepted by 'lm'
156      xreg <- data.matrix(data.frame(xreg, dxlags))
157    } # else, do not add dxlags here since they will be selected below
158
159  } #else # maxlags == 0 #nlags == 0
160    #str.lag.order <- lag.order <- 0
161
162  # HEGY regressors
163
164  ypi <- hegy.regressors(x)
165
166  # lag order selection
167
168  if (maxlag > 0)
169  {
170    if (lag.method != "fixed")
171    {
172      # information criteria are obtained using the same number of observations
173      # regardless of the number of lags included in a given model
174      tmp <- vector("list", maxlag + 1)
175      # model with no lags
176      id <- seq_len(maxlag)
177      tmp[[1]] <- lm(dx[-id] ~ 0 + ypi[-id,] + xreg[-id,])
178      dxlags2 <- dxlags[-id,]
179
180      #http://stackoverflow.com/questions/23154074/writing-a-loop-in-r-that-updates-a-model?rq=1
181      #the loop does not resolve the "i" and new variables are not distinguished
182      #tmp[[i+1]] <- update(tmp[[i]], . ~ . + dxlags[,i,drop=FALSE])
183      for (i in seq_len(maxlag))
184        tmp[[i+1]] <- update(tmp[[i]], as.formula(paste0(". ~ . + dxlags2[,",i,"]")))
185
186      icvals <- unlist(switch(lag.method,
187        "AIC" = lapply(tmp, AIC), "BIC" = lapply(tmp, BIC),
188        "AICc" = lapply(tmp, function(x) { k <- x$rank+1; -2*logLik(x) + 2*k +
189          (2*k*(k+1))/(length(residuals(x))-k-1) })))
190      id <- which.min(icvals)
191      # update lag order (tmp[[1]] contains 0 lags)
192      maxlag <- id - 1
193      if (maxlag > 0)
194      {
195        xreg <- data.matrix(data.frame(xreg, dxlags[,seq_len(maxlag),drop=FALSE]))
196      }
197    } # else, lag.method=="fixed", dxlags was added to reg above
198  }
199
200  # fit the model with the chosen lags (if any)
201  # using the entire sample (i.e., including the first 'maxlag' observations)
202  fit2 <- lm(dx ~ 0 + ypi + xreg)
203
204  # test statistics
205
206  rss2 <- deviance(fit2)
207  dfs2 <- df.residual(fit2)
208
209  j <- isSevenp2
210  for (i in seq.int(isSevenp2, S, 2))
211  {
212    id <- c(i, i+1)
213    stats[j,1] <- F.test.stat(id)
214    j <- j + 1
215  }
216
217  stats[j,1] <- F.test.stat(seq.int(2, ncol(ypi)))
218
219  # F-test statistic for all HEGY regressors
220  stats[j+1,1] <- F.test.stat(seq_len(S))
221
222  # t-test statistics
223  id <- seq_len(1 + isSeven) #+ ncxreg
224  stats[seq_along(id),1] <- coef(fit2)[id] / sqrt(diag(vcov(fit2))[id])
225
226  # p-values
227
228  # used with pvalue equal to "RS" or "raw"
229  ctd <- paste(deterministic, collapse = "")
230
231  if (pvalue == "RS")
232  {
233    stats[1,2] <- hegy.rs.pvalue(stats[1,1], "zero", ctd, lag.method,
234      maxlag, S, dfs2, rs.nobsreg)
235    if (isSeven)
236      stats[2,2] <- hegy.rs.pvalue(stats[2,1], "pi", ctd, lag.method,
237        maxlag, S, dfs2, rs.nobsreg)
238
239    for (i in seq.int(isSevenp2, (S+4)/2-1))
240      stats[i,2] <- hegy.rs.pvalue(stats[i,1], "pair", ctd, lag.method,
241        maxlag, S, dfs2, rs.nobsreg)
242
243    j <- nrow(stats) - 1
244    stats[j,2] <- hegy.rs.pvalue(stats[j,1], "seasall", ctd, lag.method,
245      maxlag, S, dfs2, rs.nobsreg)
246    stats[j+1,2] <- hegy.rs.pvalue(stats[j+1,1], "all", ctd, lag.method,
247      maxlag, S, dfs2, rs.nobsreg)
248  } else
249  if (pvalue == "bootstrap")
250  {
251    e <- residuals(fit2)
252    dgp.nlags <- maxlag
253    if (dgp.nlags > 0)
254    {
255      arcoefs <- coef(fit2)
256      arcoefs <- arcoefs[grepl("Lag\\d{1,3}$", names(arcoefs))]
257      if (length(arcoefs) != dgp.nlags) # debug
258        stop("unexpected number of lags")
259    } else
260      arcoefs <- 0
261
262    BTdim <- bargs$BTdim
263    nb <- bargs$nb
264    if (is.null(BTdim)) {
265      BTdim <- rep(ceiling(sqrt(nb)), 2)
266    } else
267    if (prod(BTdim) < nb) {
268      stop("the number of threads is lower than the number of bootstrap replicates")
269    }
270
271    #NOTE n-S is passed as the length of the data,
272    #the length of diff(x,S) is required by hegy_boot_pval
273
274    if (bargs$byseason)
275    {
276      e <- ts(e, frequency = S)
277      csns <- cumsum(table(cycle(e)))
278      #names(csns) <- cycle(dx)[seq.int(dgp.nlags+1, length.out=S)]
279
280      e <- unlist(split(e, cycle(e)))
281
282    } else {
283      csns <- c(0, length(e))
284    }
285
286    boot.lag.method <- match.arg(bargs$lag.method, c("fixed", "AIC", "BIC"))
287    chosen.lags <- if (boot.lag.method == "fixed")
288      integer(1) else integer(bargs$nb)
289
290    ## removed with cuda stuff
291    ## if (@HAS_CUDA@)
292    ## {
293    ##   tmp <- .C("hegy_boot_pval", dim = as.integer(c(nb, BTdim)),
294    ##     seed = as.integer(bargs$seed), idc = as.integer(deterministic),
295    ##     ip = as.integer(c(S, n-S, dgp.nlags, bargs$maxlag, bargs$debug.tid)),
296    ##     csns = as.integer(csns),
297    ##     eps = as.double(e), arcoefs = as.double(arcoefs),
298    ##     stats0 = as.double(stats[,1]),
299    ##     ICtype = as.integer(switch(boot.lag.method,
300    ##       "fixed" = 0, "AIC" = 1, "BIC" = 2, "AICc" = 3)),
301    ##     bpvals = double(nrow(stats)), chosen_lags = chosen.lags, PACKAGE = "uroot")
302    ##   stats[,2] <- tmp$bpvals
303    ##   chosen.lags <- tmp$chosen_lags
304    ## } else {
305    ##   if (grepl("windows", .Platform$OS.type)) {
306    ##     stop("GPU parallelization could not be tested on a Windows system,\n",
307    ##     "consider using the function ",  sQuote("hegy.boot.pval"))
308    ##   } else # unix
309    ##     stop("CUDA capable GPU was not available when installing the package on this system,\n",
310    ##     "consider using the function ",  sQuote("hegy.boot.pval"))
311    ## }
312    stop("'pvalue = bootstrap' was only available with CUDA, but\n",
313         "CUDA capablities in package 'uroot' were removed from v. 2.1-0,\n",
314         "consider using the function ",  sQuote("hegy.boot.pval"))
315
316  } else
317  if (pvalue == "raw")
318  {
319    #NOTE documentation deterministic == c(0,0,1) not available
320    #in original tables (use c(1,0,1) instead)
321    stats[1,2] <- uroot.raw.pvalue(stats[1,1], "HEGY", NULL, dfs2, ctd, S, "zero")
322    if (isSeven)
323      stats[2,2] <- uroot.raw.pvalue(stats[2,1], "HEGY", NULL, dfs2, ctd, S, "pi")
324    for (i in seq.int(isSevenp2, (S+4)/2-1))
325      stats[i,2] <- uroot.raw.pvalue(stats[i,1], "HEGY", NULL, dfs2, ctd, S, "pair")
326  }
327
328  # output
329
330  res <- list(statistics = stats[,1], pvalues = stats[,2],
331    method = "HEGY test for unit roots", data.name = data.name,
332    fitted.model = fit2,
333    lag.method = lag.method, lag.order = maxlag,
334    strdet = strdet, type.pvalue = pvalue,
335    bootstrap = if (pvalue == "bootstrap") bargs else NULL,
336    boot.chosen.lags = if (pvalue == "bootstrap") chosen.lags else NULL,
337    pvlabels = symnum(stats[,"p-value"], corr = FALSE, na = FALSE,
338      cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
339      symbols   =  c("***","**","*","."," ")))
340  class(res) <- "HEGYtest"
341
342  res
343}
344