1 2hegy.test <- function(x, deterministic = c(1,0,0), 3 lag.method = c("fixed", "AIC", "BIC", "AICc"), maxlag = 0, 4 pvalue = c("RS", "bootstrap", "raw"), 5 #args related to response surface method 6 rs.nobsreg = 15, 7 #args related to bootstrap method 8 boot.args = list(seed = 123, 9 #lag.method = c("fixed", "AIC", "BIC", "AICc"), maxlag = 0, 10 lag.method = lag.method[1], maxlag = maxlag, 11 byseason = FALSE, nb = 1000, BTdim = c(100, 10), debug.tid = -1)) 12{ 13 F.test.stat <- function(id) 14 { 15 if (length(id) == S) 16 { 17 if (is.null(xreg)) 18 { 19 #model with no HEGY regressors and deterministic=c(0,0,0) 20 rss1 <- sum(dx^2) 21 dfs1 <- length(dx) 22 return(((rss1 - rss2) / (dfs1 - dfs2)) / (rss2 / dfs2)) 23 } else 24 fit1 <- lm(dx ~ 0 + xreg) 25 } else 26 fit1 <- lm(dx ~ 0 + cbind(xreg, ypi[,-id])) 27 rss1 <- deviance(fit1) 28 dfs1 <- df.residual(fit1) 29 ((rss1 - rss2) / (dfs1 - dfs2)) / (rss2 / dfs2) 30 } 31 32 xreg <- NULL 33 stopifnot(is.ts(x)) 34 n <- length(x) 35 S <- frequency(x) 36 if (S < 2) 37 stop("the time series is not seasonal") 38 isSeven <- (S %% 2) == 0 39 isSevenp2 <- 2 + isSeven 40 dx <- diff(x, S) 41 42 #xreg <- if (is.matrix(xreg)) xreg[-seq_len(S),] else xreg[-seq_len(S)] 43 if (!is.null(xreg) && NROW(xreg) != length(dx)) 44 stop("the number of rows in argument ", sQuote("xreg"), 45 " does not match the length of ", sQuote("diff(x, frequency(x))")) 46 47 data.name <- deparse(substitute(x)) 48 lag.method <- match.arg(lag.method) 49 isNullxreg <- is.null(xreg) 50 pvalue <- match.arg(pvalue) 51 52 if (lag.method == "AICc" && pvalue == "RS") 53 { 54 ##NOTE 55 #response surfaces are not provided by the reference paper DE14 56 #they are available for the Hannan-Quinn criterion, see add this option here 57 lag.method <- "AIC" 58 warning("argument ", sQuote("lag.method"), " was changed to ", sQuote("AIC")) 59 } 60 61 if (pvalue == "bootstrap") 62 { 63 #if (!isNullxreg) 64 # warning("argument ", sQuote("xreg"), " was set to NULL") 65 66 #if (!is.null(boot.args$lag.method)) 67 #boot.args$lag.method <- boot.args$lag.method[1] 68 69 # default arguments 70 #bargs <- list(seed = 123, lag.method = "fixed", 71 # maxlag = 0, byseason = FALSE, nb = 1000, BTdim = c(100, 10), debug.tid = -1) 72 bargs <- list(seed = 123, lag.method = lag.method[1], 73 maxlag = maxlag, byseason = FALSE, nb = 1000, BTdim = c(100, 10), debug.tid = -1) 74 75 nms1 <- names(boot.args) 76 notinldef <- !(nms1 %in% names(bargs)) 77 wno <- which(notinldef) 78 79 if (any(notinldef)) 80 warning("the following elements in ", sQuote("boot.args"), " were omitted: ", 81 paste(sQuote(nms1[wno]), collapse = ", "), ".", call. = FALSE) 82 83 if (length(boot.args) > 0) 84 { 85 if (any(notinldef)) { 86 bargs[nms1[which(!notinldef)]] <- boot.args[-wno] 87 } else 88 bargs[nms1] <- boot.args 89 } 90 } 91 92 # storage matrix 93 94 stats <- matrix(nrow = floor(S/2) + 3, ncol = 2) 95 id <- seq.int(isSevenp2, S, 2) 96 rownames(stats) <- c("t_1", if (isSeven) "t_2" else NULL, 97 paste("F", apply(cbind(id, id + 1), 1, paste, collapse=":"), sep = "_"), 98 paste0("F_2:", S), paste0("F_1:", S)) 99 colnames(stats) <- c("statistic", "p-value") 100 101 # deterministic terms 102 103 if (all(deterministic == c(0,0,1))) 104 deterministic <- c(1,0,1) 105 106 if (deterministic[1] == 1) { 107 strdet <- "constant" 108 xreg <- cbind(xreg, c = rep(1, n - S)) 109 } else strdet <- "" 110 111 ##NOTE 112 #in principle it is not a good idea to define the intercept in "xreg" and 113 #use lm(x ~ 0 + xreg) because stats::summary.lm uses attr(z$terms, "intercept") 114 #to compute the R-squared and the F statistic includes the intercept (not the usual test), 115 #but here the R-squared and the F-statistic are not used 116 117 if (deterministic[2] == 1) { 118 xreg <- cbind(xreg, trend = seq_along(dx)) 119 strdet <- paste(strdet, "+ trend") 120 } 121 122 if (deterministic[3] == 1) 123 { 124 SD <- do.call("rbind", replicate(ceiling(length(dx)/S), diag(S), simplify = FALSE)) 125 SD <- ts(SD, frequency = S, start = start(dx)) 126 # ignore warning "'end' value not changed" 127 SD <- suppressWarnings(window(SD, start = start(dx), end = end(dx))) 128 colnames(SD) <- paste0("SD", seq_len(S)) 129 if (deterministic[1] == 1) 130 SD <- SD[,-1] 131 xreg <- cbind(xreg, SD) 132 strdet <- paste0(strdet, ifelse(deterministic[1] == 1, " + ", ""), "seasonal dummies") 133 } 134 135 if (strdet == "") 136 strdet <- "none" 137 138 # lags of the dependent variable 139 140 if (maxlag > 0) 141 { 142 dxlags <- dx 143 for (i in seq_len(maxlag)) 144 dxlags <- cbind(dxlags, lag(dx, -i)) 145 dxlags <- window(dxlags, end = end(dx))[,-1] 146 if (is.null(dim(dxlags))) 147 dxlags <- as.matrix(dxlags) 148 #NOTE this way would be a bit faster 149 #sapply(seq_len(maxlag), function(x) c(rep(NA,length.out=x), c(1:10)[1:(10-x)])) 150 colnames(dxlags) <- paste("Lag", seq_len(maxlag), sep ="") 151 152 if (lag.method == "fixed") 153 { 154 #NOTE 'data.frame' keeps column names, 'cbind' doesn't; 155 #'data.matrix' is required in order to be accepted by 'lm' 156 xreg <- data.matrix(data.frame(xreg, dxlags)) 157 } # else, do not add dxlags here since they will be selected below 158 159 } #else # maxlags == 0 #nlags == 0 160 #str.lag.order <- lag.order <- 0 161 162 # HEGY regressors 163 164 ypi <- hegy.regressors(x) 165 166 # lag order selection 167 168 if (maxlag > 0) 169 { 170 if (lag.method != "fixed") 171 { 172 # information criteria are obtained using the same number of observations 173 # regardless of the number of lags included in a given model 174 tmp <- vector("list", maxlag + 1) 175 # model with no lags 176 id <- seq_len(maxlag) 177 tmp[[1]] <- lm(dx[-id] ~ 0 + ypi[-id,] + xreg[-id,]) 178 dxlags2 <- dxlags[-id,] 179 180 #http://stackoverflow.com/questions/23154074/writing-a-loop-in-r-that-updates-a-model?rq=1 181 #the loop does not resolve the "i" and new variables are not distinguished 182 #tmp[[i+1]] <- update(tmp[[i]], . ~ . + dxlags[,i,drop=FALSE]) 183 for (i in seq_len(maxlag)) 184 tmp[[i+1]] <- update(tmp[[i]], as.formula(paste0(". ~ . + dxlags2[,",i,"]"))) 185 186 icvals <- unlist(switch(lag.method, 187 "AIC" = lapply(tmp, AIC), "BIC" = lapply(tmp, BIC), 188 "AICc" = lapply(tmp, function(x) { k <- x$rank+1; -2*logLik(x) + 2*k + 189 (2*k*(k+1))/(length(residuals(x))-k-1) }))) 190 id <- which.min(icvals) 191 # update lag order (tmp[[1]] contains 0 lags) 192 maxlag <- id - 1 193 if (maxlag > 0) 194 { 195 xreg <- data.matrix(data.frame(xreg, dxlags[,seq_len(maxlag),drop=FALSE])) 196 } 197 } # else, lag.method=="fixed", dxlags was added to reg above 198 } 199 200 # fit the model with the chosen lags (if any) 201 # using the entire sample (i.e., including the first 'maxlag' observations) 202 fit2 <- lm(dx ~ 0 + ypi + xreg) 203 204 # test statistics 205 206 rss2 <- deviance(fit2) 207 dfs2 <- df.residual(fit2) 208 209 j <- isSevenp2 210 for (i in seq.int(isSevenp2, S, 2)) 211 { 212 id <- c(i, i+1) 213 stats[j,1] <- F.test.stat(id) 214 j <- j + 1 215 } 216 217 stats[j,1] <- F.test.stat(seq.int(2, ncol(ypi))) 218 219 # F-test statistic for all HEGY regressors 220 stats[j+1,1] <- F.test.stat(seq_len(S)) 221 222 # t-test statistics 223 id <- seq_len(1 + isSeven) #+ ncxreg 224 stats[seq_along(id),1] <- coef(fit2)[id] / sqrt(diag(vcov(fit2))[id]) 225 226 # p-values 227 228 # used with pvalue equal to "RS" or "raw" 229 ctd <- paste(deterministic, collapse = "") 230 231 if (pvalue == "RS") 232 { 233 stats[1,2] <- hegy.rs.pvalue(stats[1,1], "zero", ctd, lag.method, 234 maxlag, S, dfs2, rs.nobsreg) 235 if (isSeven) 236 stats[2,2] <- hegy.rs.pvalue(stats[2,1], "pi", ctd, lag.method, 237 maxlag, S, dfs2, rs.nobsreg) 238 239 for (i in seq.int(isSevenp2, (S+4)/2-1)) 240 stats[i,2] <- hegy.rs.pvalue(stats[i,1], "pair", ctd, lag.method, 241 maxlag, S, dfs2, rs.nobsreg) 242 243 j <- nrow(stats) - 1 244 stats[j,2] <- hegy.rs.pvalue(stats[j,1], "seasall", ctd, lag.method, 245 maxlag, S, dfs2, rs.nobsreg) 246 stats[j+1,2] <- hegy.rs.pvalue(stats[j+1,1], "all", ctd, lag.method, 247 maxlag, S, dfs2, rs.nobsreg) 248 } else 249 if (pvalue == "bootstrap") 250 { 251 e <- residuals(fit2) 252 dgp.nlags <- maxlag 253 if (dgp.nlags > 0) 254 { 255 arcoefs <- coef(fit2) 256 arcoefs <- arcoefs[grepl("Lag\\d{1,3}$", names(arcoefs))] 257 if (length(arcoefs) != dgp.nlags) # debug 258 stop("unexpected number of lags") 259 } else 260 arcoefs <- 0 261 262 BTdim <- bargs$BTdim 263 nb <- bargs$nb 264 if (is.null(BTdim)) { 265 BTdim <- rep(ceiling(sqrt(nb)), 2) 266 } else 267 if (prod(BTdim) < nb) { 268 stop("the number of threads is lower than the number of bootstrap replicates") 269 } 270 271 #NOTE n-S is passed as the length of the data, 272 #the length of diff(x,S) is required by hegy_boot_pval 273 274 if (bargs$byseason) 275 { 276 e <- ts(e, frequency = S) 277 csns <- cumsum(table(cycle(e))) 278 #names(csns) <- cycle(dx)[seq.int(dgp.nlags+1, length.out=S)] 279 280 e <- unlist(split(e, cycle(e))) 281 282 } else { 283 csns <- c(0, length(e)) 284 } 285 286 boot.lag.method <- match.arg(bargs$lag.method, c("fixed", "AIC", "BIC")) 287 chosen.lags <- if (boot.lag.method == "fixed") 288 integer(1) else integer(bargs$nb) 289 290 ## removed with cuda stuff 291 ## if (@HAS_CUDA@) 292 ## { 293 ## tmp <- .C("hegy_boot_pval", dim = as.integer(c(nb, BTdim)), 294 ## seed = as.integer(bargs$seed), idc = as.integer(deterministic), 295 ## ip = as.integer(c(S, n-S, dgp.nlags, bargs$maxlag, bargs$debug.tid)), 296 ## csns = as.integer(csns), 297 ## eps = as.double(e), arcoefs = as.double(arcoefs), 298 ## stats0 = as.double(stats[,1]), 299 ## ICtype = as.integer(switch(boot.lag.method, 300 ## "fixed" = 0, "AIC" = 1, "BIC" = 2, "AICc" = 3)), 301 ## bpvals = double(nrow(stats)), chosen_lags = chosen.lags, PACKAGE = "uroot") 302 ## stats[,2] <- tmp$bpvals 303 ## chosen.lags <- tmp$chosen_lags 304 ## } else { 305 ## if (grepl("windows", .Platform$OS.type)) { 306 ## stop("GPU parallelization could not be tested on a Windows system,\n", 307 ## "consider using the function ", sQuote("hegy.boot.pval")) 308 ## } else # unix 309 ## stop("CUDA capable GPU was not available when installing the package on this system,\n", 310 ## "consider using the function ", sQuote("hegy.boot.pval")) 311 ## } 312 stop("'pvalue = bootstrap' was only available with CUDA, but\n", 313 "CUDA capablities in package 'uroot' were removed from v. 2.1-0,\n", 314 "consider using the function ", sQuote("hegy.boot.pval")) 315 316 } else 317 if (pvalue == "raw") 318 { 319 #NOTE documentation deterministic == c(0,0,1) not available 320 #in original tables (use c(1,0,1) instead) 321 stats[1,2] <- uroot.raw.pvalue(stats[1,1], "HEGY", NULL, dfs2, ctd, S, "zero") 322 if (isSeven) 323 stats[2,2] <- uroot.raw.pvalue(stats[2,1], "HEGY", NULL, dfs2, ctd, S, "pi") 324 for (i in seq.int(isSevenp2, (S+4)/2-1)) 325 stats[i,2] <- uroot.raw.pvalue(stats[i,1], "HEGY", NULL, dfs2, ctd, S, "pair") 326 } 327 328 # output 329 330 res <- list(statistics = stats[,1], pvalues = stats[,2], 331 method = "HEGY test for unit roots", data.name = data.name, 332 fitted.model = fit2, 333 lag.method = lag.method, lag.order = maxlag, 334 strdet = strdet, type.pvalue = pvalue, 335 bootstrap = if (pvalue == "bootstrap") bargs else NULL, 336 boot.chosen.lags = if (pvalue == "bootstrap") chosen.lags else NULL, 337 pvlabels = symnum(stats[,"p-value"], corr = FALSE, na = FALSE, 338 cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), 339 symbols = c("***","**","*","."," "))) 340 class(res) <- "HEGYtest" 341 342 res 343} 344