1 2#' Breusch--Godfrey Test for Panel Models 3#' 4#' Test of serial correlation for (the idiosyncratic component of) the 5#' errors in panel models. 6#' 7#' This Lagrange multiplier test uses the auxiliary model on 8#' (quasi-)demeaned data taken from a model of class `plm` which may 9#' be a `pooling` (default for formula interface), `random` or 10#' `within` model. It performs a Breusch--Godfrey test (using `bgtest` 11#' from package \CRANpkg{lmtest} on the residuals of the 12#' (quasi-)demeaned model, which should be serially uncorrelated under 13#' the null of no serial correlation in idiosyncratic errors, as 14#' illustrated in \insertCite{WOOL:10;textual}{plm}. The function 15#' takes the demeaned data, estimates the model and calls `bgtest`. 16#' 17#' Unlike most other tests for serial correlation in panels, this one 18#' allows to choose the order of correlation to test for. 19#' 20#' @aliases pbgtest 21#' @importFrom lmtest bgtest 22#' @param x an object of class `"panelmodel"` or of class `"formula"`, 23#' @param order an integer indicating the order of serial correlation 24#' to be tested for. `NULL` (default) uses the minimum number of 25#' observations over the time dimension (see also section 26#' **Details** below), 27#' @param type type of test statistic to be calculated; either 28#' `"Chisq"` (default) for the Chi-squared test statistic or `"F"` 29#' for the F test statistic, 30#' @param data only relevant for formula interface: data set for which 31#' the respective panel model (see `model`) is to be evaluated, 32#' @param model only relevant for formula interface: compute test 33#' statistic for model `pooling` (default), `random`, or `within`. 34#' When `model` is used, the `data` argument needs to be passed as 35#' well, 36#' @param \dots further arguments (see [lmtest::bgtest()]). 37#' @return An object of class `"htest"`. 38#' @note The argument `order` defaults to the minimum number of 39#' observations over the time dimension, while for 40#' `lmtest::bgtest` it defaults to `1`. 41#' @export 42#' @author Giovanni Millo 43#' @seealso For the original test in package \CRANpkg{lmtest} see 44#' [lmtest::bgtest()]. See [pdwtest()] for the analogous 45#' panel Durbin--Watson test. See [pbltest()], [pbsytest()], 46#' [pwartest()] and [pwfdtest()] for other serial correlation 47#' tests for panel models. 48#' @references 49#' 50#' \insertRef{BREU:78}{plm} 51#' 52#' \insertRef{GODF:78}{plm} 53#' 54#' \insertRef{WOOL:02}{plm} 55#' 56#' \insertRef{WOOL:10}{plm} 57#' 58#' \insertRef{WOOL:13}{plm} 59#' Sec. 12.2, pp. 421--422. 60#' @keywords htest 61#' @examples 62#' 63#' data("Grunfeld", package = "plm") 64#' g <- plm(inv ~ value + capital, data = Grunfeld, model = "random") 65#' 66#' # panelmodel interface 67#' pbgtest(g) 68#' pbgtest(g, order = 4) 69#' 70#' # formula interface 71#' pbgtest(inv ~ value + capital, data = Grunfeld, model = "random") 72#' 73#' # F test statistic (instead of default type="Chisq") 74#' pbgtest(g, type="F") 75#' pbgtest(inv ~ value + capital, data = Grunfeld, model = "random", type = "F") 76#' 77pbgtest <- function (x, ...) { 78 UseMethod("pbgtest") 79} 80 81#' @rdname pbgtest 82#' @export 83pbgtest.panelmodel <- function(x, order = NULL, type = c("Chisq", "F"), ...) { 84 ## residual serial correlation test based on the residuals of the demeaned 85 ## model (see Wooldridge (2002), p. 288) and the regular lmtest::bgtest() 86 87 ## structure: 88 ## 1: take demeaned data from 'plm' object 89 ## 2: est. auxiliary model by OLS on demeaned data 90 ## 3: apply lmtest::bgtest() to auxiliary model and return the result 91 92 model <- describe(x, "model") 93 effect <- describe(x, "effect") 94 theta <- x$ercomp$theta 95 96 ## retrieve demeaned data 97 demX <- model.matrix(x, model = model, effect = effect, theta = theta, cstcovar.rm = "all") 98 demy <- pmodel.response(model.frame(x), model = model, effect = effect, theta = theta) 99 ## ...and group numerosities 100 Ti <- pdim(x)$Tint$Ti 101 ## set lag order to minimum group numerosity if not specified by user 102 ## (check whether this is sensible) 103 if(is.null(order)) order <- min(Ti) 104 105 ## lmtest::bgtest on the demeaned model: 106 107 ## pbgtest is the return value of lmtest::bgtest, exception made for the method attribute 108 auxformula <- demy ~ demX - 1 109 lm.mod <- lm(auxformula) 110 bgtest <- bgtest(lm.mod, order = order, type = type, ...) 111 bgtest$method <- "Breusch-Godfrey/Wooldridge test for serial correlation in panel models" 112 bgtest$alternative <- "serial correlation in idiosyncratic errors" 113 bgtest$data.name <- data.name(x) 114 names(bgtest$statistic) <- if(length(bgtest$parameter) == 1) "chisq" else "F" 115 return(bgtest) 116} 117 118#' @rdname pbgtest 119#' @export 120pbgtest.formula <- function(x, order = NULL, type = c("Chisq", "F"), data, model=c("pooling", "random", "within"), ...) { 121 ## formula method for pbgtest; 122 ## defaults to a pooling model 123 cl <- match.call(expand.dots = TRUE) 124 if (names(cl)[3L] == "") names(cl)[3L] <- "data" 125 if (is.null(cl$model)) cl$model <- "pooling" 126 names(cl)[2L] <- "formula" 127 m <- match(plm.arg, names(cl), 0) 128 cl <- cl[c(1L, m)] 129 cl[[1L]] <- quote(plm) 130 plm.model <- eval(cl,parent.frame()) 131 pbgtest(plm.model, order = order, type = type, data = data, ...) 132} 133 134#' Wooldridge's Test for Unobserved Effects in Panel Models 135#' 136#' Semi-parametric test for the presence of (individual or time) unobserved 137#' effects in panel models. 138#' 139#' This semi-parametric test checks the null hypothesis of zero 140#' correlation between errors of the same group. Therefore, it has 141#' power both against individual effects and, more generally, any kind 142#' of serial correlation. 143#' 144#' The test relies on large-N asymptotics. It is valid under error 145#' heteroskedasticity and departures from normality. 146#' 147#' The above is valid if `effect="individual"`, which is the most 148#' likely usage. If `effect="time"`, symmetrically, the test relies on 149#' large-T asymptotics and has power against time effects and, more 150#' generally, against cross-sectional correlation. 151#' 152#' If the panelmodel interface is used, the inputted model must be a pooling 153#' model. 154#' 155#' @aliases pwtest 156#' @param x an object of class `"formula"`, or an estimated model of class 157#' `panelmodel`, 158#' @param effect the effect to be tested for, one of `"individual"` 159#' (default) or `"time"`, 160#' @param data a `data.frame`, 161#' @param \dots further arguments passed to `plm`. 162#' @return An object of class `"htest"`. 163#' @export 164#' @author Giovanni Millo 165#' @seealso [pbltest()], [pbgtest()], 166#' [pdwtest()], [pbsytest()], [pwartest()], 167#' [pwfdtest()] for tests for serial correlation in panel models. 168#' [plmtest()] for tests for random effects. 169#' @references 170#' 171#' \insertRef{WOOL:02}{plm} 172#' 173#' \insertRef{WOOL:10}{plm} 174#' 175#' @keywords htest 176#' @examples 177#' 178#' data("Produc", package = "plm") 179#' ## formula interface 180#' pwtest(log(gsp) ~ log(pcap) + log(pc) + log(emp) + unemp, data = Produc) 181#' pwtest(log(gsp) ~ log(pcap) + log(pc) + log(emp) + unemp, data = Produc, effect = "time") 182#' 183#' ## panelmodel interface 184#' # first, estimate a pooling model, than compute test statistics 185#' form <- formula(log(gsp) ~ log(pcap) + log(pc) + log(emp) + unemp) 186#' pool_prodc <- plm(form, data = Produc, model = "pooling") 187#' pwtest(pool_prodc) # == effect="individual" 188#' pwtest(pool_prodc, effect="time") 189#' 190pwtest <- function(x, ...){ 191 UseMethod("pwtest") 192} 193 194#' @rdname pwtest 195#' @export 196pwtest.formula <- function(x, data, effect = c("individual", "time"), ...) { 197 198 effect <- match.arg(effect, choices = c("individual", "time")) # match effect to pass it on to pwtest.panelmodel 199 200 cl <- match.call(expand.dots = TRUE) 201 if (names(cl)[3] == "") names(cl)[3] <- "data" 202 if (is.null(cl$model)) cl$model <- "pooling" 203 if (cl$model != "pooling") stop("pwtest only relevant for pooling models") 204 names(cl)[2] <- "formula" 205 m <- match(plm.arg, names(cl), 0) 206 cl <- cl[c(1L, m)] 207 cl[[1L]] <- quote(plm) 208 plm.model <- eval(cl,parent.frame()) 209 pwtest.panelmodel(plm.model, effect = effect, ...) # pass on desired 'effect' argument to pwtest.panelmodel 210 211 ## "RE" test a la Wooldridge (2002/2010), see 10.4.4 212 ## (basically the scaled and standardized estimator for sigma from REmod) 213 ## does not rely on normality or homoskedasticity; 214 ## H0: composite errors uncorrelated 215 216 ## ref. Wooldridge (2002), pp. 264-265; Wooldridge (2010), pp. 299-300 217 218 ######### from here generic testing interface from 219 ######### plm to my code 220} 221 222#' @rdname pwtest 223#' @export 224pwtest.panelmodel <- function(x, effect = c("individual", "time"), ...) { 225 if (describe(x, "model") != "pooling") stop("pwtest only relevant for pooling models") 226 effect <- match.arg(effect, choices = c("individual", "time")) 227 data <- model.frame(x) 228 ## extract indices 229 230 ## if effect="individual" std., else swap 231 xindex <- unclass(attr(data, "index")) # unclass for speed 232 if (effect == "individual"){ 233 index <- xindex[[1L]] 234 tindex <- xindex[[2L]] 235 } 236 else{ 237 index <- xindex[[2L]] 238 tindex <- xindex[[1L]] 239 } 240 ## det. number of groups and df 241 n <- length(unique(index)) 242 X <- model.matrix(x) 243 244 k <- ncol(X) 245 ## det. total number of obs. (robust vs. unbalanced panels) 246 nT <- nrow(X) 247 ## det. max. group numerosity 248 t <- max(tapply(X[ , 1L], index, length)) 249 250 ## ref. Wooldridge (2002), p.264 / Wooldridge (2010), p.299 251 252 ## extract resids 253 u <- x$residuals 254 255 ## est. random effect variance 256 ## "pre-allocate" an empty list of length n 257 tres <- vector("list", n) 258 259 ## list of n "empirical omega-blocks" 260 ## with averages of xproducts of t(i) residuals 261 ## for each group 1..n 262 ## (possibly different sizes if unbal., thus a list 263 ## and thus, unlike Wooldridge (eq.10.37), we divide 264 ## every block by *its* t(t-1)/2) 265 unind <- unique(index) # ???? 266 267 for(i in 1:n) { 268 ut <- u[index == unind[i]] 269 tres[[i]] <- ut %o% ut 270 } 271 272 ## det. # of upper triangle members (n*t(t-1)/2 if balanced) 273 ## no needed, only for illustration 274 # ti <- vapply(tres, function(x) dim(x)[[1L]], FUN.VALUE = 0.0, USE.NAMES = FALSE) 275 # uptrinum <- sum(ti*(ti-1)/2) 276 277 ## sum over all upper triangles of emp. omega blocks: 278 ## and sum over resulting vector (df corrected) 279 sum.uptri <- vapply(tres, function(x) sum(x[upper.tri(x, diag = FALSE)]), FUN.VALUE = 0.0, USE.NAMES = FALSE) 280 W <- sum(sum.uptri) # /sqrt(n) simplifies out 281 282 ## calculate se(Wstat) as in 10.40 283 seW <- sqrt(as.numeric(crossprod(sum.uptri))) 284 285 ## NB should we apply a df correction here, maybe that of the standard 286 ## RE estimator? (see page 261) 287 288 Wstat <- W/seW 289 names(Wstat) <- "z" 290 pW <- 2*pnorm(abs(Wstat), lower.tail = FALSE) # unlike LM, test is two-tailed! 291 292 ## insert usual htest features 293 RVAL <- list(statistic = Wstat, 294 parameter = NULL, 295 method = paste("Wooldridge's test for unobserved", 296 effect, "effects"), 297 alternative = "unobserved effect", 298 p.value = pW, 299 data.name = paste(deparse(substitute(formula)))) 300 class(RVAL) <- "htest" 301 return(RVAL) 302} 303 304#' Wooldridge Test for AR(1) Errors in FE Panel Models 305#' 306#' Test of serial correlation for (the idiosyncratic component of) the errors 307#' in fixed--effects panel models. 308#' 309#' As \insertCite{WOOL:10;textual}{plm}, Sec. 10.5.4 observes, under 310#' the null of no serial correlation in the errors, the residuals of a 311#' FE model must be negatively serially correlated, with 312#' \eqn{cor(\hat{u}_{it}, \hat{u}_{is})=-1/(T-1)} for each 313#' \eqn{t,s}. He suggests basing a test for this null hypothesis on a 314#' pooled regression of FE residuals on their first lag: 315#' \eqn{\hat{u}_{i,t} = \alpha + \delta \hat{u}_{i,t-1} + 316#' \eta_{i,t}}. Rejecting the restriction \eqn{\delta = -1/(T-1)} 317#' makes us conclude against the original null of no serial 318#' correlation. 319#' 320#' `pwartest` estimates the `within` model and retrieves residuals, 321#' then estimates an AR(1) `pooling` model on them. The test statistic 322#' is obtained by applying a F test to the latter model to test the 323#' above restriction on \eqn{\delta}, setting the covariance matrix to 324#' `vcovHC` with the option `method="arellano"` to control for serial 325#' correlation. 326#' 327#' Unlike the [pbgtest()] and [pdwtest()], this test does 328#' not rely on large--T asymptotics and has therefore good properties in 329#' ``short'' panels. Furthermore, it is robust to general heteroskedasticity. 330#' 331#' @aliases pwartest 332#' @param x an object of class `formula` or of class `panelmodel`, 333#' @param data a `data.frame`, 334#' @param \dots further arguments to be passed on to `vcovHC` (see 335#' Details and Examples). 336#' @return An object of class `"htest"`. 337#' @export 338#' @author Giovanni Millo 339#' @seealso [pwfdtest()], [pdwtest()], [pbgtest()], [pbltest()], 340#' [pbsytest()]. 341#' @references 342#' 343#' \insertRef{WOOL:02}{plm} 344#' 345#' \insertRef{WOOL:10}{plm} 346#' 347#' @keywords htest 348#' @examples 349#' 350#' data("EmplUK", package = "plm") 351#' pwartest(log(emp) ~ log(wage) + log(capital), data = EmplUK) 352#' 353#' # pass argument 'type' to vcovHC used in test 354#' pwartest(log(emp) ~ log(wage) + log(capital), data = EmplUK, type = "HC3") 355#' 356#' 357pwartest <- function(x, ...) { 358 UseMethod("pwartest") 359} 360 361#' @rdname pwartest 362#' @export 363pwartest.formula <- function(x, data, ...) { 364 ## small-sample serial correlation test for FE models 365 ## ref.: Wooldridge (2002/2010) 10.5.4 366 367 cl <- match.call(expand.dots = TRUE) 368 if (is.null(cl$model)) cl$model <- "within" 369 if (cl$model != "within") stop("pwartest only relevant for within models") 370 if (names(cl)[3L] == "") names(cl)[3L] <- "data" 371 names(cl)[2L] <- "formula" 372 m <- match(plm.arg, names(cl), 0) 373 cl <- cl[c(1L, m)] 374 cl[[1L]] <- quote(plm) 375 plm.model <- eval(cl, parent.frame()) 376 pwartest(plm.model, ...) 377} 378 379#' @rdname pwartest 380#' @export 381pwartest.panelmodel <- function(x, ...) { 382 383 if (describe(x, "model") != "within") stop("pwartest only relevant for within models") 384 385 FEres <- x$residuals 386 data <- model.frame(x) 387 388 ## this is a bug fix for incorrect naming of the "data" attr. 389 ## for the pseries in pdata.frame() 390 391 attr(FEres, "data") <- NULL 392 N <- length(FEres) 393 FEres.1 <- c(NA, FEres[1:(N-1)]) 394 xindex <- unclass(attr(data, "index")) # unclass for speed 395 id <- xindex[[1L]] 396 time <- xindex[[2L]] 397 lagid <- as.numeric(id) - c(NA, as.numeric(id)[1:(N-1)]) 398 FEres.1[lagid != 0] <- NA 399 data <- data.frame(id, time, FEres = unclass(FEres), FEres.1 = unclass(FEres.1)) 400 names(data)[c(1L, 2L)] <- c("id", "time") 401 data <- na.omit(data) 402 403 # calc. auxiliary model 404 auxmod <- plm(FEres ~ FEres.1, data = data, model = "pooling", index = c("id", "time")) 405 406 ## calc. theoretical rho under H0: no serial corr. in errors 407 t. <- pdim(x)$nT$T 408 rho.H0 <- -1/(t.-1) 409 myH0 <- paste("FEres.1 = ", as.character(rho.H0), sep="") 410 411 ## test H0: rho=rho.H0 with HAC 412 myvcov <- function(x) vcovHC(x, method = "arellano", ...) # more params may be passed via ellipsis 413 414 # calc F stat with restriction rho.H0 and robust vcov 415 FEARstat <- ((coef(auxmod)["FEres.1"] - rho.H0)/sqrt(myvcov(auxmod)["FEres.1", "FEres.1"]))^2 416 names(FEARstat) <- "F" 417 df1 <- c("df1" = 1) 418 df2 <- c("df2" = df.residual(auxmod)) 419 pFEARstat <- pf(FEARstat, df1 = df1, df2 = df2, lower.tail = FALSE) 420 421 ## insert usual htest features 422 RVAL <- list(statistic = FEARstat, 423 parameter = c(df1, df2), 424 p.value = pFEARstat, 425 method = "Wooldridge's test for serial correlation in FE panels", 426 alternative = "serial correlation", 427 data.name = paste(deparse(substitute(x)))) 428 class(RVAL) <- "htest" 429 return(RVAL) 430} 431 432## Bera, Sosa-Escudero and Yoon type LM test for random effects 433## under serial correlation (H0: no random effects) or the inverse; 434## test="ar": serial corr. test robust vs. RE 435## test="re": RE test robust vs. serial corr. 436## test="j": joint test for serial corr. and random effects 437 438# Reference for the _balanced_ tests="ar"|"re": 439# Bera/Sosa-Escudero/Yoon (2001), Tests for the error component model in the presence of local misspecifcation, 440# Journal of Econometrics 101 (2001), pp. 1-23. 441# 442# for original (balanced) test="j": Baltagi/Li (1991), A joint test for serial correlation and random individual effects, 443# Statistics & Probability Letters 11 (1991), pp. 277-280. 444# 445# Reference for _un_balanced versions of all three tests (boil down to the balanced versions for balanced panels): 446# Sosa-Escudero/Bera (2008), Tests for unbalanced error-components models under local misspecification, 447# The Stata Journal (2008), Vol. 8, Number 1, pp. 68-78. 448# 449# Concise treatment of only _balanced_ tests in 450# Baltagi (2005), Econometric Analysis of Panel Data, 3rd edition, pp. 96-97 451# or Baltagi (2013), Econometric Analysis of Panel Data, 5th edition, pp. 108. 452# 453# 454## Implementation follows the formulae for unbalanced panels, which reduce for balanced data to the formulae for balanced panels. 455## 456## Notation in code largely follows Sosa-Escudero/Bera (2008) (m in Sosa-Escudero/Bera (2008) is total number of observations -> N_obs) 457## NB: Baltagi's book matrix A is slightly different defined: A in Baltagi is -A in Sosa-Escudera/Bera (2008) 458 459 460 461#' Bera, Sosa-Escudero and Yoon Locally--Robust Lagrange Multiplier 462#' Tests for Panel Models and Joint Test by Baltagi and Li 463#' 464#' Test for residual serial correlation (or individual random effects) 465#' locally robust vs. individual random effects (serial correlation) 466#' for panel models and joint test of serial correlation and the 467#' random effect specification by Baltagi and Li. 468#' 469#' These Lagrange multiplier tests are robust vs. local 470#' misspecification of the alternative hypothesis, i.e., they test the 471#' null of serially uncorrelated residuals against AR(1) residuals in 472#' a pooling model, allowing for local departures from the assumption 473#' of no random effects; or they test the null of no random effects 474#' allowing for local departures from the assumption of no serial 475#' correlation in residuals. They use only the residuals of the 476#' pooled OLS model and correct for local misspecification as outlined 477#' in \insertCite{BERA:SOSA:YOON:01;textual}{plm}. 478#' 479#' For `test = "re"`, the default (`re.normal = TRUE`) is to compute 480#' a one-sided test which is expected to lead to a more powerful test 481#' (asymptotically N(0,1) distributed). Setting `re.normal = FALSE` gives 482#' the two-sided test (asymptotically chi-squared(2) distributed). Argument 483#' `re.normal` is irrelevant for all other values of `test`. 484#' 485#' The joint test of serial correlation and the random effect 486#' specification (`test = "j"`) is due to 487#' \insertCite{BALT:LI:91;textual}{plm} (also mentioned in 488#' \insertCite{BALT:LI:95;textual}{plm}, pp. 135--136) and is added 489#' for convenience under this same function. 490#' 491#' The unbalanced version of all tests are derived in 492#' \insertCite{SOSA:BERA:08;textual}{plm}. The functions implemented 493#' are suitable for balanced as well as unbalanced panel data sets. 494#' 495#' A concise treatment of the statistics for only balanced panels is 496#' given in \insertCite{BALT:13;textual}{plm}, p. 108. 497#' 498#' Here is an overview of how the various values of the `test` 499#' argument relate to the literature: 500#' 501#' \itemize{ \item `test = "ar"`: \itemize{ \item \eqn{RS*_{\rho}} in Bera 502#' et al. (2001), p. 9 (balanced) \item \eqn{LM*_{\rho}} in Baltagi (2013), p. 503#' 108 (balanced) \item \eqn{RS*_{\lambda}} in Sosa-Escudero/Bera (2008), p. 73 504#' (unbalanced) } 505#' 506#' \item `test = "re", re.normal = TRUE` (default) (one-sided test, 507#' asymptotically N(0,1) distributed): \itemize{ \item \eqn{RSO*_{\mu}} in Bera 508#' et al. (2001), p. 11 (balanced) \item \eqn{RSO*_{\mu}} in Sosa-Escudero/Bera 509#' (2008), p. 75 (unbalanced) } 510#' 511#' \item `test = "re", re.normal = FALSE` (two-sided test, asymptotically 512#' chi-squared(2) distributed): \itemize{ \item \eqn{RS*_{\mu}} in Bera et al. 513#' (2001), p. 7 (balanced) \item \eqn{LM*_{\mu}} in Baltagi (2013), p. 108 514#' (balanced) \item \eqn{RS*_{\mu}} in Sosa-Escudero/Bera (2008), p. 73 515#' (unbalanced) } 516#' 517#' \item `test = "j"`: \itemize{ \item \eqn{RS_{\mu\rho}} in Bera et al. 518#' (2001), p. 10 (balanced) \item \eqn{LM} in Baltagi/Li (2001), p. 279 519#' (balanced) \item \eqn{LM_{1}} in Baltagi and Li (1995), pp. 135--136 520#' (balanced) \item \eqn{LM1} in Baltagi (2013), p. 108 (balanced) \item 521#' \eqn{RS_{\lambda\rho}} in Sosa-Escudero/Bera (2008), p. 74 (unbalanced) } } 522#' 523#' @aliases pbsytest 524#' @param x an object of class `formula` or of class `panelmodel`, 525#' @param data a `data.frame`, 526#' @param test a character string indicating which test to perform: 527#' first--order serial correlation (`"ar"`), random effects (`"re"`) 528#' or joint test for either of them (`"j"`), 529#' @param re.normal logical, only relevant for `test = "re"`: `TRUE` 530#' (default) computes the one-sided `"re"` test, `FALSE` the 531#' two-sided test (see also Details); not relevant for other values of 532#' `test` and, thus, should be `NULL`, 533#' @param \dots further arguments. 534#' @return An object of class `"htest"`. 535#' @export 536#' @author Giovanni Millo (initial implementation) & Kevin Tappe (extension to 537#' unbalanced panels) 538#' @seealso [plmtest()] for individual and/or time random effects 539#' tests based on a correctly specified model; [pbltest()], 540#' [pbgtest()] and [pdwtest()] for serial correlation tests 541#' in random effects models. 542#' @references 543#' 544#' \insertRef{BERA:SOSA:YOON:01}{plm} 545#' 546#' \insertRef{BALT:13}{plm} 547#' 548#' \insertRef{BALT:LI:91}{plm} 549#' 550#' \insertRef{BALT:LI:95}{plm} 551#' 552#' \insertRef{SOSA:BERA:08}{plm} 553#' 554#' @keywords htest 555#' 556#' @examples 557#' 558#' ## Bera et. al (2001), p. 13, table 1 use 559#' ## a subset of the original Grunfeld 560#' ## data which contains three errors -> construct this subset: 561#' data("Grunfeld", package = "plm") 562#' Grunsubset <- rbind(Grunfeld[1:80, ], Grunfeld[141:160, ]) 563#' Grunsubset[Grunsubset$firm == 2 & Grunsubset$year %in% c(1940, 1952), ][["inv"]] <- c(261.6, 645.2) 564#' Grunsubset[Grunsubset$firm == 2 & Grunsubset$year == 1946, ][["capital"]] <- 232.6 565#' 566#' ## default is AR testing (formula interface) 567#' pbsytest(inv ~ value + capital, data = Grunsubset, index = c("firm", "year")) 568#' pbsytest(inv ~ value + capital, data = Grunsubset, index = c("firm", "year"), test = "re") 569#' pbsytest(inv ~ value + capital, data = Grunsubset, index = c("firm", "year"), 570#' test = "re", re.normal = FALSE) 571#' pbsytest(inv ~ value + capital, data = Grunsubset, index = c("firm", "year"), test = "j") 572#' 573#' ## plm interface 574#' mod <- plm(inv ~ value + capital, data = Grunsubset, model = "pooling") 575#' pbsytest(mod) 576#' 577pbsytest <- function (x, ...) { 578 UseMethod("pbsytest") 579} 580 581#' @rdname pbsytest 582#' @export 583pbsytest.formula <- function(x, data, ..., test = c("ar", "re", "j"), re.normal = if (test == "re") TRUE else NULL) { 584 585 ######### from here generic testing interface from 586 ######### plm to my code 587 if (length(test) == 1L) test <- tolower(test) # for backward compatibility: allow upper case 588 test <- match.arg(test) 589 590 cl <- match.call(expand.dots = TRUE) 591 if (is.null(cl$model)) cl$model <- "pooling" 592 if (cl$model != "pooling") stop("pbsytest only relevant for pooling models") 593 names(cl)[2L] <- "formula" 594 if (names(cl)[3L] == "") names(cl)[3L] <- "data" 595 m <- match(plm.arg, names(cl), 0) 596 cl <- cl[c(1, m)] 597 cl[[1L]] <- as.name("plm") 598 plm.model <- eval(cl, parent.frame()) 599 pbsytest(plm.model, test = test, re.normal = re.normal, ...) 600} 601 602#' @rdname pbsytest 603#' @export 604pbsytest.panelmodel <- function(x, test = c("ar", "re", "j"), re.normal = if (test == "re") TRUE else NULL, ...) { 605 test <- match.arg(test) 606 if (describe(x, "model") != "pooling") stop("pbsytest only relevant for pooling models") 607 608 # interface check for argument re.normal 609 if (test != "re" && !is.null(re.normal)) { 610 stop("argument 're.normal' only relevant for test = \"re\", set re.normal = NULL for other tests")} 611 612 poolres <- x$residuals 613 data <- model.frame(x) 614 ## extract indices 615 index <- attr(data, "index") 616 iindex <- index[[1L]] 617 tindex <- index[[2L]] 618 619 620 ## till here. 621 ## ordering here if needed. 622 623 ## this needs ordering of obs. on time, regardless 624 ## whether before that on groups or after 625 626 ## and numerosity check 627 628 ## order by group, then time 629 oo <- order(iindex,tindex) 630 ind <- iindex[oo] 631 tind <- tindex[oo] 632 poolres <- poolres[oo] 633 pdim <- pdim(x) 634 n <- max(pdim$Tint$n) ## det. number of groups 635 T_i <- pdim$Tint$Ti 636 N_t <- pdim$Tint$nt 637 t <- max(T_i) ## det. max. group numerosity 638 N_obs <- pdim$nT$N ## det. total number of obs. (m in Sosa-Escudera/Bera (2008), p. 69) 639 640 ## calc. matrices A and B: 641 # Sosa-Escudera/Bera (2008), p. 74 642 # Baltagi (2013), p. 108 defines A=(S1/S2)-1 and, thus, has slightly different formulae [opposite sign in Baltagi] 643 S1 <- as.numeric(crossprod(tapply(poolres,ind,sum))) # == sum(tapply(poolres,ind,sum)^2) 644 S2 <- as.numeric(crossprod(poolres)) # == sum(poolres^2) 645 A <- 1 - S1/S2 646 647 unind <- unique(ind) 648 uu <- rep(NA, length(unind)) 649 uu1 <- rep(NA, length(unind)) 650 for(i in 1:length(unind)) { 651 u.t <- poolres[ind == unind[i]] 652 u.t.1 <- u.t[-length(u.t)] 653 u.t <- u.t[-1L] 654 uu[i] <- crossprod(u.t) 655 uu1[i] <- crossprod(u.t, u.t.1) 656 } 657 B <- sum(uu1)/sum(uu) 658 659 a <- as.numeric(crossprod(T_i)) # Sosa-Escudera/Bera (2008), p. 69 660 661 switch(test, 662 "ar" = { 663 # RS*_lambda from Sosa-Escudero/Bera (2008), p. 73 (unbalanced formula) 664 stat <- (B + (((N_obs - n)/(a - N_obs)) * A))^2 * (((a - N_obs)*N_obs^2) / ((N_obs - n)*(a - 3*N_obs + 2*n))) 665 df <- c(df = 1) 666 names(stat) <- "chisq" 667 pstat <- pchisq(stat, df = df, lower.tail = FALSE) 668 tname <- "Bera, Sosa-Escudero and Yoon locally robust test" 669 myH0_alt <- "AR(1) errors sub random effects" 670 }, 671 672 "re" = { 673 if(re.normal) { 674 # RSO*_mu from Sosa-Escudero/Bera (2008), p. 75 (unbalanced formula), normally distributed 675 stat <- -sqrt( (N_obs^2) / (2*(a - 3*N_obs + 2*n))) * (A + 2*B) 676 names(stat) <- "z" 677 df <- NULL 678 pstat <- pnorm(stat, lower.tail = FALSE) 679 tname <- "Bera, Sosa-Escudero and Yoon locally robust test (one-sided)" 680 myH0_alt <- "random effects sub AR(1) errors" 681 } else { 682 # RS*_mu from Sosa-Escudero/Bera (2008), p. 73 (unbalanced formula), chisq(1) 683 stat <- ((N_obs^2) * (A + 2*B)^2) / (2*(a - 3*N_obs + 2*n)) 684 names(stat) <- "chisq" 685 df <- c(df = 1) 686 pstat <- pchisq(stat, df = df, lower.tail = FALSE) 687 tname <- "Bera, Sosa-Escudero and Yoon locally robust test (two-sided)" 688 myH0_alt <- "random effects sub AR(1) errors" 689 } 690 }, 691 692 "j" = { 693 # RS_lambda_mu in Sosa-Escudero/Bera (2008), p. 74 (unbalanced formula) 694 stat <- N_obs^2 * ( ((A^2 + 4*A*B + 4*B^2) / (2*(a - 3*N_obs + 2*n))) + (B^2/(N_obs - n))) 695 # Degrees of freedom in the joint test (test="j") of Baltagi/Li (1991) are 2 (chisquare(2) distributed), 696 # see Baltagi/Li (1991), p. 279 and again in Baltagi/Li (1995), p. 136 697 df <- c(df = 2) 698 names(stat) <- "chisq" 699 pstat <- pchisq(stat, df = df, lower.tail = FALSE) 700 tname <- "Baltagi and Li AR-RE joint test" 701 myH0_alt <- "AR(1) errors or random effects" 702 } 703 ) # END switch 704 705 dname <- paste(deparse(substitute(formula))) 706 balanced.type <- if(pdim$balanced) "balanced" else "unbalanced" 707 tname <- paste(tname, "-", balanced.type, "panel", collapse = " ") 708 709 RVAL <- list(statistic = stat, 710 parameter = df, 711 method = tname, 712 alternative = myH0_alt, 713 p.value = pstat, 714 data.name = dname) 715 class(RVAL) <- "htest" 716 return(RVAL) 717} 718 719#' Durbin--Watson Test for Panel Models 720#' 721#' Test of serial correlation for (the idiosyncratic component of) the errors 722#' in panel models. 723#' 724#' This Durbin--Watson test uses the auxiliary model on 725#' (quasi-)demeaned data taken from a model of class `plm` which may 726#' be a `pooling` (the default), `random` or `within` model. It 727#' performs a Durbin--Watson test (using `dwtest` from package 728#' \CRANpkg{lmtest} on the residuals of the (quasi-)demeaned model, 729#' which should be serially uncorrelated under the null of no serial 730#' correlation in idiosyncratic errors. The function takes the 731#' demeaned data, estimates the model and calls `dwtest`. Thus, this 732#' test does not take the panel structure of the residuals into 733#' consideration; it shall not be confused with the generalized 734#' Durbin-Watson test for panels in `pbnftest`. 735#' 736#' @aliases pdwtest 737#' @importFrom lmtest dwtest 738#' @param x an object of class `"panelmodel"` or of class 739#' `"formula"`, 740#' @param data a `data.frame`, 741#' @param \dots further arguments to be passed on to `dwtest`, 742#' e.g., `alternative`, see [lmtest::dwtest()] for 743#' further details. 744#' @return An object of class `"htest"`. 745#' @export 746#' @author Giovanni Millo 747#' @seealso [lmtest::dwtest()] for the Durbin--Watson test 748#' in \CRANpkg{lmtest}, [pbgtest()] for the analogous 749#' Breusch--Godfrey test for panel models, 750#' [lmtest::bgtest()] for the Breusch--Godfrey test for 751#' serial correlation in the linear model. [pbltest()], 752#' [pbsytest()], [pwartest()] and 753#' [pwfdtest()] for other serial correlation tests for 754#' panel models. 755#' 756#' For the Durbin-Watson test generalized to panel data models see 757#' [pbnftest()]. 758#' @references 759#' 760#' \insertRef{DURB:WATS:50}{plm} 761#' 762#' \insertRef{DURB:WATS:51}{plm} 763#' 764#' \insertRef{DURB:WATS:71}{plm} 765#' 766#' \insertRef{WOOL:02}{plm} 767#' 768#' \insertRef{WOOL:10}{plm} 769#' 770#' @keywords htest 771#' @examples 772#' 773#' data("Grunfeld", package = "plm") 774#' g <- plm(inv ~ value + capital, data = Grunfeld, model="random") 775#' pdwtest(g) 776#' pdwtest(g, alternative="two.sided") 777#' ## formula interface 778#' pdwtest(inv ~ value + capital, data=Grunfeld, model="random") 779#' 780pdwtest <- function (x, ...) { 781 UseMethod("pdwtest") 782} 783 784#' @rdname pdwtest 785#' @export 786pdwtest.panelmodel <- function(x, ...) { 787 ## does not respect panel structure: 788 ## residual serial correlation test based on the residuals of the demeaned 789 ## model and passed on to lmtest::dwtest() for the original DW test 790 ## approach justified in Wooldridge (2002/2010), Econometric Analysis of Cross Section and Panel Data, p. 288/328. 791 ## 792 ## For the Bhargava et al. (1982) generalized DW test see pbnftest() 793 794 ## structure: 795 ## 1: take demeaned data from 'plm' object 796 ## 2: est. auxiliary model by OLS on demeaned data 797 ## 3: apply lmtest::dwtest() to auxiliary model and return the result 798 799 model <- describe(x, "model") 800 effect <- describe(x, "effect") 801 theta <- x$ercomp$theta 802 803 ## retrieve demeaned data 804 demX <- model.matrix(x, model = model, effect = effect, theta = theta, cstcovar.rm = "all") 805 demy <- pmodel.response(model.frame(x), model = model, effect = effect, theta = theta) 806 807 ## lmtest::dwtest on the demeaned model: 808 809 ## ARtest is the return value of lmtest::dwtest, exception made for the method attribute 810 dots <- list(...) 811 order.by <- if(is.null(dots$order.by)) NULL else dots$order.by 812 alternative <- if(is.null(dots$alternative)) "greater" else dots$alternative 813 iterations <- if(is.null(dots$iterations)) 15 else dots$iterations 814 exact <- if(is.null(dots$exact)) NULL else dots$exact 815 tol <- if(is.null(dots$tol)) 1e-10 else dots$tol 816 817 demy <- remove_pseries_features(demy) # needed as lmtest::dwtest cannot cope with pseries 818 819 auxformula <- demy ~ demX - 1 820 lm.mod <- lm(auxformula) 821 822 ARtest <- dwtest(lm.mod, order.by = order.by, 823 alternative = alternative, 824 iterations = iterations, exact = exact, tol = tol) 825 826 # overwrite elements of the values produced by lmtest::dwtest 827 ARtest$method <- "Durbin-Watson test for serial correlation in panel models" 828 ARtest$alternative <- "serial correlation in idiosyncratic errors" 829 ARtest$data.name <- data.name(x) 830 return(ARtest) 831} 832 833#' @rdname pdwtest 834#' @export 835pdwtest.formula <- function(x, data, ...) { 836 ## formula method for pdwtest; 837 ## defaults to pooling model 838 839 cl <- match.call(expand.dots = TRUE) 840 if (is.null(cl$model)) cl$model <- "pooling" 841 names(cl)[2L] <- "formula" 842 if (names(cl)[3L] == "") names(cl)[3L] <- "data" 843 m <- match(plm.arg, names(cl), 0) 844 cl <- cl[c(1L, m)] 845 cl[[1L]] <- quote(plm) 846 plm.model <- eval(cl, parent.frame()) 847 pdwtest(plm.model, ...) 848} 849 850 851 852## references: 853## * balanced and consecutive: 854## Bhargava/Franzini/Narendranathan (1982), Serial Correlation and the Fixed Effects Model, Review of Economic Studies (1982), XLIX(4), pp. 533-549. 855## (also in Baltagi (2005/2013), p. 98-99/109-110 for FE application) 856## * unbalanced and/or non-consecutive: modified BNF statistic and LBI statistic 857## Baltagi/Wu (1999), Unequally spaced panel data regressions with AR(1) disturbances. Econometric Theory, 15(6), pp. 814-823. 858## (an example is also in Baltagi (2005/2013), p. 90/101) 859 860 861 862#' Modified BNF--Durbin--Watson Test and Baltagi--Wu's LBI Test for Panel 863#' Models 864#' 865#' Tests for AR(1) disturbances in panel models. 866#' 867#' The default, `test = "bnf"`, gives the (modified) BNF statistic, 868#' the generalised Durbin-Watson statistic for panels. For balanced 869#' and consecutive panels, the reference is 870#' Bhargava/Franzini/Narendranathan (1982). The modified BNF is given 871#' for unbalanced and/or non-consecutive panels (d1 in formula 16 of 872#' \insertCite{BALT:WU:99;textual}{plm}). 873#' 874#' `test = "lbi"` yields Baltagi--Wu's LBI statistic 875#' \insertCite{BALT:WU:99}{plm}, the locally best invariant test which 876#' is based on the modified BNF statistic. 877#' 878#' No specific variants of these tests are available for random effect models. 879#' As the within estimator is consistent also under the random effects 880#' assumptions, the test for random effect models is performed by taking the 881#' within residuals. 882#' 883#' No p-values are given for the statistics as their distribution is 884#' quite difficult. \insertCite{BHAR:FRAN:NARE:82;textual}{plm} supply 885#' tabulated bounds for p = 0.05 for the balanced case and consecutive 886#' case. 887#' 888#' For large N, \insertCite{BHAR:FRAN:NARE:82}{plm} suggest it is 889#' sufficient to check whether the BNF statistic is < 2 to test 890#' against positive serial correlation. 891#' 892#' @aliases pbnftest 893#' @param x an object of class `"panelmodel"` or of class `"formula"`, 894#' @param test a character indicating the test to be performed, either 895#' `"bnf"` or `"lbi"` for the (modified) BNF statistic or 896#' Baltagi--Wu's LBI statistic, respectively, 897#' @param data a `data.frame` (only relevant for formula interface), 898#' @param model a character indicating on which type of model the test 899#' shall be performed (`"pooling"`, `"within"`, `"random"`, only 900#' relevant for formula interface), 901#' @param \dots only relevant for formula interface: further arguments 902#' to specify the model to test (arguments passed on to plm()), 903#' e.g., `effect`. 904#' @return An object of class `"htest"`. 905#' @export 906#' @author Kevin Tappe 907#' @seealso [pdwtest()] for the original Durbin--Watson test using 908#' (quasi-)demeaned residuals of the panel model without taking 909#' the panel structure into account. [pbltest()], [pbsytest()], 910#' [pwartest()] and [pwfdtest()] for other serial correlation 911#' tests for panel models. 912#' @references 913#' 914#' \insertRef{BALT:13}{plm} 915#' 916#' \insertRef{BALT:WU:99}{plm} 917#' 918#' \insertRef{BHAR:FRAN:NARE:82}{plm} 919#' 920#' @keywords htest 921#' @examples 922#' 923#' data("Grunfeld", package = "plm") 924#' 925#' # formula interface, replicate Baltagi/Wu (1999), table 1, test case A: 926#' data_A <- Grunfeld[!Grunfeld[["year"]] %in% c("1943", "1944"), ] 927#' pbnftest(inv ~ value + capital, data = data_A, model = "within") 928#' pbnftest(inv ~ value + capital, data = data_A, test = "lbi", model = "within") 929#' 930#' # replicate Baltagi (2013), p. 101, table 5.1: 931#' re <- plm(inv ~ value + capital, data = Grunfeld, model = "random") 932#' pbnftest(re) 933#' pbnftest(re, test = "lbi") 934#' 935pbnftest <- function (x, ...) { 936 UseMethod("pbnftest") 937} 938 939#' @rdname pbnftest 940#' @export 941pbnftest.panelmodel <- function(x, test = c("bnf", "lbi"), ...) { 942 943 test <- match.arg(test) 944 945 # no test for random effects available: take FE as also consistent (Verbeek (2004, 2nd edition), p. 358) 946 model <- describe(x, "model") 947 if (model == "random") x <- update(x, model = "within") 948 949 consec <- all(is.pconsecutive(x)) 950 balanced <- is.pbalanced(x) 951 952 # residuals are now class pseries, so diff.pseries is used and the 953 # differences are computed within observational units (not across as 954 # it would be the case if base::diff() is used and as it is done for 955 # lm-objects) NAs are introduced by the differencing as one 956 # observation is lost per observational unit 957 if (!inherits(residuals(x), "pseries")) stop("pdwtest internal error: residuals are not of class \"pseries\"") # check to be safe: need pseries 958 959 ind <- unclass(index(x))[[1L]] # unclass for speed 960 obs1 <- !duplicated(ind) # first ob of each individual 961 obsn <- !duplicated(ind, fromLast = TRUE) # last ob of each individual 962 963 #### d1, d2, d3, d4 as in Baltagi/Wu (1999), p. 819 formula (16) 964 res_crossprod <- as.numeric(crossprod(residuals(x))) # denominator 965 966 ## d1 consists of two parts: 967 ## d1.1: BNF statistic (sum of squared differenced residuals of consecutive time periods per individual) 968 ## d1.2: sum of squared "later" residuals (not differenced) surrounded by gaps in time periods 969 ## typo in Baltagi/Wu (1999) for d1: index j starts at j = 2, not j = 1 970 res_diff <- diff(residuals(x), shift = "time") 971 d1.1 <- sum(res_diff^2, na.rm = T) / res_crossprod # == BNF (1982), formula (4) 972 d1.2_contrib <- as.logical(is.na(res_diff) - obs1) 973 d1.2 <- as.numeric(crossprod(residuals(x)[d1.2_contrib])) / res_crossprod 974 d1 <- d1.1 + d1.2 # == modified BNF statistic = d1 in Baltagi/Wu (1999) formula (16) 975 # [reduces to original BNF in case of balanced and consecutive data (d1.2 is zero)] 976 977 if (test == "bnf") { 978 stat <- d1 979 names(stat) <- "DW" 980 method <- "Bhargava/Franzini/Narendranathan Panel Durbin-Watson Test" 981 if (!consec || !balanced) method <- paste0("modified ", method) 982 } 983 984 if (test == "lbi") { 985 ## d2 contains the "earlier" obs surrounded by gaps in time periods 986 d2_contrib <- as.logical(is.na(lead(residuals(x), shift = "time")) - obsn) 987 d2 <- as.numeric(crossprod(residuals(x)[d2_contrib])) / res_crossprod 988 989 ## d3, d4: sum squared residual of first/last time period for all individuals / crossprod(residuals) 990 d3 <- as.numeric(crossprod(residuals(x)[obs1])) / res_crossprod 991 d4 <- as.numeric(crossprod(residuals(x)[obsn])) / res_crossprod 992 993 stat <- d1 + d2 + d3 + d4 994 names(stat) <- "LBI" 995 method <- "Baltagi/Wu LBI Test for Serial Correlation in Panel Models" 996 } 997 998 result <- list(statistic = stat, 999 # p.value = NA, # none 1000 method = method, 1001 alternative = "serial correlation in idiosyncratic errors", 1002 data.name = data.name(x)) 1003 class(result) <- "htest" 1004 return(result) 1005} 1006 1007#' @rdname pbnftest 1008#' @export 1009pbnftest.formula <- function(x, data, test = c("bnf", "lbi"), model = c("pooling", "within", "random"), ...) { 1010 ## formula method for pdwtest; 1011 ## defaults to pooling model 1012 1013 test <- match.arg(test) 1014 model <- match.arg(model) 1015 1016 cl <- match.call(expand.dots = TRUE) 1017 if (is.null(model)) model <- "pooling" 1018 names(cl)[2L] <- "formula" 1019 if (names(cl)[3L] == "") names(cl)[3L] <- "data" 1020 m <- match(plm.arg, names(cl), 0) 1021 cl <- cl[c(1L, m)] 1022 cl[[1L]] <- quote(plm) 1023 plm.model <- eval(cl, parent.frame()) 1024 pbnftest(plm.model, test = test) 1025} 1026 1027######### Baltagi and Li's LM_rho|mu ######## 1028## ex Baltagi and Li (1995) Testing AR(1) against MA(1)..., 1029## JE 68, 133-151, test statistic (one-sided) is LM_4; 1030## see also idem (1997), Monte Carlo results..., 1031## Annales d'Econometrie et Statistique 48, formula (8) 1032 1033## from version 2: disposes of Kronecker products, 1034## thus much faster and feasible on large NT (original 1035## is already infeasible for NT>3000, this takes 10'' 1036## on N=3000, T=10 and even 20000x10 (55'') is no problem; 1037## lme() hits the memory limit at ca. 20000x20) 1038 1039#' Baltagi and Li Serial Dependence Test For Random Effects Models 1040#' 1041#' \insertCite{BALT:LI:95;textual}{plm}'s Lagrange multiplier test for 1042#' AR(1) or MA(1) idiosyncratic errors in panel models with random 1043#' effects. 1044#' 1045#' This is a Lagrange multiplier test for the null of no serial 1046#' correlation, against the alternative of either an AR(1) or a MA(1) 1047#' process, in the idiosyncratic component of the error term in a 1048#' random effects panel model (as the analytical expression of the 1049#' test turns out to be the same under both alternatives, 1050#' \insertCite{@see @BALT:LI:95 and @BALT:LI:97}{plm}. The 1051#' `alternative` argument, defaulting to `twosided`, allows testing 1052#' for positive serial correlation only, if set to `onesided`. 1053#' 1054#' @aliases pbltest 1055#' @importFrom nlme lme 1056#' @param x a model formula or an estimated random--effects model of 1057#' class `plm` , 1058#' @param data for the formula interface only: a `data.frame`, 1059#' @param alternative one of `"twosided"`, 1060#' `"onesided"`. Selects either \eqn{H_A: \rho \neq 0} or 1061#' \eqn{H_A: \rho = 0} (i.e., the Normal or the Chi-squared 1062#' version of the test), 1063#' @param index the index of the `data.frame`, 1064#' @param \dots further arguments. 1065#' @return An object of class `"htest"`. 1066#' @export 1067#' @author Giovanni Millo 1068#' @seealso [pdwtest()], [pbnftest()], [pbgtest()], 1069#' [pbsytest()], [pwartest()] and 1070#' [pwfdtest()] for other serial correlation tests for 1071#' panel models. 1072#' @references 1073#' 1074#' \insertRef{BALT:LI:95}{plm} 1075#' 1076#' \insertRef{BALT:LI:97}{plm} 1077#' 1078#' @keywords htest 1079#' @examples 1080#' 1081#' data("Grunfeld", package = "plm") 1082#' 1083#' # formula interface 1084#' pbltest(inv ~ value + capital, data = Grunfeld) 1085#' 1086#' # plm interface 1087#' re_mod <- plm(inv ~ value + capital, data = Grunfeld, model = "random") 1088#' pbltest(re_mod) 1089#' pbltest(re_mod, alternative = "onesided") 1090#' 1091pbltest <- function (x, ...) 1092{ 1093 UseMethod("pbltest") 1094} 1095 1096 1097#' @rdname pbltest 1098#' @export 1099pbltest.formula <- function(x, data, alternative = c("twosided", "onesided"), index = NULL, ...) { 1100 ## this version (pbltest0) based on a "formula, pdataframe" interface 1101 1102 1103 ## reduce X to model matrix value (no NAs) 1104 X <- model.matrix(x, data = data) 1105 ## reduce data accordingly 1106 data <- data[which(row.names(data) %in% row.names(X)), ] 1107 if (! inherits(data, "pdata.frame")) 1108 data <- pdata.frame(data, index = index) 1109 1110 ## need name of individual index 1111 gindex <- dimnames(attr(data, "index"))[[2L]][1L] 1112 1113 ## make random effects formula 1114 rformula <- NULL 1115 eval(parse(text = paste("rformula <- ~1|", gindex, sep = ""))) 1116 1117 ## est. MLE model 1118 mymod <- lme(x, data = data, random = rformula, method = "ML") 1119 1120 nt. <- mymod$dims$N 1121 n. <- as.numeric(mymod$dims$ngrps[1]) 1122 t. <- nt./n. 1123 Jt <- matrix(1, ncol = t., nrow = t.)/t. 1124 Et <- diag(1, t.) - Jt 1125 ## make 'bidiagonal' matrix (see BL, p.136) 1126 G <- matrix(0, ncol = t., nrow = t.) 1127 for(i in 2:t.) { 1128 G[i-1, i] <- 1 1129 G[i, i-1] <- 1 1130 } 1131 1132 ## retrieve composite (=lowest level) residuals 1133 uhat <- residuals(mymod, level = 0) 1134 1135 ## sigma2.e and sigma2.1 as in BL 1136 ## break up residuals by group to get rid of Kronecker prod. 1137 ## data have to be balanced and sorted by group/time, so this works 1138 uhat.i <- vector("list", n.) 1139 for(i in 1:n.) { 1140 uhat.i[[i]] <- uhat[t.*(i-1)+1:t.] 1141 } 1142 s2e <- rep(NA, n.) 1143 s21 <- rep(NA, n.) 1144 for(i in 1:n.) { 1145 u.i <- uhat.i[[i]] 1146 s2e[i] <- as.numeric(crossprod(u.i, Et) %*% u.i) 1147 s21[i] <- as.numeric(crossprod(u.i, Jt) %*% u.i) 1148 } 1149 sigma2.e <- sum(s2e) / (n.*(t.-1)) 1150 sigma2.1 <- sum(s21) / n. 1151 1152 ## calc. score under the null: 1153 star1 <- (Jt/sigma2.1 + Et/sigma2.e) %*% G %*% (Jt/sigma2.1 + Et/sigma2.e) 1154 star2 <- rep(NA, n.) 1155 ## again, do this group by group to avoid Kronecker prod. 1156 for(i in 1:n.) { 1157 star2[i] <- as.numeric(crossprod(uhat.i[[i]], star1) %*% uhat.i[[i]]) 1158 } 1159 star2 <- sum(star2) 1160 Drho <- (n.*(t.-1)/t.) * (sigma2.1-sigma2.e)/sigma2.1 + sigma2.e/2 * star2 1161 ## star2 is (crossprod(uhat, kronecker(In, star1)) %*% uhat) 1162 1163 ## components for the information matrix 1164 a <- (sigma2.e - sigma2.1)/(t.*sigma2.1) 1165 j.rr <- n. * (2 * a^2 * (t.-1)^2 + 2*a*(2*t.-3) + (t.-1)) 1166 j.12 <- n.*(t.-1)*sigma2.e / sigma2.1^2 1167 j.13 <- n.*(t.-1)/t. * sigma2.e * (1/sigma2.1^2 - 1/sigma2.e^2) 1168 j.22 <- (n. * t.^2) / (2 * sigma2.1^2) 1169 j.23 <- (n. * t.) / (2 * sigma2.1^2) 1170 j.33 <- (n./2) * (1/sigma2.1^2 + (t.-1)/sigma2.e^2) 1171 1172 ## build up information matrix 1173 Jmat <- matrix(nrow = 3L, ncol = 3L) 1174 Jmat[1L, ] <- c(j.rr, j.12, j.13) 1175 Jmat[2L, ] <- c(j.12, j.22, j.23) 1176 Jmat[3L, ] <- c(j.13, j.23, j.33) 1177 1178 J11 <- n.^2 * t.^2 * (t.-1) / (det(Jmat) * 4*sigma2.1^2 * sigma2.e^2) 1179 ## this is the same as J11 <- solve(Jmat)[1,1], see BL page 73 1180 1181 switch(match.arg(alternative), 1182 "onesided" = { 1183 LMr.m <- Drho * sqrt(J11) 1184 pval <- pnorm(LMr.m, lower.tail = FALSE) 1185 names(LMr.m) <- "z" 1186 method1 <- "one-sided" 1187 method2 <- "H0: rho = 0, HA: rho > 0" 1188 parameter <- NULL 1189 }, 1190 "twosided" = { 1191 LMr.m <- Drho^2 * J11 1192 pval <- pchisq(LMr.m, df = 1, lower.tail = FALSE) 1193 names(LMr.m) <- "chisq" 1194 parameter <- c(df = 1) 1195 method1 <- "two-sided" 1196 method2 <- "H0: rho = 0, HA: rho != 0" 1197 } 1198 ) 1199 dname <- paste(deparse(substitute(x))) 1200 method <- paste("Baltagi and Li", method1, "LM test") 1201 alternative <- "AR(1)/MA(1) errors in RE panel model" 1202 1203 res <- list(statistic = LMr.m, 1204 p.value = pval, 1205 method = method, 1206 alternative = alternative, 1207 parameter = parameter, 1208 data.name = dname) 1209 1210 class(res) <- "htest" 1211 res 1212} 1213 1214#' @rdname pbltest 1215#' @export 1216pbltest.plm <- function(x, alternative = c("twosided", "onesided"), ...) { 1217 # only continue if random effects model 1218 if (describe(x, "model") != "random") stop("Test is only for random effects models.") 1219 1220 # call pbltest.formula the right way 1221 pbltest.formula(formula(x$formula), data=cbind(index(x), x$model), 1222 index=names(index(x)), alternative = alternative, ...) 1223} 1224 1225#' Wooldridge first--difference--based test for AR(1) errors in levels 1226#' or first--differenced panel models 1227#' 1228#' First--differencing--based test of serial correlation for (the idiosyncratic 1229#' component of) the errors in either levels or first--differenced panel 1230#' models. 1231#' 1232#' As \insertCite{WOOL:10;textual}{plm}, Sec. 10.6.3 observes, if the 1233#' idiosyncratic errors in the model in levels are uncorrelated (which 1234#' we label hypothesis `"fe"`), then the errors of the model in first 1235#' differences (FD) must be serially correlated with 1236#' \eqn{cor(\hat{e}_{it}, \hat{e}_{is}) = -0.5} for each \eqn{t,s}. If 1237#' on the contrary the levels model's errors are a random walk, then 1238#' there must be no serial correlation in the FD errors (hypothesis 1239#' `"fd"`). Both the fixed effects (FE) and the first--differenced 1240#' (FD) estimators remain consistent under either assumption, but the 1241#' relative efficiency changes: FE is more efficient under `"fe"`, FD 1242#' under `"fd"`. 1243#' 1244#' Wooldridge (ibid.) suggests basing a test for either hypothesis on 1245#' a pooled regression of FD residuals on their first lag: 1246#' \eqn{\hat{e}_{i,t}=\alpha + \rho \hat{e}_{i,t-1} + 1247#' \eta_{i,t}}. Rejecting the restriction \eqn{\rho = -0.5} makes us 1248#' conclude against the null of no serial correlation in errors of the 1249#' levels equation (`"fe"`). The null hypothesis of no serial 1250#' correlation in differenced errors (`"fd"`) is tested in a similar 1251#' way, but based on the zero restriction on \eqn{\rho} (\eqn{\rho = 1252#' 0}). Rejecting `"fe"` favours the use of the first--differences 1253#' estimator and the contrary, although it is possible that both be 1254#' rejected. 1255#' 1256#' `pwfdtest` estimates the `fd` model (or takes an `fd` model as 1257#' input for the panelmodel interface) and retrieves its residuals, 1258#' then estimates an AR(1) `pooling` model on them. The test statistic 1259#' is obtained by applying a F test to the latter model to test the 1260#' relevant restriction on \eqn{\rho}, setting the covariance matrix 1261#' to `vcovHC` with the option `method="arellano"` to control for 1262#' serial correlation. 1263#' 1264#' Unlike the `pbgtest` and `pdwtest`, this test does not rely on 1265#' large--T asymptotics and has therefore good properties in ''short'' 1266#' panels. Furthermore, it is robust to general 1267#' heteroskedasticity. The `"fe"` version can be used to test for 1268#' error autocorrelation regardless of whether the maintained 1269#' specification has fixed or random effects 1270#' \insertCite{@see @DRUK:03}{plm}. 1271#' 1272#' @aliases pwfdtest 1273#' @param x an object of class `formula` or a `"fd"`-model (plm 1274#' object), 1275#' @param data a `data.frame`, 1276#' @param h0 the null hypothesis: one of `"fd"`, `"fe"`, 1277#' @param \dots further arguments to be passed on to `vcovHC` (see Details 1278#' and Examples). 1279#' @return An object of class `"htest"`. 1280#' @export 1281#' @author Giovanni Millo 1282#' @seealso `pdwtest`, `pbgtest`, `pwartest`, 1283#' @references 1284#' 1285#' \insertRef{DRUK:03}{plm} 1286#' 1287#' \insertRef{WOOL:02}{plm} 1288#' Sec. 10.6.3, pp. 282--283. 1289#' 1290#' \insertRef{WOOL:10}{plm} 1291#' Sec. 10.6.3, pp. 319--320 1292#' 1293#' @keywords htest 1294#' @examples 1295#' 1296#' data("EmplUK" , package = "plm") 1297#' pwfdtest(log(emp) ~ log(wage) + log(capital), data = EmplUK) 1298#' pwfdtest(log(emp) ~ log(wage) + log(capital), data = EmplUK, h0 = "fe") 1299#' 1300#' # pass argument 'type' to vcovHC used in test 1301#' pwfdtest(log(emp) ~ log(wage) + log(capital), data = EmplUK, type = "HC3", h0 = "fe") 1302#' 1303#' 1304#' # same with panelmodel interface 1305#' mod <- plm(log(emp) ~ log(wage) + log(capital), data = EmplUK, model = "fd") 1306#' pwfdtest(mod) 1307#' pwfdtest(mod, h0 = "fe") 1308#' pwfdtest(mod, type = "HC3", h0 = "fe") 1309#' 1310#' 1311pwfdtest <- function(x, ...) { 1312 UseMethod("pwfdtest") 1313} 1314 1315#' @rdname pwfdtest 1316#' @export 1317pwfdtest.formula <- function(x, data, ..., h0 = c("fd", "fe")) { 1318 cl <- match.call(expand.dots = TRUE) 1319 if (is.null(cl$model)) cl$model <- "fd" 1320 names(cl)[2L] <- "formula" 1321 if (names(cl)[3L] == "") names(cl)[3L] <- "data" 1322 m <- match(plm.arg, names(cl), 0) 1323 cl <- cl[c(1L, m)] 1324 cl[[1L]] <- quote(plm) 1325 plm.model <- eval(cl, parent.frame()) 1326 pwfdtest(plm.model, ..., h0 = h0) 1327} 1328 1329#' @rdname pwfdtest 1330#' @export 1331pwfdtest.panelmodel <- function(x, ..., h0 = c("fd", "fe")) { 1332 ## first-difference-based serial correlation test for panel models 1333 ## ref.: Wooldridge (2002/2010), par. 10.6.3 1334 1335 # interface check 1336 model <- describe(x, "model") 1337 if (model != "fd") stop(paste0("input 'x' needs to be a \"fd\" model (first-differenced model), but is \"", model, "\"")) 1338 1339 ## fetch fd residuals 1340 FDres <- x$residuals 1341 ## indices (full length! must reduce by 1st time period) 1342 ## this is an ad-hoc solution for the fact that the 'fd' model 1343 ## carries on the full indices while losing the first time period 1344 xindex <- unclass(attr(model.frame(x), "index")) # unclass for speed 1345 time <- as.numeric(xindex[[2L]]) 1346 id <- as.numeric(xindex[[1L]]) 1347 1348 ## fetch dimensions and adapt to those of indices 1349 pdim <- pdim(x) 1350 n <- pdim$nT$n 1351 Ti_minus_one <- pdim$Tint$Ti-1 1352 1353 ## generate new individual index: drop one observation per individual 1354 ## NB: This is based on the assumption that the estimated FD model performs 1355 ## its diff-ing row-wise (it currently does so). If the diff-ing for FD 1356 ## is changed to diff-ing based on time dimension, this part about index 1357 ## creation needs to be re-worked because more than 1 observation per 1358 ## individual can be dropped 1359 red_id <- integer() 1360 for(i in 1:n) { 1361 red_id <- c(red_id, rep(i, Ti_minus_one[i])) 1362 } 1363 # additional check 1364 # (but should error earlier already as the FD model should be nonestimable) 1365 if(length(red_id) == 0L) 1366 stop("only individuals with one observation in original data: test not feasible") 1367 1368 # make pdata.frame for auxiliary regression: time dimension is not relevant 1369 # as the first observation of each individual was dropped -> let time dimension 1370 # be created (is not related to the original times anymore) 1371 auxdata <- pdata.frame(as.data.frame(cbind(red_id, FDres)), index = "red_id") 1372 1373 # lag residuals by row (as the FD model diffs by row) 1374 # NB: need to consider change to shift = "time" if behaviour of FD model is changed 1375 auxdata[["FDres.1"]] <- lag(auxdata[["FDres"]], shift = "row") 1376 1377 ## pooling model FDres vs. lag(FDres), with intercept (might as well do it w.o.) 1378 auxmod <- plm(FDres ~ FDres.1, data = auxdata, model = "pooling") 1379 1380 switch(match.arg(h0), 1381 "fd" = {h0des <- "differenced" 1382 ## theoretical rho under H0: no serial 1383 ## corr. in differenced errors is 0 1384 rho.H0 <- 0}, 1385 1386 "fe" = {h0des <- "original" 1387 ## theoretical rho under H0: no serial 1388 ## corr. in original errors is -0.5 1389 rho.H0 <- -0.5}) 1390 1391 myH0 <- paste("FDres.1 = ", as.character(rho.H0), sep="") 1392 1393 ## test H0: rho=rho.H0 with HAC, more params may be passed via ellipsis 1394 myvcov <- function(x) vcovHC(x, method = "arellano", ...) 1395 1396 # calc F stat with restriction rho.H0 and robust vcov 1397 FDARstat <- ((coef(auxmod)["FDres.1"] - rho.H0)/sqrt(myvcov(auxmod)["FDres.1", "FDres.1"]))^2 1398 names(FDARstat) <- "F" 1399 df1 <- c(df1 = 1) 1400 df2 <- c(df2 = df.residual(auxmod)) 1401 pFDARstat <- pf(FDARstat, df1 = df1, df2 = df2, lower.tail = FALSE) 1402 1403 ## insert usual htest features 1404 RVAL <- list(statistic = FDARstat, 1405 parameter = c(df1, df2), 1406 p.value = pFDARstat, 1407 method = "Wooldridge's first-difference test for serial correlation in panels", 1408 alternative = paste("serial correlation in", h0des, "errors"), 1409 data.name = paste(deparse(substitute(x)))) 1410 class(RVAL) <- "htest" 1411 return(RVAL) 1412} 1413