1
2#' Breusch--Godfrey Test for Panel Models
3#'
4#' Test of serial correlation for (the idiosyncratic component of) the
5#' errors in panel models.
6#'
7#' This Lagrange multiplier test uses the auxiliary model on
8#' (quasi-)demeaned data taken from a model of class `plm` which may
9#' be a `pooling` (default for formula interface), `random` or
10#' `within` model. It performs a Breusch--Godfrey test (using `bgtest`
11#' from package \CRANpkg{lmtest} on the residuals of the
12#' (quasi-)demeaned model, which should be serially uncorrelated under
13#' the null of no serial correlation in idiosyncratic errors, as
14#' illustrated in \insertCite{WOOL:10;textual}{plm}. The function
15#' takes the demeaned data, estimates the model and calls `bgtest`.
16#'
17#' Unlike most other tests for serial correlation in panels, this one
18#' allows to choose the order of correlation to test for.
19#'
20#' @aliases pbgtest
21#' @importFrom lmtest bgtest
22#' @param x an object of class `"panelmodel"` or of class `"formula"`,
23#' @param order an integer indicating the order of serial correlation
24#'     to be tested for. `NULL` (default) uses the minimum number of
25#'     observations over the time dimension (see also section
26#'     **Details** below),
27#' @param type type of test statistic to be calculated; either
28#'     `"Chisq"` (default) for the Chi-squared test statistic or `"F"`
29#'     for the F test statistic,
30#' @param data only relevant for formula interface: data set for which
31#'     the respective panel model (see `model`) is to be evaluated,
32#' @param model only relevant for formula interface: compute test
33#'     statistic for model `pooling` (default), `random`, or `within`.
34#'     When `model` is used, the `data` argument needs to be passed as
35#'     well,
36#' @param \dots further arguments (see [lmtest::bgtest()]).
37#' @return An object of class `"htest"`.
38#' @note The argument `order` defaults to the minimum number of
39#'     observations over the time dimension, while for
40#'     `lmtest::bgtest` it defaults to `1`.
41#' @export
42#' @author Giovanni Millo
43#' @seealso For the original test in package \CRANpkg{lmtest} see
44#'     [lmtest::bgtest()].  See [pdwtest()] for the analogous
45#'     panel Durbin--Watson test.  See [pbltest()], [pbsytest()],
46#'     [pwartest()] and [pwfdtest()] for other serial correlation
47#'     tests for panel models.
48#' @references
49#'
50#' \insertRef{BREU:78}{plm}
51#'
52#' \insertRef{GODF:78}{plm}
53#'
54#' \insertRef{WOOL:02}{plm}
55#'
56#' \insertRef{WOOL:10}{plm}
57#'
58#' \insertRef{WOOL:13}{plm}
59#'  Sec. 12.2, pp. 421--422.
60#' @keywords htest
61#' @examples
62#'
63#' data("Grunfeld", package = "plm")
64#' g <- plm(inv ~ value + capital, data = Grunfeld, model = "random")
65#'
66#' # panelmodel interface
67#' pbgtest(g)
68#' pbgtest(g, order = 4)
69#'
70#' # formula interface
71#' pbgtest(inv ~ value + capital, data = Grunfeld, model = "random")
72#'
73#' # F test statistic (instead of default type="Chisq")
74#' pbgtest(g, type="F")
75#' pbgtest(inv ~ value + capital, data = Grunfeld, model = "random", type = "F")
76#'
77pbgtest <- function (x, ...) {
78    UseMethod("pbgtest")
79}
80
81#' @rdname pbgtest
82#' @export
83pbgtest.panelmodel <- function(x, order = NULL, type = c("Chisq", "F"), ...) {
84    ## residual serial correlation test based on the residuals of the demeaned
85    ## model (see Wooldridge (2002), p. 288) and the regular lmtest::bgtest()
86
87    ## structure:
88    ## 1: take demeaned data from 'plm' object
89    ## 2: est. auxiliary model by OLS on demeaned data
90    ## 3: apply lmtest::bgtest() to auxiliary model and return the result
91
92    model <- describe(x, "model")
93    effect <- describe(x, "effect")
94    theta <- x$ercomp$theta
95
96    ## retrieve demeaned data
97    demX <- model.matrix(x, model = model, effect = effect, theta = theta, cstcovar.rm = "all")
98    demy <- pmodel.response(model.frame(x), model = model, effect = effect, theta = theta)
99    ## ...and group numerosities
100    Ti <- pdim(x)$Tint$Ti
101    ## set lag order to minimum group numerosity if not specified by user
102    ## (check whether this is sensible)
103    if(is.null(order)) order <- min(Ti)
104
105    ## lmtest::bgtest on the demeaned model:
106
107    ## pbgtest is the return value of lmtest::bgtest, exception made for the method attribute
108    auxformula <- demy ~ demX - 1
109    lm.mod <- lm(auxformula)
110    bgtest <- bgtest(lm.mod, order = order, type = type, ...)
111    bgtest$method <- "Breusch-Godfrey/Wooldridge test for serial correlation in panel models"
112    bgtest$alternative <- "serial correlation in idiosyncratic errors"
113    bgtest$data.name <- data.name(x)
114    names(bgtest$statistic) <- if(length(bgtest$parameter) == 1) "chisq" else "F"
115    return(bgtest)
116}
117
118#' @rdname pbgtest
119#' @export
120pbgtest.formula <- function(x, order = NULL, type = c("Chisq", "F"), data, model=c("pooling", "random", "within"), ...) {
121    ## formula method for pbgtest;
122    ## defaults to a pooling model
123    cl <- match.call(expand.dots = TRUE)
124    if (names(cl)[3L] == "") names(cl)[3L] <- "data"
125    if (is.null(cl$model)) cl$model <- "pooling"
126    names(cl)[2L] <- "formula"
127    m <- match(plm.arg, names(cl), 0)
128    cl <- cl[c(1L, m)]
129    cl[[1L]] <- quote(plm)
130    plm.model <- eval(cl,parent.frame())
131    pbgtest(plm.model, order = order, type = type, data = data, ...)
132}
133
134#' Wooldridge's Test for Unobserved Effects in Panel Models
135#'
136#' Semi-parametric test for the presence of (individual or time) unobserved
137#' effects in panel models.
138#'
139#' This semi-parametric test checks the null hypothesis of zero
140#' correlation between errors of the same group. Therefore, it has
141#' power both against individual effects and, more generally, any kind
142#' of serial correlation.
143#'
144#' The test relies on large-N asymptotics. It is valid under error
145#' heteroskedasticity and departures from normality.
146#'
147#' The above is valid if `effect="individual"`, which is the most
148#' likely usage. If `effect="time"`, symmetrically, the test relies on
149#' large-T asymptotics and has power against time effects and, more
150#' generally, against cross-sectional correlation.
151#'
152#' If the panelmodel interface is used, the inputted model must be a pooling
153#' model.
154#'
155#' @aliases pwtest
156#' @param x an object of class `"formula"`, or an estimated model of class
157#' `panelmodel`,
158#' @param effect the effect to be tested for, one of `"individual"`
159#' (default) or `"time"`,
160#' @param data a `data.frame`,
161#' @param \dots further arguments passed to `plm`.
162#' @return An object of class `"htest"`.
163#' @export
164#' @author Giovanni Millo
165#' @seealso [pbltest()], [pbgtest()],
166#' [pdwtest()], [pbsytest()], [pwartest()],
167#' [pwfdtest()] for tests for serial correlation in panel models.
168#' [plmtest()] for tests for random effects.
169#' @references
170#'
171#' \insertRef{WOOL:02}{plm}
172#'
173#' \insertRef{WOOL:10}{plm}
174#'
175#' @keywords htest
176#' @examples
177#'
178#' data("Produc", package = "plm")
179#' ## formula interface
180#' pwtest(log(gsp) ~ log(pcap) + log(pc) + log(emp) + unemp, data = Produc)
181#' pwtest(log(gsp) ~ log(pcap) + log(pc) + log(emp) + unemp, data = Produc, effect = "time")
182#'
183#' ## panelmodel interface
184#' # first, estimate a pooling model, than compute test statistics
185#' form <- formula(log(gsp) ~ log(pcap) + log(pc) + log(emp) + unemp)
186#' pool_prodc <- plm(form, data = Produc, model = "pooling")
187#' pwtest(pool_prodc) # == effect="individual"
188#' pwtest(pool_prodc, effect="time")
189#'
190pwtest <- function(x, ...){
191  UseMethod("pwtest")
192}
193
194#' @rdname pwtest
195#' @export
196pwtest.formula <- function(x, data, effect = c("individual", "time"), ...) {
197
198  effect <- match.arg(effect, choices = c("individual", "time")) # match effect to pass it on to pwtest.panelmodel
199
200  cl <- match.call(expand.dots = TRUE)
201  if (names(cl)[3] == "") names(cl)[3] <- "data"
202  if (is.null(cl$model)) cl$model <- "pooling"
203  if (cl$model != "pooling") stop("pwtest only relevant for pooling models")
204  names(cl)[2] <- "formula"
205  m <- match(plm.arg, names(cl), 0)
206  cl <- cl[c(1L, m)]
207  cl[[1L]] <- quote(plm)
208  plm.model <- eval(cl,parent.frame())
209  pwtest.panelmodel(plm.model, effect = effect, ...) # pass on desired 'effect' argument to pwtest.panelmodel
210
211  ## "RE" test a la Wooldridge (2002/2010), see 10.4.4
212  ## (basically the scaled and standardized estimator for sigma from REmod)
213  ## does not rely on normality or homoskedasticity;
214  ## H0: composite errors uncorrelated
215
216  ## ref. Wooldridge (2002), pp. 264-265; Wooldridge (2010), pp. 299-300
217
218  ######### from here generic testing interface from
219  ######### plm to my code
220}
221
222#' @rdname pwtest
223#' @export
224pwtest.panelmodel <- function(x, effect = c("individual", "time"), ...) {
225  if (describe(x, "model") != "pooling") stop("pwtest only relevant for pooling models")
226  effect <- match.arg(effect, choices = c("individual", "time"))
227  data <- model.frame(x)
228  ## extract indices
229
230  ## if effect="individual" std., else swap
231  xindex <- unclass(attr(data, "index")) # unclass for speed
232  if (effect == "individual"){
233    index  <- xindex[[1L]]
234    tindex <- xindex[[2L]]
235  }
236  else{
237    index  <- xindex[[2L]]
238    tindex <- xindex[[1L]]
239  }
240  ## det. number of groups and df
241  n <- length(unique(index))
242  X <- model.matrix(x)
243
244  k <- ncol(X)
245  ## det. total number of obs. (robust vs. unbalanced panels)
246  nT <- nrow(X)
247  ## det. max. group numerosity
248  t <- max(tapply(X[ , 1L], index, length))
249
250  ## ref. Wooldridge (2002), p.264 / Wooldridge (2010), p.299
251
252  ## extract resids
253  u <- x$residuals
254
255  ## est. random effect variance
256  ## "pre-allocate" an empty list of length n
257  tres <- vector("list", n)
258
259  ## list of n "empirical omega-blocks"
260  ## with averages of xproducts of t(i) residuals
261  ## for each group 1..n
262  ## (possibly different sizes if unbal., thus a list
263  ## and thus, unlike Wooldridge (eq.10.37), we divide
264  ## every block by *its* t(t-1)/2)
265  unind <- unique(index) # ????
266
267  for(i in 1:n) {
268    ut <- u[index == unind[i]]
269    tres[[i]] <- ut %o% ut
270  }
271
272  ## det. # of upper triangle members (n*t(t-1)/2 if balanced)
273  ## no needed, only for illustration
274  # ti <- vapply(tres, function(x) dim(x)[[1L]], FUN.VALUE = 0.0, USE.NAMES = FALSE)
275  # uptrinum <- sum(ti*(ti-1)/2)
276
277  ## sum over all upper triangles of emp. omega blocks:
278  ## and sum over resulting vector (df corrected)
279  sum.uptri <- vapply(tres, function(x) sum(x[upper.tri(x, diag = FALSE)]), FUN.VALUE = 0.0, USE.NAMES = FALSE)
280  W <- sum(sum.uptri) # /sqrt(n) simplifies out
281
282  ## calculate se(Wstat) as in 10.40
283  seW <- sqrt(as.numeric(crossprod(sum.uptri)))
284
285  ## NB should we apply a df correction here, maybe that of the standard
286  ## RE estimator? (see page 261)
287
288  Wstat <- W/seW
289  names(Wstat) <- "z"
290  pW <- 2*pnorm(abs(Wstat), lower.tail = FALSE) # unlike LM, test is two-tailed!
291
292  ## insert usual htest features
293  RVAL <- list(statistic   = Wstat,
294               parameter   = NULL,
295               method      = paste("Wooldridge's test for unobserved",
296                                 effect, "effects"),
297               alternative = "unobserved effect",
298               p.value     = pW,
299               data.name   = paste(deparse(substitute(formula))))
300  class(RVAL) <- "htest"
301  return(RVAL)
302}
303
304#' Wooldridge Test for AR(1) Errors in FE Panel Models
305#'
306#' Test of serial correlation for (the idiosyncratic component of) the errors
307#' in fixed--effects panel models.
308#'
309#' As \insertCite{WOOL:10;textual}{plm}, Sec. 10.5.4 observes, under
310#' the null of no serial correlation in the errors, the residuals of a
311#' FE model must be negatively serially correlated, with
312#' \eqn{cor(\hat{u}_{it}, \hat{u}_{is})=-1/(T-1)} for each
313#' \eqn{t,s}. He suggests basing a test for this null hypothesis on a
314#' pooled regression of FE residuals on their first lag:
315#' \eqn{\hat{u}_{i,t} = \alpha + \delta \hat{u}_{i,t-1} +
316#' \eta_{i,t}}. Rejecting the restriction \eqn{\delta = -1/(T-1)}
317#' makes us conclude against the original null of no serial
318#' correlation.
319#'
320#' `pwartest` estimates the `within` model and retrieves residuals,
321#' then estimates an AR(1) `pooling` model on them. The test statistic
322#' is obtained by applying a F test to the latter model to test the
323#' above restriction on \eqn{\delta}, setting the covariance matrix to
324#' `vcovHC` with the option `method="arellano"` to control for serial
325#' correlation.
326#'
327#' Unlike the [pbgtest()] and [pdwtest()], this test does
328#' not rely on large--T asymptotics and has therefore good properties in
329#' ``short'' panels.  Furthermore, it is robust to general heteroskedasticity.
330#'
331#' @aliases pwartest
332#' @param x an object of class `formula` or of class `panelmodel`,
333#' @param data a `data.frame`,
334#' @param \dots further arguments to be passed on to `vcovHC` (see
335#'     Details and Examples).
336#' @return An object of class `"htest"`.
337#' @export
338#' @author Giovanni Millo
339#' @seealso [pwfdtest()], [pdwtest()], [pbgtest()], [pbltest()],
340#'     [pbsytest()].
341#' @references
342#'
343#' \insertRef{WOOL:02}{plm}
344#'
345#' \insertRef{WOOL:10}{plm}
346#'
347#' @keywords htest
348#' @examples
349#'
350#' data("EmplUK", package = "plm")
351#' pwartest(log(emp) ~ log(wage) + log(capital), data = EmplUK)
352#'
353#' # pass argument 'type' to vcovHC used in test
354#' pwartest(log(emp) ~ log(wage) + log(capital), data = EmplUK, type = "HC3")
355#'
356#'
357pwartest <- function(x, ...) {
358  UseMethod("pwartest")
359}
360
361#' @rdname pwartest
362#' @export
363pwartest.formula <- function(x, data, ...) {
364  ## small-sample serial correlation test for FE models
365  ## ref.: Wooldridge (2002/2010) 10.5.4
366
367  cl <- match.call(expand.dots = TRUE)
368  if (is.null(cl$model)) cl$model <- "within"
369  if (cl$model != "within") stop("pwartest only relevant for within models")
370  if (names(cl)[3L] == "") names(cl)[3L] <- "data"
371  names(cl)[2L] <- "formula"
372  m <- match(plm.arg, names(cl), 0)
373  cl <- cl[c(1L, m)]
374  cl[[1L]] <- quote(plm)
375  plm.model <- eval(cl, parent.frame())
376  pwartest(plm.model, ...)
377}
378
379#' @rdname pwartest
380#' @export
381pwartest.panelmodel <- function(x, ...) {
382
383  if (describe(x, "model") != "within") stop("pwartest only relevant for within models")
384
385  FEres <- x$residuals
386  data <- model.frame(x)
387
388  ## this is a bug fix for incorrect naming of the "data" attr.
389  ## for the pseries in pdata.frame()
390
391  attr(FEres, "data") <- NULL
392  N <- length(FEres)
393  FEres.1 <- c(NA, FEres[1:(N-1)])
394  xindex <- unclass(attr(data, "index")) # unclass for speed
395  id   <- xindex[[1L]]
396  time <- xindex[[2L]]
397  lagid <- as.numeric(id) - c(NA, as.numeric(id)[1:(N-1)])
398  FEres.1[lagid != 0] <- NA
399  data <- data.frame(id, time, FEres = unclass(FEres), FEres.1 = unclass(FEres.1))
400  names(data)[c(1L, 2L)] <- c("id", "time")
401  data <- na.omit(data)
402
403  # calc. auxiliary model
404  auxmod <- plm(FEres ~ FEres.1, data = data, model = "pooling", index = c("id", "time"))
405
406  ## calc. theoretical rho under H0: no serial corr. in errors
407  t. <- pdim(x)$nT$T
408  rho.H0 <- -1/(t.-1)
409  myH0 <- paste("FEres.1 = ", as.character(rho.H0), sep="")
410
411  ## test H0: rho=rho.H0 with HAC
412  myvcov <- function(x) vcovHC(x, method = "arellano", ...) # more params may be passed via ellipsis
413
414  # calc F stat with restriction rho.H0 and robust vcov
415  FEARstat <- ((coef(auxmod)["FEres.1"] - rho.H0)/sqrt(myvcov(auxmod)["FEres.1", "FEres.1"]))^2
416  names(FEARstat) <- "F"
417  df1 <- c("df1" = 1)
418  df2 <- c("df2" = df.residual(auxmod))
419  pFEARstat <- pf(FEARstat, df1 = df1, df2 = df2, lower.tail = FALSE)
420
421  ## insert usual htest features
422  RVAL <- list(statistic   = FEARstat,
423               parameter   = c(df1, df2),
424               p.value     = pFEARstat,
425               method = "Wooldridge's test for serial correlation in FE panels",
426               alternative = "serial correlation",
427               data.name   = paste(deparse(substitute(x))))
428  class(RVAL) <- "htest"
429  return(RVAL)
430}
431
432## Bera, Sosa-Escudero and Yoon type LM test for random effects
433## under serial correlation (H0: no random effects) or the inverse;
434## test="ar": serial corr. test robust vs. RE
435## test="re": RE test robust vs. serial corr.
436## test="j":  joint test for serial corr. and random effects
437
438# Reference for the _balanced_ tests="ar"|"re":
439#                   Bera/Sosa-Escudero/Yoon (2001), Tests for the error component model in the presence of local misspecifcation,
440#                                                   Journal of Econometrics 101 (2001), pp. 1-23.
441#
442#           for original (balanced) test="j": Baltagi/Li (1991), A joint test for serial correlation and random individual effects,
443#                                                     Statistics & Probability Letters 11 (1991), pp. 277-280.
444#
445# Reference for _un_balanced versions of all three tests (boil down to the balanced versions for balanced panels):
446#                    Sosa-Escudero/Bera (2008), Tests for unbalanced error-components models under local misspecification,
447#                                               The Stata Journal (2008), Vol. 8, Number 1, pp. 68-78.
448#
449# Concise treatment of only _balanced_ tests in
450#                      Baltagi (2005), Econometric Analysis of Panel Data, 3rd edition, pp. 96-97
451#                   or Baltagi (2013), Econometric Analysis of Panel Data, 5th edition, pp. 108.
452#
453#
454## Implementation follows the formulae for unbalanced panels, which reduce for balanced data to the formulae for balanced panels.
455##
456## Notation in code largely follows Sosa-Escudero/Bera (2008) (m in Sosa-Escudero/Bera (2008) is total number of observations -> N_obs)
457## NB: Baltagi's book matrix A is slightly different defined: A in Baltagi is -A in Sosa-Escudera/Bera (2008)
458
459
460
461#' Bera, Sosa-Escudero and Yoon Locally--Robust Lagrange Multiplier
462#' Tests for Panel Models and Joint Test by Baltagi and Li
463#'
464#' Test for residual serial correlation (or individual random effects)
465#' locally robust vs. individual random effects (serial correlation)
466#' for panel models and joint test of serial correlation and the
467#' random effect specification by Baltagi and Li.
468#'
469#' These Lagrange multiplier tests are robust vs. local
470#' misspecification of the alternative hypothesis, i.e., they test the
471#' null of serially uncorrelated residuals against AR(1) residuals in
472#' a pooling model, allowing for local departures from the assumption
473#' of no random effects; or they test the null of no random effects
474#' allowing for local departures from the assumption of no serial
475#' correlation in residuals.  They use only the residuals of the
476#' pooled OLS model and correct for local misspecification as outlined
477#' in \insertCite{BERA:SOSA:YOON:01;textual}{plm}.
478#'
479#' For `test = "re"`, the default (`re.normal = TRUE`) is to compute
480#' a one-sided test which is expected to lead to a more powerful test
481#' (asymptotically N(0,1) distributed).  Setting `re.normal = FALSE` gives
482#' the two-sided test (asymptotically chi-squared(2) distributed). Argument
483#' `re.normal` is irrelevant for all other values of `test`.
484#'
485#' The joint test of serial correlation and the random effect
486#' specification (`test = "j"`) is due to
487#' \insertCite{BALT:LI:91;textual}{plm} (also mentioned in
488#' \insertCite{BALT:LI:95;textual}{plm}, pp. 135--136) and is added
489#' for convenience under this same function.
490#'
491#' The unbalanced version of all tests are derived in
492#' \insertCite{SOSA:BERA:08;textual}{plm}. The functions implemented
493#' are suitable for balanced as well as unbalanced panel data sets.
494#'
495#' A concise treatment of the statistics for only balanced panels is
496#' given in \insertCite{BALT:13;textual}{plm}, p. 108.
497#'
498#' Here is an overview of how the various values of the `test`
499#' argument relate to the literature:
500#'
501#' \itemize{ \item `test = "ar"`: \itemize{ \item \eqn{RS*_{\rho}} in Bera
502#' et al. (2001), p. 9 (balanced) \item \eqn{LM*_{\rho}} in Baltagi (2013), p.
503#' 108 (balanced) \item \eqn{RS*_{\lambda}} in Sosa-Escudero/Bera (2008), p. 73
504#' (unbalanced) }
505#'
506#' \item `test = "re", re.normal = TRUE` (default) (one-sided test,
507#' asymptotically N(0,1) distributed): \itemize{ \item \eqn{RSO*_{\mu}} in Bera
508#' et al. (2001), p. 11 (balanced) \item \eqn{RSO*_{\mu}} in Sosa-Escudero/Bera
509#' (2008), p. 75 (unbalanced) }
510#'
511#' \item `test = "re", re.normal = FALSE` (two-sided test, asymptotically
512#' chi-squared(2) distributed): \itemize{ \item \eqn{RS*_{\mu}} in Bera et al.
513#' (2001), p. 7 (balanced) \item \eqn{LM*_{\mu}} in Baltagi (2013), p. 108
514#' (balanced) \item \eqn{RS*_{\mu}} in Sosa-Escudero/Bera (2008), p. 73
515#' (unbalanced) }
516#'
517#' \item `test = "j"`: \itemize{ \item \eqn{RS_{\mu\rho}} in Bera et al.
518#' (2001), p. 10 (balanced) \item \eqn{LM} in Baltagi/Li (2001), p. 279
519#' (balanced) \item \eqn{LM_{1}} in Baltagi and Li (1995), pp. 135--136
520#' (balanced) \item \eqn{LM1} in Baltagi (2013), p. 108 (balanced) \item
521#' \eqn{RS_{\lambda\rho}} in Sosa-Escudero/Bera (2008), p. 74 (unbalanced) } }
522#'
523#' @aliases pbsytest
524#' @param x an object of class `formula` or of class `panelmodel`,
525#' @param data a `data.frame`,
526#' @param test a character string indicating which test to perform:
527#' first--order serial correlation (`"ar"`), random effects (`"re"`)
528#' or joint test for either of them (`"j"`),
529#' @param re.normal logical, only relevant for `test = "re"`: `TRUE`
530#' (default) computes the one-sided `"re"` test, `FALSE` the
531#' two-sided test (see also Details); not relevant for other values of
532#' `test` and, thus, should be `NULL`,
533#' @param \dots further arguments.
534#' @return An object of class `"htest"`.
535#' @export
536#' @author Giovanni Millo (initial implementation) & Kevin Tappe (extension to
537#' unbalanced panels)
538#' @seealso [plmtest()] for individual and/or time random effects
539#' tests based on a correctly specified model; [pbltest()],
540#' [pbgtest()] and [pdwtest()] for serial correlation tests
541#' in random effects models.
542#' @references
543#'
544#' \insertRef{BERA:SOSA:YOON:01}{plm}
545#'
546#' \insertRef{BALT:13}{plm}
547#'
548#' \insertRef{BALT:LI:91}{plm}
549#'
550#' \insertRef{BALT:LI:95}{plm}
551#'
552#' \insertRef{SOSA:BERA:08}{plm}
553#'
554#' @keywords htest
555#'
556#' @examples
557#'
558#' ## Bera et. al (2001), p. 13, table 1 use
559#' ## a subset of the original Grunfeld
560#' ## data which contains three errors -> construct this subset:
561#' data("Grunfeld", package = "plm")
562#' Grunsubset <- rbind(Grunfeld[1:80, ], Grunfeld[141:160, ])
563#' Grunsubset[Grunsubset$firm == 2 & Grunsubset$year %in% c(1940, 1952), ][["inv"]] <- c(261.6, 645.2)
564#' Grunsubset[Grunsubset$firm == 2 & Grunsubset$year == 1946, ][["capital"]] <- 232.6
565#'
566#' ## default is AR testing (formula interface)
567#' pbsytest(inv ~ value + capital, data = Grunsubset, index = c("firm", "year"))
568#' pbsytest(inv ~ value + capital, data = Grunsubset, index = c("firm", "year"), test = "re")
569#' pbsytest(inv ~ value + capital, data = Grunsubset, index = c("firm", "year"),
570#'   test = "re", re.normal = FALSE)
571#' pbsytest(inv ~ value + capital, data = Grunsubset, index = c("firm", "year"), test = "j")
572#'
573#' ## plm interface
574#' mod <- plm(inv ~ value + capital, data = Grunsubset, model = "pooling")
575#' pbsytest(mod)
576#'
577pbsytest <- function (x, ...) {
578  UseMethod("pbsytest")
579}
580
581#' @rdname pbsytest
582#' @export
583pbsytest.formula <- function(x, data, ..., test = c("ar", "re", "j"), re.normal = if (test == "re") TRUE else NULL) {
584
585  ######### from here generic testing interface from
586  ######### plm to my code
587  if (length(test) == 1L) test <- tolower(test) # for backward compatibility: allow upper case
588  test <- match.arg(test)
589
590  cl <- match.call(expand.dots = TRUE)
591  if (is.null(cl$model)) cl$model <- "pooling"
592  if (cl$model != "pooling") stop("pbsytest only relevant for pooling models")
593  names(cl)[2L] <- "formula"
594  if (names(cl)[3L] == "") names(cl)[3L] <- "data"
595  m <- match(plm.arg, names(cl), 0)
596  cl <- cl[c(1, m)]
597  cl[[1L]] <- as.name("plm")
598  plm.model <- eval(cl, parent.frame())
599  pbsytest(plm.model, test = test, re.normal = re.normal, ...)
600}
601
602#' @rdname pbsytest
603#' @export
604pbsytest.panelmodel <- function(x, test = c("ar", "re", "j"), re.normal = if (test == "re") TRUE else NULL, ...) {
605  test <- match.arg(test)
606  if (describe(x, "model") != "pooling") stop("pbsytest only relevant for pooling models")
607
608  # interface check for argument re.normal
609  if (test != "re" && !is.null(re.normal)) {
610    stop("argument 're.normal' only relevant for test = \"re\", set re.normal = NULL for other tests")}
611
612  poolres <- x$residuals
613  data <- model.frame(x)
614  ## extract indices
615  index <- attr(data, "index")
616  iindex <- index[[1L]]
617  tindex <- index[[2L]]
618
619
620  ## till here.
621  ## ordering here if needed.
622
623  ## this needs ordering of obs. on time, regardless
624  ## whether before that on groups or after
625
626  ## and numerosity check
627
628  ## order by group, then time
629  oo <- order(iindex,tindex)
630  ind <- iindex[oo]
631  tind <- tindex[oo]
632  poolres <- poolres[oo]
633  pdim <- pdim(x)
634  n <- max(pdim$Tint$n) ## det. number of groups
635  T_i <- pdim$Tint$Ti
636  N_t <- pdim$Tint$nt
637  t <- max(T_i) ## det. max. group numerosity
638  N_obs <- pdim$nT$N ## det. total number of obs. (m in Sosa-Escudera/Bera (2008), p. 69)
639
640  ## calc. matrices A and B:
641  # Sosa-Escudera/Bera (2008), p. 74
642  # Baltagi (2013), p. 108 defines A=(S1/S2)-1 and, thus, has slightly different formulae [opposite sign in Baltagi]
643  S1 <- as.numeric(crossprod(tapply(poolres,ind,sum))) # == sum(tapply(poolres,ind,sum)^2)
644  S2 <- as.numeric(crossprod(poolres))                 # == sum(poolres^2)
645  A <- 1 - S1/S2
646
647  unind <- unique(ind)
648  uu <- rep(NA, length(unind))
649  uu1 <- rep(NA, length(unind))
650  for(i in 1:length(unind)) {
651    u.t <- poolres[ind == unind[i]]
652    u.t.1 <- u.t[-length(u.t)]
653    u.t <- u.t[-1L]
654    uu[i] <- crossprod(u.t)
655    uu1[i] <- crossprod(u.t, u.t.1)
656  }
657  B <- sum(uu1)/sum(uu)
658
659  a <- as.numeric(crossprod(T_i)) # Sosa-Escudera/Bera (2008), p. 69
660
661  switch(test,
662           "ar" = {
663             # RS*_lambda from Sosa-Escudero/Bera (2008), p. 73 (unbalanced formula)
664             stat <- (B + (((N_obs - n)/(a - N_obs)) * A))^2 * (((a - N_obs)*N_obs^2) / ((N_obs - n)*(a - 3*N_obs + 2*n)))
665             df <- c(df = 1)
666             names(stat) <- "chisq"
667             pstat <- pchisq(stat, df = df, lower.tail = FALSE)
668             tname <- "Bera, Sosa-Escudero and Yoon locally robust test"
669             myH0_alt <- "AR(1) errors sub random effects"
670           },
671
672           "re" = {
673             if(re.normal) {
674               # RSO*_mu from Sosa-Escudero/Bera (2008), p. 75 (unbalanced formula), normally distributed
675               stat <- -sqrt( (N_obs^2) / (2*(a - 3*N_obs + 2*n))) * (A + 2*B)
676               names(stat) <- "z"
677               df <- NULL
678               pstat <- pnorm(stat, lower.tail = FALSE)
679               tname <- "Bera, Sosa-Escudero and Yoon locally robust test (one-sided)"
680               myH0_alt <- "random effects sub AR(1) errors"
681             } else {
682                # RS*_mu from Sosa-Escudero/Bera (2008), p. 73 (unbalanced formula), chisq(1)
683                stat <- ((N_obs^2) * (A + 2*B)^2) / (2*(a - 3*N_obs + 2*n))
684                names(stat) <- "chisq"
685                df <- c(df = 1)
686                pstat <- pchisq(stat, df = df, lower.tail = FALSE)
687                tname <- "Bera, Sosa-Escudero and Yoon locally robust test (two-sided)"
688                myH0_alt <- "random effects sub AR(1) errors"
689             }
690           },
691
692           "j" = {
693             # RS_lambda_mu in Sosa-Escudero/Bera (2008), p. 74 (unbalanced formula)
694             stat <- N_obs^2 * ( ((A^2 + 4*A*B + 4*B^2) / (2*(a - 3*N_obs + 2*n))) + (B^2/(N_obs - n)))
695             # Degrees of freedom in the joint test (test="j") of Baltagi/Li (1991) are 2 (chisquare(2) distributed),
696             # see Baltagi/Li (1991), p. 279 and again in Baltagi/Li (1995), p. 136
697             df <- c(df = 2)
698             names(stat) <- "chisq"
699             pstat <- pchisq(stat, df = df, lower.tail = FALSE)
700             tname <- "Baltagi and Li AR-RE joint test"
701             myH0_alt <- "AR(1) errors or random effects"
702           }
703  ) # END switch
704
705  dname <- paste(deparse(substitute(formula)))
706  balanced.type <- if(pdim$balanced) "balanced" else "unbalanced"
707  tname <- paste(tname, "-", balanced.type, "panel", collapse = " ")
708
709  RVAL <- list(statistic   = stat,
710               parameter   = df,
711               method      = tname,
712               alternative = myH0_alt,
713               p.value     = pstat,
714               data.name   = dname)
715  class(RVAL) <- "htest"
716  return(RVAL)
717}
718
719#' Durbin--Watson Test for Panel Models
720#'
721#' Test of serial correlation for (the idiosyncratic component of) the errors
722#' in panel models.
723#'
724#' This Durbin--Watson test uses the auxiliary model on
725#' (quasi-)demeaned data taken from a model of class `plm` which may
726#' be a `pooling` (the default), `random` or `within` model. It
727#' performs a Durbin--Watson test (using `dwtest` from package
728#' \CRANpkg{lmtest} on the residuals of the (quasi-)demeaned model,
729#' which should be serially uncorrelated under the null of no serial
730#' correlation in idiosyncratic errors. The function takes the
731#' demeaned data, estimates the model and calls `dwtest`. Thus, this
732#' test does not take the panel structure of the residuals into
733#' consideration; it shall not be confused with the generalized
734#' Durbin-Watson test for panels in `pbnftest`.
735#'
736#' @aliases pdwtest
737#' @importFrom lmtest dwtest
738#' @param x an object of class `"panelmodel"` or of class
739#'     `"formula"`,
740#' @param data a `data.frame`,
741#' @param \dots further arguments to be passed on to `dwtest`,
742#'     e.g., `alternative`, see [lmtest::dwtest()] for
743#'     further details.
744#' @return An object of class `"htest"`.
745#' @export
746#' @author Giovanni Millo
747#' @seealso [lmtest::dwtest()] for the Durbin--Watson test
748#'     in \CRANpkg{lmtest}, [pbgtest()] for the analogous
749#'     Breusch--Godfrey test for panel models,
750#'     [lmtest::bgtest()] for the Breusch--Godfrey test for
751#'     serial correlation in the linear model. [pbltest()],
752#'     [pbsytest()], [pwartest()] and
753#'     [pwfdtest()] for other serial correlation tests for
754#'     panel models.
755#'
756#' For the Durbin-Watson test generalized to panel data models see
757#' [pbnftest()].
758#' @references
759#'
760#' \insertRef{DURB:WATS:50}{plm}
761#'
762#' \insertRef{DURB:WATS:51}{plm}
763#'
764#' \insertRef{DURB:WATS:71}{plm}
765#'
766#' \insertRef{WOOL:02}{plm}
767#'
768#' \insertRef{WOOL:10}{plm}
769#'
770#' @keywords htest
771#' @examples
772#'
773#' data("Grunfeld", package = "plm")
774#' g <- plm(inv ~ value + capital, data = Grunfeld, model="random")
775#' pdwtest(g)
776#' pdwtest(g, alternative="two.sided")
777#' ## formula interface
778#' pdwtest(inv ~ value + capital, data=Grunfeld, model="random")
779#'
780pdwtest <- function (x, ...) {
781    UseMethod("pdwtest")
782}
783
784#' @rdname pdwtest
785#' @export
786pdwtest.panelmodel <- function(x, ...) {
787    ## does not respect panel structure:
788    ## residual serial correlation test based on the residuals of the demeaned
789    ## model and passed on to lmtest::dwtest() for the original DW test
790    ## approach justified in Wooldridge (2002/2010), Econometric Analysis of Cross Section and Panel Data, p. 288/328.
791    ##
792    ## For the Bhargava et al. (1982) generalized DW test see pbnftest()
793
794    ## structure:
795    ## 1: take demeaned data from 'plm' object
796    ## 2: est. auxiliary model by OLS on demeaned data
797    ## 3: apply lmtest::dwtest() to auxiliary model and return the result
798
799    model <- describe(x, "model")
800    effect <- describe(x, "effect")
801    theta <- x$ercomp$theta
802
803    ## retrieve demeaned data
804    demX <- model.matrix(x, model = model, effect = effect, theta = theta, cstcovar.rm = "all")
805    demy <- pmodel.response(model.frame(x), model = model, effect = effect, theta = theta)
806
807    ## lmtest::dwtest on the demeaned model:
808
809    ## ARtest is the return value of lmtest::dwtest, exception made for the method attribute
810    dots <- list(...)
811    order.by    <- if(is.null(dots$order.by)) NULL else dots$order.by
812    alternative <- if(is.null(dots$alternative)) "greater" else dots$alternative
813    iterations  <- if(is.null(dots$iterations)) 15 else dots$iterations
814    exact       <- if(is.null(dots$exact)) NULL else dots$exact
815    tol         <- if(is.null(dots$tol)) 1e-10 else dots$tol
816
817    demy <- remove_pseries_features(demy) # needed as lmtest::dwtest cannot cope with pseries
818
819    auxformula <- demy ~ demX - 1
820    lm.mod <- lm(auxformula)
821
822    ARtest <- dwtest(lm.mod, order.by = order.by,
823                     alternative = alternative,
824                     iterations = iterations, exact = exact, tol = tol)
825
826    # overwrite elements of the values produced by lmtest::dwtest
827    ARtest$method <- "Durbin-Watson test for serial correlation in panel models"
828    ARtest$alternative <- "serial correlation in idiosyncratic errors"
829    ARtest$data.name <- data.name(x)
830    return(ARtest)
831}
832
833#' @rdname pdwtest
834#' @export
835pdwtest.formula <- function(x, data, ...) {
836  ## formula method for pdwtest;
837  ## defaults to pooling model
838
839  cl <- match.call(expand.dots = TRUE)
840  if (is.null(cl$model)) cl$model <- "pooling"
841  names(cl)[2L] <- "formula"
842  if (names(cl)[3L] == "") names(cl)[3L] <- "data"
843  m <- match(plm.arg, names(cl), 0)
844  cl <- cl[c(1L, m)]
845  cl[[1L]] <- quote(plm)
846  plm.model <- eval(cl, parent.frame())
847  pdwtest(plm.model, ...)
848}
849
850
851
852## references:
853## * balanced and consecutive:
854##    Bhargava/Franzini/Narendranathan (1982), Serial Correlation and the Fixed Effects Model, Review of Economic Studies (1982), XLIX(4), pp. 533-549.
855##    (also in Baltagi (2005/2013), p. 98-99/109-110 for FE application)
856## * unbalanced and/or non-consecutive: modified BNF statistic and LBI statistic
857##    Baltagi/Wu (1999), Unequally spaced panel data regressions with AR(1) disturbances. Econometric Theory, 15(6), pp. 814-823.
858##    (an example is also in Baltagi (2005/2013), p. 90/101)
859
860
861
862#' Modified BNF--Durbin--Watson Test and Baltagi--Wu's LBI Test for Panel
863#' Models
864#'
865#' Tests for AR(1) disturbances in panel models.
866#'
867#' The default, `test = "bnf"`, gives the (modified) BNF statistic,
868#' the generalised Durbin-Watson statistic for panels. For balanced
869#' and consecutive panels, the reference is
870#' Bhargava/Franzini/Narendranathan (1982). The modified BNF is given
871#' for unbalanced and/or non-consecutive panels (d1 in formula 16 of
872#' \insertCite{BALT:WU:99;textual}{plm}).
873#'
874#' `test = "lbi"` yields Baltagi--Wu's LBI statistic
875#' \insertCite{BALT:WU:99}{plm}, the locally best invariant test which
876#' is based on the modified BNF statistic.
877#'
878#' No specific variants of these tests are available for random effect models.
879#' As the within estimator is consistent also under the random effects
880#' assumptions, the test for random effect models is performed by taking the
881#' within residuals.
882#'
883#' No p-values are given for the statistics as their distribution is
884#' quite difficult. \insertCite{BHAR:FRAN:NARE:82;textual}{plm} supply
885#' tabulated bounds for p = 0.05 for the balanced case and consecutive
886#' case.
887#'
888#' For large N, \insertCite{BHAR:FRAN:NARE:82}{plm} suggest it is
889#' sufficient to check whether the BNF statistic is < 2 to test
890#' against positive serial correlation.
891#'
892#' @aliases pbnftest
893#' @param x an object of class `"panelmodel"` or of class `"formula"`,
894#' @param test a character indicating the test to be performed, either
895#'     `"bnf"` or `"lbi"` for the (modified) BNF statistic or
896#'     Baltagi--Wu's LBI statistic, respectively,
897#' @param data a `data.frame` (only relevant for formula interface),
898#' @param model a character indicating on which type of model the test
899#'     shall be performed (`"pooling"`, `"within"`, `"random"`, only
900#'     relevant for formula interface),
901#' @param \dots only relevant for formula interface: further arguments
902#'     to specify the model to test (arguments passed on to plm()),
903#'     e.g., `effect`.
904#' @return An object of class `"htest"`.
905#' @export
906#' @author Kevin Tappe
907#' @seealso [pdwtest()] for the original Durbin--Watson test using
908#'     (quasi-)demeaned residuals of the panel model without taking
909#'     the panel structure into account. [pbltest()], [pbsytest()],
910#'     [pwartest()] and [pwfdtest()] for other serial correlation
911#'     tests for panel models.
912#' @references
913#'
914#' \insertRef{BALT:13}{plm}
915#'
916#' \insertRef{BALT:WU:99}{plm}
917#'
918#' \insertRef{BHAR:FRAN:NARE:82}{plm}
919#'
920#' @keywords htest
921#' @examples
922#'
923#' data("Grunfeld", package = "plm")
924#'
925#' # formula interface, replicate Baltagi/Wu (1999), table 1, test case A:
926#' data_A <- Grunfeld[!Grunfeld[["year"]] %in% c("1943", "1944"), ]
927#' pbnftest(inv ~ value + capital, data = data_A, model = "within")
928#' pbnftest(inv ~ value + capital, data = data_A, test = "lbi", model = "within")
929#'
930#' # replicate Baltagi (2013), p. 101, table 5.1:
931#' re <- plm(inv ~ value + capital, data = Grunfeld, model = "random")
932#' pbnftest(re)
933#' pbnftest(re, test = "lbi")
934#'
935pbnftest <- function (x, ...) {
936  UseMethod("pbnftest")
937}
938
939#' @rdname pbnftest
940#' @export
941pbnftest.panelmodel <- function(x, test = c("bnf", "lbi"), ...) {
942
943  test <- match.arg(test)
944
945  # no test for random effects available: take FE as also consistent (Verbeek (2004, 2nd edition), p. 358)
946  model <- describe(x, "model")
947  if (model == "random") x <- update(x, model = "within")
948
949  consec <- all(is.pconsecutive(x))
950  balanced <- is.pbalanced(x)
951
952  # residuals are now class pseries, so diff.pseries is used and the
953  # differences are computed within observational units (not across as
954  # it would be the case if base::diff() is used and as it is done for
955  # lm-objects) NAs are introduced by the differencing as one
956  # observation is lost per observational unit
957  if (!inherits(residuals(x), "pseries")) stop("pdwtest internal error: residuals are not of class \"pseries\"") # check to be safe: need pseries
958
959  ind <- unclass(index(x))[[1L]] # unclass for speed
960  obs1 <- !duplicated(ind)                  # first ob of each individual
961  obsn <- !duplicated(ind, fromLast = TRUE) # last ob of each individual
962
963  #### d1, d2, d3, d4 as in Baltagi/Wu (1999), p. 819 formula (16)
964  res_crossprod <- as.numeric(crossprod(residuals(x))) # denominator
965
966  ## d1 consists of two parts:
967  ##  d1.1: BNF statistic (sum of squared differenced residuals of consecutive time periods per individual)
968  ##  d1.2: sum of squared "later" residuals (not differenced) surrounded by gaps in time periods
969  ##  typo in Baltagi/Wu (1999) for d1: index j starts at j = 2, not j = 1
970  res_diff <- diff(residuals(x), shift = "time")
971  d1.1 <- sum(res_diff^2, na.rm = T) / res_crossprod # == BNF (1982), formula (4)
972  d1.2_contrib <- as.logical(is.na(res_diff) - obs1)
973  d1.2 <- as.numeric(crossprod(residuals(x)[d1.2_contrib])) / res_crossprod
974  d1 <- d1.1 + d1.2 # == modified BNF statistic = d1 in Baltagi/Wu (1999) formula (16)
975                    #   [reduces to original BNF in case of balanced and consecutive data (d1.2 is zero)]
976
977  if (test == "bnf") {
978    stat <- d1
979    names(stat) <- "DW"
980    method <- "Bhargava/Franzini/Narendranathan Panel Durbin-Watson Test"
981    if (!consec || !balanced) method <- paste0("modified ", method)
982  }
983
984  if (test == "lbi")  {
985    ## d2 contains the "earlier" obs surrounded by gaps in time periods
986    d2_contrib <- as.logical(is.na(lead(residuals(x), shift = "time")) - obsn)
987    d2 <- as.numeric(crossprod(residuals(x)[d2_contrib])) / res_crossprod
988
989    ## d3, d4: sum squared residual of first/last time period for all individuals / crossprod(residuals)
990    d3 <- as.numeric(crossprod(residuals(x)[obs1])) / res_crossprod
991    d4 <- as.numeric(crossprod(residuals(x)[obsn])) / res_crossprod
992
993    stat <- d1 + d2 + d3 + d4
994    names(stat) <- "LBI"
995    method <- "Baltagi/Wu LBI Test for Serial Correlation in Panel Models"
996  }
997
998  result <- list(statistic   = stat,
999                 # p.value   = NA, # none
1000                 method      = method,
1001                 alternative = "serial correlation in idiosyncratic errors",
1002                 data.name   = data.name(x))
1003  class(result) <- "htest"
1004  return(result)
1005}
1006
1007#' @rdname pbnftest
1008#' @export
1009pbnftest.formula <- function(x, data, test = c("bnf", "lbi"), model = c("pooling", "within", "random"), ...) {
1010  ## formula method for pdwtest;
1011  ## defaults to pooling model
1012
1013  test  <- match.arg(test)
1014  model <- match.arg(model)
1015
1016  cl <- match.call(expand.dots = TRUE)
1017  if (is.null(model)) model <- "pooling"
1018  names(cl)[2L] <- "formula"
1019  if (names(cl)[3L] == "") names(cl)[3L] <- "data"
1020  m <- match(plm.arg, names(cl), 0)
1021  cl <- cl[c(1L, m)]
1022  cl[[1L]] <- quote(plm)
1023  plm.model <- eval(cl, parent.frame())
1024  pbnftest(plm.model, test = test)
1025}
1026
1027######### Baltagi and Li's LM_rho|mu ########
1028## ex Baltagi and Li (1995) Testing AR(1) against MA(1)...,
1029## JE 68, 133-151, test statistic (one-sided) is LM_4;
1030## see also idem (1997), Monte Carlo results...,
1031## Annales d'Econometrie et Statistique 48, formula (8)
1032
1033## from version 2: disposes of Kronecker products,
1034## thus much faster and feasible on large NT (original
1035## is already infeasible for NT>3000, this takes 10''
1036## on N=3000, T=10 and even 20000x10 (55'') is no problem;
1037## lme() hits the memory limit at ca. 20000x20)
1038
1039#' Baltagi and Li Serial Dependence Test For Random Effects Models
1040#'
1041#' \insertCite{BALT:LI:95;textual}{plm}'s Lagrange multiplier test for
1042#' AR(1) or MA(1) idiosyncratic errors in panel models with random
1043#' effects.
1044#'
1045#' This is a Lagrange multiplier test for the null of no serial
1046#' correlation, against the alternative of either an AR(1) or a MA(1)
1047#' process, in the idiosyncratic component of the error term in a
1048#' random effects panel model (as the analytical expression of the
1049#' test turns out to be the same under both alternatives,
1050#' \insertCite{@see @BALT:LI:95 and @BALT:LI:97}{plm}. The
1051#' `alternative` argument, defaulting to `twosided`, allows testing
1052#' for positive serial correlation only, if set to `onesided`.
1053#'
1054#' @aliases pbltest
1055#' @importFrom nlme lme
1056#' @param x a model formula or an estimated random--effects model of
1057#'     class `plm` ,
1058#' @param data for the formula interface only: a `data.frame`,
1059#' @param alternative one of `"twosided"`,
1060#'     `"onesided"`. Selects either \eqn{H_A: \rho \neq 0} or
1061#'     \eqn{H_A: \rho = 0} (i.e., the Normal or the Chi-squared
1062#'     version of the test),
1063#' @param index the index of the `data.frame`,
1064#' @param \dots further arguments.
1065#' @return An object of class `"htest"`.
1066#' @export
1067#' @author Giovanni Millo
1068#' @seealso [pdwtest()], [pbnftest()], [pbgtest()],
1069#'     [pbsytest()], [pwartest()] and
1070#'     [pwfdtest()] for other serial correlation tests for
1071#'     panel models.
1072#' @references
1073#'
1074#' \insertRef{BALT:LI:95}{plm}
1075#'
1076#' \insertRef{BALT:LI:97}{plm}
1077#'
1078#' @keywords htest
1079#' @examples
1080#'
1081#' data("Grunfeld", package = "plm")
1082#'
1083#' # formula interface
1084#' pbltest(inv ~ value + capital, data = Grunfeld)
1085#'
1086#' # plm interface
1087#' re_mod <- plm(inv ~ value + capital, data = Grunfeld, model = "random")
1088#' pbltest(re_mod)
1089#' pbltest(re_mod, alternative = "onesided")
1090#'
1091pbltest <- function (x, ...)
1092{
1093  UseMethod("pbltest")
1094}
1095
1096
1097#' @rdname pbltest
1098#' @export
1099pbltest.formula <- function(x, data, alternative = c("twosided", "onesided"), index = NULL, ...) {
1100 ## this version (pbltest0) based on a "formula, pdataframe" interface
1101
1102
1103  ## reduce X to model matrix value (no NAs)
1104    X <- model.matrix(x, data = data)
1105  ## reduce data accordingly
1106    data <- data[which(row.names(data) %in% row.names(X)), ]
1107    if (! inherits(data, "pdata.frame"))
1108        data <- pdata.frame(data, index = index)
1109
1110  ## need name of individual index
1111  gindex <- dimnames(attr(data, "index"))[[2L]][1L]
1112
1113 ## make random effects formula
1114  rformula <- NULL
1115  eval(parse(text = paste("rformula <- ~1|", gindex, sep = "")))
1116
1117  ## est. MLE model
1118  mymod <- lme(x, data = data, random = rformula, method = "ML")
1119
1120  nt. <- mymod$dims$N
1121  n. <- as.numeric(mymod$dims$ngrps[1])
1122  t. <- nt./n.
1123  Jt <- matrix(1, ncol = t., nrow = t.)/t.
1124  Et <- diag(1, t.) - Jt
1125  ## make 'bidiagonal' matrix (see BL, p.136)
1126  G <- matrix(0, ncol = t., nrow = t.)
1127  for(i in 2:t.) {
1128    G[i-1, i]   <- 1
1129    G[i,   i-1] <- 1
1130  }
1131
1132  ## retrieve composite (=lowest level) residuals
1133  uhat <- residuals(mymod, level = 0)
1134
1135  ## sigma2.e and sigma2.1 as in BL
1136  ## break up residuals by group to get rid of Kronecker prod.
1137  ## data have to be balanced and sorted by group/time, so this works
1138  uhat.i <- vector("list", n.)
1139  for(i in 1:n.) {
1140    uhat.i[[i]] <- uhat[t.*(i-1)+1:t.]
1141    }
1142  s2e <- rep(NA, n.)
1143  s21 <- rep(NA, n.)
1144  for(i in 1:n.) {
1145    u.i <- uhat.i[[i]]
1146    s2e[i] <- as.numeric(crossprod(u.i, Et) %*% u.i)
1147    s21[i] <- as.numeric(crossprod(u.i, Jt) %*% u.i)
1148    }
1149  sigma2.e <- sum(s2e) / (n.*(t.-1))
1150  sigma2.1 <- sum(s21) / n.
1151
1152  ## calc. score under the null:
1153  star1 <- (Jt/sigma2.1 + Et/sigma2.e) %*% G %*% (Jt/sigma2.1 + Et/sigma2.e)
1154  star2 <- rep(NA, n.)
1155  ## again, do this group by group to avoid Kronecker prod.
1156  for(i in 1:n.) {
1157    star2[i] <- as.numeric(crossprod(uhat.i[[i]], star1) %*% uhat.i[[i]])
1158    }
1159  star2 <- sum(star2)
1160  Drho <- (n.*(t.-1)/t.) * (sigma2.1-sigma2.e)/sigma2.1 + sigma2.e/2 * star2
1161  ## star2 is (crossprod(uhat, kronecker(In, star1)) %*% uhat)
1162
1163  ## components for the information matrix
1164  a <- (sigma2.e - sigma2.1)/(t.*sigma2.1)
1165  j.rr <- n. * (2 * a^2 * (t.-1)^2 + 2*a*(2*t.-3) + (t.-1))
1166  j.12 <- n.*(t.-1)*sigma2.e / sigma2.1^2
1167  j.13 <- n.*(t.-1)/t. * sigma2.e * (1/sigma2.1^2 - 1/sigma2.e^2)
1168  j.22 <- (n. * t.^2) / (2 * sigma2.1^2)
1169  j.23 <- (n. * t.) / (2 * sigma2.1^2)
1170  j.33 <- (n./2) * (1/sigma2.1^2 + (t.-1)/sigma2.e^2)
1171
1172  ## build up information matrix
1173  Jmat <- matrix(nrow = 3L, ncol = 3L)
1174  Jmat[1L, ] <- c(j.rr, j.12, j.13)
1175  Jmat[2L, ] <- c(j.12, j.22, j.23)
1176  Jmat[3L, ] <- c(j.13, j.23, j.33)
1177
1178  J11 <- n.^2 * t.^2 * (t.-1) / (det(Jmat) * 4*sigma2.1^2 * sigma2.e^2)
1179  ## this is the same as J11 <- solve(Jmat)[1,1], see BL page 73
1180
1181  switch(match.arg(alternative),
1182         "onesided" = {
1183           LMr.m <- Drho * sqrt(J11)
1184           pval <- pnorm(LMr.m, lower.tail = FALSE)
1185           names(LMr.m) <- "z"
1186           method1 <- "one-sided"
1187           method2 <- "H0: rho = 0, HA: rho > 0"
1188           parameter <- NULL
1189         },
1190         "twosided" = {
1191           LMr.m <- Drho^2 * J11
1192           pval <- pchisq(LMr.m, df = 1, lower.tail = FALSE)
1193           names(LMr.m) <- "chisq"
1194           parameter <- c(df = 1)
1195           method1 <- "two-sided"
1196           method2 <- "H0: rho = 0, HA: rho != 0"
1197         }
1198         )
1199  dname <- paste(deparse(substitute(x)))
1200  method <- paste("Baltagi and Li", method1, "LM test")
1201  alternative <- "AR(1)/MA(1) errors in RE panel model"
1202
1203  res <- list(statistic   = LMr.m,
1204              p.value     = pval,
1205              method      = method,
1206              alternative = alternative,
1207              parameter   = parameter,
1208              data.name   = dname)
1209
1210  class(res) <- "htest"
1211  res
1212}
1213
1214#' @rdname pbltest
1215#' @export
1216pbltest.plm <- function(x, alternative = c("twosided", "onesided"), ...) {
1217  # only continue if random effects model
1218  if (describe(x, "model") != "random") stop("Test is only for random effects models.")
1219
1220  # call pbltest.formula the right way
1221  pbltest.formula(formula(x$formula), data=cbind(index(x), x$model),
1222                  index=names(index(x)), alternative = alternative, ...)
1223}
1224
1225#' Wooldridge first--difference--based test for AR(1) errors in levels
1226#' or first--differenced panel models
1227#'
1228#' First--differencing--based test of serial correlation for (the idiosyncratic
1229#' component of) the errors in either levels or first--differenced panel
1230#' models.
1231#'
1232#' As \insertCite{WOOL:10;textual}{plm}, Sec. 10.6.3 observes, if the
1233#' idiosyncratic errors in the model in levels are uncorrelated (which
1234#' we label hypothesis `"fe"`), then the errors of the model in first
1235#' differences (FD) must be serially correlated with
1236#' \eqn{cor(\hat{e}_{it}, \hat{e}_{is}) = -0.5} for each \eqn{t,s}. If
1237#' on the contrary the levels model's errors are a random walk, then
1238#' there must be no serial correlation in the FD errors (hypothesis
1239#' `"fd"`). Both the fixed effects (FE) and the first--differenced
1240#' (FD) estimators remain consistent under either assumption, but the
1241#' relative efficiency changes: FE is more efficient under `"fe"`, FD
1242#' under `"fd"`.
1243#'
1244#' Wooldridge (ibid.) suggests basing a test for either hypothesis on
1245#' a pooled regression of FD residuals on their first lag:
1246#' \eqn{\hat{e}_{i,t}=\alpha + \rho \hat{e}_{i,t-1} +
1247#' \eta_{i,t}}. Rejecting the restriction \eqn{\rho = -0.5} makes us
1248#' conclude against the null of no serial correlation in errors of the
1249#' levels equation (`"fe"`). The null hypothesis of no serial
1250#' correlation in differenced errors (`"fd"`) is tested in a similar
1251#' way, but based on the zero restriction on \eqn{\rho} (\eqn{\rho =
1252#' 0}). Rejecting `"fe"` favours the use of the first--differences
1253#' estimator and the contrary, although it is possible that both be
1254#' rejected.
1255#'
1256#' `pwfdtest` estimates the `fd` model (or takes an `fd` model as
1257#' input for the panelmodel interface) and retrieves its residuals,
1258#' then estimates an AR(1) `pooling` model on them. The test statistic
1259#' is obtained by applying a F test to the latter model to test the
1260#' relevant restriction on \eqn{\rho}, setting the covariance matrix
1261#' to `vcovHC` with the option `method="arellano"` to control for
1262#' serial correlation.
1263#'
1264#' Unlike the `pbgtest` and `pdwtest`, this test does not rely on
1265#' large--T asymptotics and has therefore good properties in ''short''
1266#' panels.  Furthermore, it is robust to general
1267#' heteroskedasticity. The `"fe"` version can be used to test for
1268#' error autocorrelation regardless of whether the maintained
1269#' specification has fixed or random effects
1270#' \insertCite{@see @DRUK:03}{plm}.
1271#'
1272#' @aliases pwfdtest
1273#' @param x an object of class `formula` or a `"fd"`-model (plm
1274#' object),
1275#' @param data a `data.frame`,
1276#' @param h0 the null hypothesis: one of `"fd"`, `"fe"`,
1277#' @param \dots further arguments to be passed on to `vcovHC` (see Details
1278#' and Examples).
1279#' @return An object of class `"htest"`.
1280#' @export
1281#' @author Giovanni Millo
1282#' @seealso `pdwtest`, `pbgtest`, `pwartest`,
1283#' @references
1284#'
1285#' \insertRef{DRUK:03}{plm}
1286#'
1287#' \insertRef{WOOL:02}{plm}
1288#' Sec. 10.6.3, pp. 282--283.
1289#'
1290#' \insertRef{WOOL:10}{plm}
1291#' Sec. 10.6.3, pp. 319--320
1292#'
1293#' @keywords htest
1294#' @examples
1295#'
1296#' data("EmplUK" , package = "plm")
1297#' pwfdtest(log(emp) ~ log(wage) + log(capital), data = EmplUK)
1298#' pwfdtest(log(emp) ~ log(wage) + log(capital), data = EmplUK, h0 = "fe")
1299#'
1300#' # pass argument 'type' to vcovHC used in test
1301#' pwfdtest(log(emp) ~ log(wage) + log(capital), data = EmplUK, type = "HC3", h0 = "fe")
1302#'
1303#'
1304#' # same with panelmodel interface
1305#' mod <- plm(log(emp) ~ log(wage) + log(capital), data = EmplUK, model = "fd")
1306#' pwfdtest(mod)
1307#' pwfdtest(mod, h0 = "fe")
1308#' pwfdtest(mod, type = "HC3", h0 = "fe")
1309#'
1310#'
1311pwfdtest <- function(x, ...) {
1312  UseMethod("pwfdtest")
1313}
1314
1315#' @rdname pwfdtest
1316#' @export
1317pwfdtest.formula <- function(x, data, ..., h0 = c("fd", "fe")) {
1318  cl <- match.call(expand.dots = TRUE)
1319  if (is.null(cl$model)) cl$model <- "fd"
1320  names(cl)[2L] <- "formula"
1321  if (names(cl)[3L] == "") names(cl)[3L] <- "data"
1322  m <- match(plm.arg, names(cl), 0)
1323  cl <- cl[c(1L, m)]
1324  cl[[1L]] <- quote(plm)
1325  plm.model <- eval(cl, parent.frame())
1326  pwfdtest(plm.model, ..., h0 = h0)
1327}
1328
1329#' @rdname pwfdtest
1330#' @export
1331pwfdtest.panelmodel <- function(x, ..., h0 = c("fd", "fe")) {
1332  ## first-difference-based serial correlation test for panel models
1333  ## ref.: Wooldridge (2002/2010), par. 10.6.3
1334
1335  # interface check
1336  model <- describe(x, "model")
1337  if (model != "fd") stop(paste0("input 'x' needs to be a \"fd\" model (first-differenced model), but is \"", model, "\""))
1338
1339  ## fetch fd residuals
1340  FDres <- x$residuals
1341  ## indices (full length! must reduce by 1st time period)
1342  ## this is an ad-hoc solution for the fact that the 'fd' model
1343  ## carries on the full indices while losing the first time period
1344  xindex <- unclass(attr(model.frame(x), "index")) # unclass for speed
1345  time <- as.numeric(xindex[[2L]])
1346  id   <- as.numeric(xindex[[1L]])
1347
1348  ## fetch dimensions and adapt to those of indices
1349  pdim <- pdim(x)
1350  n <- pdim$nT$n
1351  Ti_minus_one <- pdim$Tint$Ti-1
1352
1353  ## generate new individual index: drop one observation per individual
1354  ## NB: This is based on the assumption that the estimated FD model performs
1355  ##     its diff-ing row-wise (it currently does so). If the diff-ing for FD
1356  ##     is changed to diff-ing based on time dimension, this part about index
1357  ##     creation needs to be re-worked because more than 1 observation per
1358  ##     individual can be dropped
1359  red_id <- integer()
1360  for(i in 1:n) {
1361    red_id <- c(red_id, rep(i, Ti_minus_one[i]))
1362  }
1363  # additional check
1364  # (but should error earlier already as the FD model should be nonestimable)
1365  if(length(red_id) == 0L)
1366    stop("only individuals with one observation in original data: test not feasible")
1367
1368  # make pdata.frame for auxiliary regression: time dimension is not relevant
1369  # as the first observation of each individual was dropped -> let time dimension
1370  # be created (is not related to the original times anymore)
1371  auxdata <- pdata.frame(as.data.frame(cbind(red_id, FDres)), index = "red_id")
1372
1373  # lag residuals by row (as the FD model diffs by row)
1374  # NB: need to consider change to shift = "time" if behaviour of FD model is changed
1375  auxdata[["FDres.1"]] <- lag(auxdata[["FDres"]], shift = "row")
1376
1377  ## pooling model FDres vs. lag(FDres), with intercept (might as well do it w.o.)
1378  auxmod <- plm(FDres ~ FDres.1, data = auxdata, model = "pooling")
1379
1380  switch(match.arg(h0),
1381         "fd" = {h0des <- "differenced"
1382         ## theoretical rho under H0: no serial
1383         ## corr. in differenced errors is 0
1384         rho.H0 <- 0},
1385
1386         "fe" = {h0des <- "original"
1387         ## theoretical rho under H0: no serial
1388         ## corr. in original errors is -0.5
1389         rho.H0 <- -0.5})
1390
1391  myH0 <- paste("FDres.1 = ", as.character(rho.H0), sep="")
1392
1393  ## test H0: rho=rho.H0 with HAC, more params may be passed via ellipsis
1394  myvcov <- function(x) vcovHC(x, method = "arellano", ...)
1395
1396  # calc F stat with restriction rho.H0 and robust vcov
1397  FDARstat <- ((coef(auxmod)["FDres.1"] - rho.H0)/sqrt(myvcov(auxmod)["FDres.1", "FDres.1"]))^2
1398  names(FDARstat) <- "F"
1399  df1 <- c(df1 = 1)
1400  df2 <- c(df2 = df.residual(auxmod))
1401  pFDARstat <- pf(FDARstat, df1 = df1, df2 = df2, lower.tail = FALSE)
1402
1403  ## insert usual htest features
1404  RVAL <- list(statistic   = FDARstat,
1405               parameter   = c(df1, df2),
1406               p.value     = pFDARstat,
1407               method      = "Wooldridge's first-difference test for serial correlation in panels",
1408               alternative = paste("serial correlation in", h0des, "errors"),
1409               data.name   = paste(deparse(substitute(x))))
1410  class(RVAL) <- "htest"
1411  return(RVAL)
1412}
1413