1## This file contains the relevant transformations used for panel data, 2## namely of course Within and between/Between, but also Sum (useful for 3## unbalanced panels). 4 5## They are all generics and have default, pseries and matrix 6## methods. The effect argument is an index vector for the default method 7## and a character ("individual", "time", "group", "twoways") for the 8## pseries method. It can be any of the two for the matrix method (the 9## second one only if the matrix argument has an index attribute 10 11## diff, lag and lead methods for pseries are also provided (lead is a 12## generic exported by plm, lag and diff being generic exported by 13## stats). All of them have a shift argument which can be either "time" 14## or "row". 15 16 17 18#' panel series 19#' 20#' A class for panel series for which several useful computations and 21#' data transformations are available. 22#' 23#' The functions `between`, `Between`, `Within`, and `Sum` perform specific 24#' data transformations, i. e., the between, within, and sum transformation, 25#' respectively. 26#' 27#' `between` returns a vector/matrix containing the individual means (over 28#' time) with the length of the vector equal to the number of 29#' individuals (if `effect = "individual"` (default); if `effect = "time"`, 30#' it returns the time means (over individuals)). `Between` 31#' duplicates the values and returns a vector/matrix which length/number of rows 32#' is the number of total observations. `Within` returns a vector/matrix 33#' containing the values in deviation from the individual means 34#' (if `effect = "individual"`, from time means if `effect = "time"`), the so 35#' called demeaned data. `Sum` returns a vector/matrix with sum per individual 36#' (over time) or the sum per time period (over individuals) with 37#' `effect = "individual"` or `effect = "time"`, respectively, and has length/ 38#' number of rows of the total observations (like `Between`). 39#' 40#' For `between`, `Between`, `Within`, and `Sum` in presence of NA values it 41#' can be useful to supply `na.rm = TRUE` as an additional argument to 42#' keep as many observations as possible in the resulting transformation. 43#' na.rm is passed on to the mean()/sum() function used by these transformations 44#' (i.e., it does not remove NAs prior to any processing!), see also 45#' **Examples**. 46#' 47#' @name pseries 48#' @aliases pseries 49#' @param x,object a `pseries` or a matrix; or a `summary.pseries` object, 50#' @param effect for the pseries methods: character string indicating the 51#' `"individual"`, `"time"`, or `"group"` effect, for `Within` 52#' `"twoways"` additionally; for non-pseries methods, `effect` is a factor 53#' specifying the dimension (`"twoways"` is not possible), 54#' @param idbyrow if `TRUE` in the `as.matrix` method, the lines of 55#' the matrix are the individuals, 56#' @param rm.null if `TRUE`, for the `Within.matrix` method, remove 57#' the empty columns, 58#' @param plot,scale,transparency,col,lwd plot arguments, 59#' @param \dots further arguments, e. g., `na.rm = TRUE` for 60#' transformation functions like `beetween`, see **Details** 61#' and **Examples**. 62#' @return All these functions return an object of class `pseries` or a matrix, 63#' except:\cr `between`, which returns a numeric vector or a matrix; 64#' `as.matrix`, which returns a matrix. 65#' @author Yves Croissant 66#' @seealso [is.pseries()] to check if an object is a pseries. For 67#' more functions on class 'pseries' see [lag()], [lead()], 68#' [diff()] for lagging values, leading values (negative lags) and 69#' differencing. 70#' @keywords classes 71#' @examples 72#' 73#' # First, create a pdata.frame 74#' data("EmplUK", package = "plm") 75#' Em <- pdata.frame(EmplUK) 76#' 77#' # Then extract a series, which becomes additionally a pseries 78#' z <- Em$output 79#' class(z) 80#' 81#' # obtain the matrix representation 82#' as.matrix(z) 83#' 84#' # compute the between and within transformations 85#' between(z) 86#' Within(z) 87#' 88#' # Between and Sum replicate the values for each time observation 89#' Between(z) 90#' Sum(z) 91#' 92#' # between, Between, Within, and Sum transformations on other dimension 93#' between(z, effect = "time") 94#' Between(z, effect = "time") 95#' Within(z, effect = "time") 96#' Sum(z, effect = "time") 97#' 98#' # NA treatment for between, Between, Within, and Sum 99#' z2 <- z 100#' z2[length(z2)] <- NA # set last value to NA 101#' between(z2, na.rm = TRUE) # non-NA value for last individual 102#' Between(z2, na.rm = TRUE) # only the NA observation is lost 103#' Within(z2, na.rm = TRUE) # only the NA observation is lost 104#' Sum(z2, na.rm = TRUE) # only the NA observation is lost 105#' 106#' sum(is.na(Between(z2))) # 9 observations lost due to one NA value 107#' sum(is.na(Between(z2, na.rm = TRUE))) # only the NA observation is lost 108#' sum(is.na(Within(z2))) # 9 observations lost due to one NA value 109#' sum(is.na(Within(z2, na.rm = TRUE))) # only the NA observation is lost 110#' sum(is.na(Sum(z2))) # 9 observations lost due to one NA value 111#' sum(is.na(Sum(z2, na.rm = TRUE))) # only the NA observation is lost 112#' 113NULL 114 115 116 117#' @rdname pseries 118#' @export 119print.pseries <- function(x, ...){ 120 x.orig <- x 121 attr(x, "index") <- NULL 122 attr(x, "class") <- base::setdiff(attr(x, "class"), "pseries") 123 if(length(attr(x, "class")) == 1L && class(x) %in% c("character", "logical", "numeric", "integer", "complex")) { 124 attr(x, "class") <- NULL 125 } 126 print(x, ...) 127 x.orig 128} 129 130#' @rdname pseries 131#' @export 132as.matrix.pseries <- function(x, idbyrow = TRUE, ...){ 133 index <- unclass(attr(x, "index")) # unclass for speed 134 id <- index[[1L]] 135 time <- index[[2L]] 136 time.names <- levels(time) 137 x <- split(data.frame(x, time), id) 138 x <- lapply(x, function(x){ 139 rownames(x) <- x[ , 2L] 140 x[ , -2L, drop = FALSE] 141 }) 142 x <- lapply(x, function(x){ 143 x <- x[time.names, , drop = FALSE] 144 rownames(x) <- time.names 145 x 146 } 147 ) 148 id.names <- names(x) 149 x <- as.matrix(as.data.frame((x))) 150 colnames(x) <- id.names 151 if(idbyrow) x <- t(x) 152 x 153} 154 155## plots a panel series by time index 156## 157## can supply any panel function, e.g., a loess smoother 158## > mypanel<-function(x,...) { 159## + panel.xyplot(x,...) 160## + panel.loess(x, col="red", ...)} 161## > 162## > plot(pres(mod), panel=mypanel) 163 164#' @rdname pseries 165#' @importFrom lattice xyplot 166#' @export 167plot.pseries <- function(x, plot = c("lattice", "superposed"), 168 scale = FALSE, transparency = TRUE, 169 col = "blue", lwd = 1, ...) { 170 171 if(scale) { 172 scalefun <- function(x) scale(x) 173 } else { 174 scalefun <- function(x) return(x)} 175 176 nx <- as.numeric(x) 177 ind <- attr(x, "index")[[1L]] 178 tind <- attr(x, "index")[[2L]] # possibly as.numeric(): 179 # activates autom. tick 180 # but loses time labels 181 182 xdata <- data.frame(nx = nx, ind = ind, tind = tind) 183 184 switch(match.arg(plot), 185 "lattice" = { 186 ##require(lattice) # make a ggplot2 version 187 xyplot(nx ~ tind | ind, data = xdata, type = "l", col = col, ...) 188 }, 189 "superposed" = { 190 ylim <- c(min(tapply(scalefun(nx), ind, min, na.rm = TRUE)), 191 max(tapply(scalefun(nx), ind, max, na.rm = TRUE))) 192 unind <- unique(ind) 193 nx1 <- nx[ind == unind[1L]] 194 tind1 <- as.numeric(tind[ind == unind[1L]]) 195 ## plot empty plot to provide frame 196 plot(NA, xlim = c(min(as.numeric(tind)), 197 max(as.numeric(tind))), 198 ylim = ylim, xlab = "", ylab = "", xaxt = "n", ...) 199 axis(1, at = as.numeric(unique(tind)), 200 labels = unique(tind)) 201 202 ## determine lwd and transparency level as a function 203 ## of n 204 if(transparency) { 205 alpha <- 5 / length(unind) 206 col <- heat.colors(1, alpha = alpha) 207 lwd <- length(unind) / 10 208 } 209 ## plot lines (notice: tind. are factors, so they 210 ## retain the correct labels which would be lost if 211 ## using as.numeric 212 for(i in 1:length(unind)) { 213 nxi <- nx[ind == unind[i]] 214 tindi <- tind[ind == unind[i]] 215 lines(x = tindi, y = scalefun(nxi), 216 col = col, lwd = lwd, ...) 217 } 218 }) 219} 220 221#' @rdname pseries 222#' @export 223summary.pseries <- function(object, ...) { 224 if(!inherits(object, c("factor", "logical", "character"))) { 225 index <- unclass(attr(object, "index")) # unclass for speed 226 id <- index[[1L]] 227 time <- index[[2L]] 228 Bid <- Between(object, na.rm = TRUE) 229 Btime <- Between(object, effect = "time", na.rm = TRUE) 230 ## res <- structure(c(total = sumsq(object), 231 ## between_id = sumsq(Bid), 232 ## between_time = sumsq(Btime)), 233 ## class = c("summary.pseries", "numeric")) 234 res <- structure(c(total = sum( (na.omit(object) - mean(object, na.rm = TRUE)) ^ 2), 235 between_id = sum( (na.omit(Bid) - mean(Bid, na.rm = TRUE)) ^ 2), 236 between_time = sum( (na.omit(Btime) - mean(Btime, na.rm = TRUE)) ^ 2)), 237 class = c("summary.pseries", "numeric")) 238 } else { 239 class(object) <- setdiff(class(object), c("pseries")) 240 res <- summary(object, ...) 241 class(res) <- c("summary.pseries", class(object), class(res)) 242 } 243 return(res) 244} 245 246#' @rdname pseries 247#' @export 248plot.summary.pseries <- function(x, ...){ 249 x <- as.numeric(x) 250 share <- x[-1L]/x[1L] # vec with length == 2 251 names(share) <- c("id", "time") 252 barplot(share, ...) 253} 254 255#' @rdname pseries 256#' @export 257print.summary.pseries <- function(x, ...){ 258 x.orig <- x 259 digits <- getOption("digits") 260 special_treatment_vars <- c("factor", "logical", "character") 261 if(!inherits(x, special_treatment_vars)) { 262 x <- as.numeric(x) 263 share <- x[-1L]/x[1L] # vec with length == 2 264 names(share) <- c("id", "time") 265 cat(paste("total sum of squares:", signif(x[1L], digits = digits),"\n")) 266 print.default(share, ...) 267 } else { 268 class(x) <- setdiff(class(x), c("summary.pseries", special_treatment_vars)) 269 print(x, ...) 270 } 271 invisible(x.orig) 272} 273 274 275Tapply <- function(x, ...) { 276 UseMethod("Tapply") 277} 278 279myave <- function(x, ...) { 280 UseMethod("myave") 281} 282 283Tapply.default <- function(x, effect, func, ...) { 284 # argument 'effect' is assumed to be a factor 285 na.x <- is.na(x) 286 uniqval <- tapply(x, effect, func, ...) 287 nms <- attr(uniqval, "dimnames")[[1L]] 288 attr(uniqval, "dimnames") <- attr(uniqval, "dim") <- NULL 289 names(uniqval) <- nms 290 result <- uniqval[as.character(effect)] 291 result[na.x] <- NA 292 return(result) 293} 294 295#' @importFrom stats ave 296myave.default <- function(x, effect, func, ...) { 297 # argument 'effect' is assumed to be a factor 298 na.x <- is.na(x) 299 res <- ave(x, effect, FUN = function(x) func(x, ...)) 300 names(res) <- as.character(effect) 301 res[na.x] <- NA 302 return(res) 303} 304 305Tapply.pseries <- function(x, effect = c("individual", "time", "group"), func, ...){ 306 effect <- match.arg(effect) 307 xindex <- unclass(attr(x, "index")) # unclass for speed 308 checkNA.index(xindex) # index may not contain any NA 309 effect <- switch(effect, 310 "individual"= xindex[[1L]], 311 "time" = xindex[[2L]], 312 "group" = xindex[[3L]] 313 ) 314 z <- as.numeric(x) 315 z <- Tapply.default(z, effect, func, ...) 316 attr(z, "index") <- attr(x, "index") # insert original index 317 class(z) <- c("pseries", class(z)) 318 return(z) 319} 320 321myave.pseries <- function(x, effect = c("individual", "time", "group"), func, ...) { 322 effect <- match.arg(effect) 323 xindex <- unclass(attr(x, "index")) # unclass for speed 324 checkNA.index(xindex) # index may not contain any NA 325 eff.fac <- switch(effect, 326 "individual"= xindex[[1L]], 327 "time" = xindex[[2L]], 328 "group" = xindex[[3L]] 329 ) 330 z <- as.numeric(x) 331 z <- myave.default(z, eff.fac, func, ...) 332 attr(z, "index") <- attr(x, "index") # insert original index 333 class(z) <- c("pseries", class(z)) 334 return(z) 335} 336 337Tapply.matrix <- function(x, effect, func, ...) { 338 # argument 'effect' is assumed to be a factor 339 na.x <- is.na(x) 340 uniqval <- apply(x, 2, tapply, effect, func, ...) 341 result <- uniqval[as.character(effect), , drop = FALSE] 342 result[na.x] <- NA_real_ 343 return(result) 344} 345 346myave.matrix <- function(x, effect, func, ...) { 347 # argument 'effect' is assumed to be a factor 348 na.x <- is.na(x) 349 result <- apply(x, 2, FUN = function(x) ave(x, effect, FUN = function(y) func(y, ...))) 350 rownames(result) <- as.character(effect) 351 result[na.x] <- NA_real_ 352 return(result) 353} 354 355## non-exported 356Mean <- function(x) matrix(.colMeans(x, nrow(x), ncol(x)), 357 nrow(x), ncol(x), byrow = TRUE) 358 359#' @rdname pseries 360#' @export 361Sum <- function(x, ...) { 362 UseMethod("Sum") 363} 364 365#' @rdname pseries 366#' @export 367Sum.default <- function(x, effect, ...) { 368# print("Sum.default(.baseR)") 369# browser() 370 371 # argument 'effect' is assumed to be a factor 372 if(!is.numeric(x)) stop("The Sum function only applies to numeric vectors") 373 # Tapply(x, effect, sum, ...) 374 return(myave(x, droplevels(effect), sum, ...)) 375} 376 377#' @rdname pseries 378#' @export 379Sum.pseries <- function(x, effect = c("individual", "time", "group"), ...) { 380# print("Sum.pseries(.baseR)") 381# browser() 382 383 effect <- match.arg(effect) 384 # Tapply(x, effect, sum, ...) 385 # myave.pseries takes care of checking the index for NAs 386 return(myave(x, effect, sum, ...)) 387} 388 389#' @rdname pseries 390#' @export 391Sum.matrix <- function(x, effect, ...) { 392# print("Sum.matrix(.baseR)") 393# browser() 394 395 # if no index attribute, argument 'effect' is assumed to be a factor 396 eff.fac <- if(is.null(xindex <- attr(x, "index"))) { 397 droplevels(effect) 398 } else { 399 if(!is.character(effect) && length(effect) > 1L) 400 stop("for matrices with index attributes, the effect argument must be a character") 401 if(! effect %in% c("individual", "time", "group")) 402 stop("irrelevant effect for a between transformation") 403 eff.no <- switch(effect, 404 "individual" = 1L, 405 "time" = 2L, 406 "group" = 3L, 407 stop("unknown value of argument 'effect'")) 408 xindex <- unclass(xindex) # unclass for speed 409 checkNA.index(xindex) # index may not contain any NA 410 xindex[[eff.no]] 411 } 412 return(myave(x, eff.fac, sum, ...)) 413} 414 415#' @rdname pseries 416#' @export 417Between <- function(x, ...) { 418 UseMethod("Between") 419} 420 421#' @rdname pseries 422#' @export 423Between.default <- function(x, effect, ...) { 424# print("Between.default(.baseR)") 425# browser() 426 427 # argument 'effect' is assumed to be a factor 428 if(!is.numeric(x)) stop("The Between function only applies to numeric vectors") 429 # Tapply(x, effect, mean, ...) 430 return(myave(x, droplevels(effect), mean, ...)) 431} 432 433#' @rdname pseries 434#' @export 435Between.pseries <- function(x, effect = c("individual", "time", "group"), ...) { 436# print("Between.pseries(.baseR)") 437# browser() 438 439 effect <- match.arg(effect) 440 # Tapply(x, effect = effect, mean, ...) 441 # myave.pseries takes care of checking the index for NAs 442 return(myave(x, effect = effect, mean, ...)) 443} 444 445#' @rdname pseries 446#' @export 447Between.matrix <- function(x, effect, ...) { 448# print("Between.matrix(.baseR)") 449# browser() 450 451 # if no index attribute, argument 'effect' is assumed to be a factor 452 eff.fac <- if(is.null(xindex <- attr(x, "index"))) { 453 droplevels(effect) 454 } else { 455 if(!is.character(effect) && length(effect) > 1L) 456 stop("for matrices with index attributes, the effect argument must be a character") 457 if(! effect %in% c("individual", "time", "group")) 458 stop("irrelevant effect for a between transformation") 459 eff.no <- switch(effect, 460 "individual" = 1L, 461 "time" = 2L, 462 "group" = 3L, 463 stop("unknown value of argument 'effect'")) 464 xindex <- unclass(xindex) 465 checkNA.index(xindex) # index may not contain any NA 466 xindex[[eff.no]] 467 } 468 return(myave.matrix(x, eff.fac, mean, ...)) 469} 470 471#' @rdname pseries 472#' @export 473between <- function(x, ...) { 474 UseMethod("between") 475} 476 477#' @rdname pseries 478#' @export 479between.default <- function(x, effect, ...) { 480# print("between.default(.baseR)") 481# browser() 482 483 # argument 'effect' is assumed to be a factor 484 if(!is.numeric(x)) stop("The between function only applies to numeric vectors") 485 486 # use tapply here as tapply's output is sorted by levels factor effect (unlike ave's output) 487 # difference is only relevant for between (small "b") as data is compressed down to # levels 488 res <- tapply(x, droplevels(effect), mean, ...) 489 nms <- attr(res, "dimnames")[[1L]] 490 attr(res, "dimnames") <- attr(res, "dim") <- NULL 491 names(res) <- nms 492 return(res) 493} 494 495#' @rdname pseries 496#' @export 497between.pseries <- function(x, effect = c("individual", "time", "group"), ...) { 498# print("between.pseries(.baseR)") 499# browser() 500 501 effect <- match.arg(effect) 502 xindex <- unclass(attr(x, "index")) # unclass for speed 503 checkNA.index(xindex) # index may not contain any NA 504 eff.fac <- switch(effect, 505 "individual" = xindex[[1L]], 506 "time" = xindex[[2L]], 507 "group" = xindex[[3L]], 508 ) 509 res <- between.default(x, effect = eff.fac, ...) 510 # data compressed by transformation, so pseries features, esp. index, do not make sense 511 res <- remove_pseries_features(res) 512 return(res) 513} 514 515#' @rdname pseries 516#' @export 517between.matrix <- function(x, effect, ...) { 518# print("between.matrix(.baseR)") 519# browser() 520 521 # if no index attribute, argument 'effect' is assumed to be a factor 522 eff.fac <- if(is.null(xindex <- attr(x, "index"))) { 523 droplevels(effect) 524 } else { 525 if(!is.character(effect) && length(effect) > 1L) 526 stop("for matrices with index attributes, the effect argument must be a character") 527 if(! effect %in% c("individual", "time", "group")) 528 stop("irrelevant effect for a between transformation") 529 eff.no <- switch(effect, 530 "individual" = 1L, 531 "time" = 2L, 532 "group" = 3L, 533 stop("unknown value of argument 'effect'")) 534 xindex <- unclass(xindex) # unclass for speed 535 checkNA.index(xindex) # index may not contain any NA 536 xindex[[eff.no]] 537 } 538 539 # use tapply here as tapply's output is sorted by levels factor effect (unlike ave's output) 540 # difference is only relevant for between (small "b") as data is compressed down to # levels 541 res <- apply(x, 2, tapply, eff.fac, mean, ...) 542 return(res) 543} 544 545#' @rdname pseries 546#' @export 547Within <- function(x, ...) { 548 UseMethod("Within") 549} 550 551#' @rdname pseries 552#' @export 553Within.default <- function(x, effect, ...) { 554# print("Within.default(.baseR)") 555# browser() 556 557 # arg 'effect' is assumed to be a factor 558 559 # NB: Contrary to the other Within.* methods, Within.default does not handle 560 # twoways effects 561 # TODO: could add support for twoways by supplying a list containing two factors 562 if(!is.numeric(x)) stop("the within function only applies to numeric vectors") 563 return(x - Between(x, droplevels(effect), ...)) 564} 565 566#' @rdname pseries 567#' @export 568Within.pseries <- function(x, effect = c("individual", "time", "group", "twoways"), ...) { 569# print("Within.pseries(.baseR)") 570# browser() 571 572 effect <- match.arg(effect) 573 xindex <- unclass(attr(x, "index")) # unclass for speed 574 checkNA.index(xindex) # index may not contain any NA 575 if(effect != "twoways") result <- x - Between(x, effect, ...) 576 else { 577 if(is.pbalanced(x)) result <- x - Between(x, "individual", ...) - Between(x, "time") + mean(x, ...) 578 else { 579 time <- xindex[[2L]] 580 Dmu <- model.matrix(~ time - 1) 581 attr(Dmu, "index") <- attr(x, "index") # need original index 582 W1 <- Within(x, "individual", ...) 583 WDmu <- Within(Dmu, "individual", ...) 584 W2 <- lm.fit(WDmu, x)$fitted.values 585 result <- W1 - W2 586 } 587 } 588 return(result) 589} 590 591#' @rdname pseries 592#' @export 593Within.matrix <- function(x, effect, rm.null = TRUE, ...) { 594# print("Within.matrix(.baseR)") 595# browser() 596 597 if(is.null(xindex <- unclass(attr(x, "index")))) { # unclass for speed 598 # non-index case 599 result <- Within.default(x, effect, ...) 600 othervar <- colSums(abs(x)) > sqrt(.Machine$double.eps) 601 if(rm.null) { 602 result <- result[ , othervar, drop = FALSE] 603 attr(result, "constant") <- character(0) 604 } 605 else attr(result, "constant") <- colnames(x)[! othervar] 606 return(result) 607 } 608 else { 609 # index case 610 if(effect %in% c("individual", "time", "group")) result <- x - Between(x, effect, ...) 611 if(effect == "twoways") { 612 checkNA.index(xindex) # index may not contain any NA 613 if(is.pbalanced(xindex[[1L]], xindex[[2L]])) { 614 result <- x - Between(x, "individual", ...) - Between(x, "time", ...) + 615 matrix(colMeans(x, ...), nrow = nrow(x), ncol = ncol(x), byrow = TRUE) 616 } 617 else { # unbalanced twoways 618 time <- xindex[[2L]] 619 Dmu <- model.matrix(~ time - 1) 620 attr(Dmu, "index") <- attr(x, "index") # need orig. index here 621 W1 <- Within(x, "individual", rm.null = FALSE, ...) 622 WDmu <- Within(Dmu, "individual", ...) 623 W2 <- lm.fit(WDmu, x)$fitted.values 624 result <- W1 - W2 625 } 626 } 627 } 628 return(result) 629} 630 631############### LAG and DIFF 632# 633# lag/lead/diff for pseries are a wrappers for lagt, leadt, difft (if shift = "time") and 634# for lagr, leadr, diffr (if shift = "row") 635# 636# The "t" and "r" methods are not exported (by intention). 637# 638# The "t" methods perform shifting while taking the time period into 639# account (they "look" at the value in the time dimension). 640# 641# The "r" methods perform shifting row-wise (without taking the value 642# in the time dimension into account). 643# 644# Generic needed only for lead (lag and diff generics are already included in base R) 645 646 647#' lag, lead, and diff for panel data 648#' 649#' lag, lead, and diff functions for class pseries. 650#' 651#' This set of functions perform lagging, leading (lagging in the 652#' opposite direction), and differencing operations on `pseries` 653#' objects, i. e., they take the panel structure of the data into 654#' account by performing the operations per individual. 655#' 656#' Argument `shift` controls the shifting of observations to be used 657#' by methods `lag`, `lead`, and `diff`: 658#' 659#' - `shift = "time"` (default): Methods respect the 660#' numerical value in the time dimension of the index. The time 661#' dimension needs to be interpretable as a sequence t, t+1, t+2, 662#' \ldots{} where t is an integer (from a technical viewpoint, 663#' `as.numeric(as.character(index(your_pdata.frame)[[2]]))` needs to 664#' result in a meaningful integer). 665#' 666#' - `shift = "row": `Methods perform the shifting operation based 667#' solely on the "physical position" of the observations, 668#' i.e., neighbouring rows are shifted per individual. The value in the 669#' time index is not relevant in this case. 670#' 671#' For consecutive time periods per individual, a switch of shifting 672#' behaviour results in no difference. Different return values will 673#' occur for non-consecutive time periods per individual 674#' ("holes in time"), see also Examples. 675#' 676#' @name lag.plm 677#' @aliases lag lead diff 678#' @importFrom stats lag 679#' @param x a `pseries` object, 680#' @param k an integer, the number of lags for the `lag` and `lead` 681#' methods (can also be negative). For the `lag` method, a 682#' positive (negative) `k` gives lagged (leading) values. For the 683#' `lead` method, a positive (negative) `k` gives leading (lagged) 684#' values, thus, `lag(x, k = -1L)` yields the same as `lead(x, k = 1L)`. 685#' If `k` is an integer with length > 1 (`k = c(k1, k2, ...)`), a 686#' `matrix` with multiple lagged `pseries` is returned, 687#' @param lag integer, the number of lags for the `diff` method, can also be of 688#' length > 1 (see argument `k`) (only non--negative values in 689#' argument `lag` are allowed for `diff`), 690#' @param shift character, either `"time"` (default) or `"row"` 691#' determining how the shifting in the `lag`/`lead`/`diff` 692#' functions is performed (see Details and Examples). 693#' @param ... further arguments (currently none evaluated). 694#' @return 695#' 696#' - An object of class `pseries`, if the argument specifying the lag 697#' has length 1 (argument `k` in functions `lag` and `lead`, 698#' argument `lag` in function `diff`). 699#' 700#' - A matrix containing the various series in its columns, if the 701#' argument specifying the lag has length > 1. 702#' 703#' @note The sign of `k` in `lag.pseries` results in inverse behaviour 704#' compared to [stats::lag()] and [zoo::lag.zoo()]. 705#' @author Yves Croissant and Kevin Tappe 706#' @seealso To check if the time periods are consecutive per 707#' individual, see [is.pconsecutive()]. 708#' 709#' For further function for 'pseries' objects: [between()], 710#' [Between()], [Within()], [summary.pseries()], 711#' [print.summary.pseries()], [as.matrix.pseries()]. 712#' @keywords classes 713#' @examples 714#' 715#' # First, create a pdata.frame 716#' data("EmplUK", package = "plm") 717#' Em <- pdata.frame(EmplUK) 718#' 719#' # Then extract a series, which becomes additionally a pseries 720#' z <- Em$output 721#' class(z) 722#' 723#' # compute the first and third lag, and the difference lagged twice 724#' lag(z) 725#' lag(z, 3L) 726#' diff(z, 2L) 727#' 728#' # compute negative lags (= leading values) 729#' lag(z, -1L) 730#' lead(z, 1L) # same as line above 731#' identical(lead(z, 1L), lag(z, -1L)) # TRUE 732#' 733#' # compute more than one lag and diff at once (matrix returned) 734#' lag(z, c(1L,2L)) 735#' diff(z, c(1L,2L)) 736#' 737#' ## demonstrate behaviour of shift = "time" vs. shift = "row" 738#' # delete 2nd time period for first individual (1978 is missing (not NA)): 739#' Em_hole <- Em[-2L, ] 740#' is.pconsecutive(Em_hole) # check: non-consecutive for 1st individual now 741#' 742#' # original non-consecutive data: 743#' head(Em_hole$emp, 10) 744#' # for shift = "time", 1-1979 contains the value of former 1-1977 (2 periods lagged): 745#' head(lag(Em_hole$emp, k = 2L, shift = "time"), 10L) 746#' # for shift = "row", 1-1979 contains NA (2 rows lagged (and no entry for 1976): 747#' head(lag(Em_hole$emp, k = 2L, shift = "row"), 10L) 748#' 749NULL 750 751#' @rdname lag.plm 752#' @export 753lead <- function(x, k = 1L, ...) { 754 UseMethod("lead") 755} 756 757#' @rdname lag.plm 758#' @exportS3Method 759#' @export lag 760lag.pseries <- function(x, k = 1L, shift = c("time", "row"), ...) { 761 shift <- match.arg(shift) 762 res <- if(shift == "time") lagt.pseries(x = x, k = k, ...) else lagr.pseries(x = x, k = k, ...) 763 return(res) 764} 765 766#' @rdname lag.plm 767#' @export 768lead.pseries <- function(x, k = 1L, shift = c("time", "row"), ...) { 769 shift <- match.arg(shift) 770 res <- if(shift == "time") leadt.pseries(x = x, k = k, ...) else leadr.pseries(x = x, k = k, ...) 771 return(res) 772} 773 774#' @rdname lag.plm 775#' @exportS3Method 776diff.pseries <- function(x, lag = 1L, shift = c("time", "row"), ...) { 777 shift <- match.arg(shift) 778 res <- if(shift == "time") difft.pseries(x = x, lag = lag, ...) else diffr.pseries(x = x, lag = lag, ...) 779 return(res) 780} 781 782## lagt.pseries lagging taking the time variable into account 783lagt.pseries <- function(x, k = 1L, ...) { 784 index <- unclass(attr(x, "index")) # unclass for speed 785 id <- index[[1L]] 786 time <- index[[2L]] 787 788 if(length(k) > 1L) { 789 rval <- sapply(k, function(i) alagt(x, i)) 790 colnames(rval) <- k 791 } 792 else { 793 rval <- alagt(x, k) 794 } 795 return(rval) 796} 797 798## leadt.pseries(x, k) is a wrapper for lagt.pseries(x, -k) 799leadt.pseries <- function(x, k = 1L, ...) { 800 ret <- lagt.pseries(x, k = -k) 801 if(length(k) > 1L) colnames(ret) <- k 802 return(ret) 803} 804 805## difft: diff-ing taking the time variable into account 806difft.pseries <- function(x, lag = 1L, ...){ 807 ## copied/adapted from diffr.pseries except lines which use lagt() ("t") instead of lagr() ("r") 808 islogi <- is.logical(x) 809 if(! (is.numeric(x) || islogi)) stop("diff is only relevant for numeric or logical series") 810 811 non.int <- vapply(lag, function(l) round(l) != l, FUN.VALUE = TRUE, USE.NAMES = FALSE) 812 if(any(non.int)) stop("Lagging value(s) in 'lag' must be whole-numbered (and non-negative)") 813 814 # prevent input of negative values, because it will most likely confuse users 815 # what difft would do in this case 816 neg <- vapply(lag, function(l) l < 0L, FUN.VALUE = TRUE, USE.NAMES = FALSE) 817 if(any(neg)) stop("diff is only relevant for non-negative values in 'lag'") 818 819 lagtx <- lagt.pseries(x, k = lag) # use "time-based" lagging for difft 820 821 if(is.matrix(lagtx)) { 822 # if 'lagtx' is matrix (case length(lag) > 1): 823 # perform subtraction without pseries feature of 'x', because otherwise 824 # the result would be c("pseries", "matrix") which is not supported 825 res <- as.numeric(x) - lagtx 826 } else { 827 res <- x - lagtx 828 } 829 830 return(res) 831} 832 833## alagt: non-exported helper function for lagt (actual work horse), 834## performs shifting of observations while respecting the time dimension 835alagt <- function(x, ak) { 836 if(round(ak) != ak) stop("Lagging value 'k' must be whole-numbered (positive, negative or zero)") 837 if(ak != 0) { 838 index <- unclass(attr(x, "index")) # unclass for speed 839 id <- index[[1L]] 840 time <- index[[2L]] 841 842 # Idea: split times in blocks per individuals and do lagging there 843 # by computation of correct time shifting 844 845 # need to convert to numeric, do this by coercing to character 846 # first (otherwise wrong results!) 847 # see R FAQ 7.10 for coercing factors to numeric: 848 # as.numeric(levels(factor_var))[as.integer(factor_var)] is 849 # more efficient than 850 # as.numeric(as.character(factor_var)) 851 852 # YC 2019/08/29 only works if time values can be coerced to 853 ## numeric, ie integers like years. When year is period (ie 5 years), 854 ## values used to be 1950 for the 1950-54 period, time is now a 855 ## factor in the original data.frame with levels "1950-54", 856 ## "1955-59", ... In this case coercing the levels to a numeric gives 857 ## NA so coerce the *factor* to a numeric. 858 859 levtime <- levels(time) 860 numlevtime <- suppressWarnings(as.numeric(levtime)) 861 if(! anyNA(numlevtime)) time <- as.numeric(levels(time))[as.integer(time)] 862 else time <- as.numeric(time) 863 864 list_id_timevar <- split(time, id, drop = TRUE) 865 866 index_lag_ak_all_list <- sapply(list_id_timevar, 867 function(x) match(x - ak, x, incomparables = NA), 868 simplify = FALSE, USE.NAMES = FALSE) 869 870 # translate block-wise positions to positions in full vector 871 index_lag_ak_all <- unlist(index_lag_ak_all_list, use.names = FALSE) 872 873 NApos <- is.na(index_lag_ak_all) # save NA positions for later 874 substitute_blockwise <- index_lag_ak_all 875 876 block_lengths <- vapply(index_lag_ak_all_list, length, FUN.VALUE = 0.0, USE.NAMES = FALSE) 877 878 # not needed but leave here for illustration: 879 # startpos_block <- cumsum(block_lengths) - block_lengths + 1 880 # endpos_block <- startpos_block + block_lengths - 1 881 882 indexes_blockwise <- unlist(sapply(block_lengths, function(x) seq(from = 1, to = x), simplify = FALSE), use.names = FALSE) 883 884 orig_pos_x <- seq.int(x) # make vector with indexes for original input 885 new_pos <- orig_pos_x - (indexes_blockwise - substitute_blockwise) # calc. new positions 886 new_pos[NApos] <- orig_pos_x[NApos] # fill NAs with arbitrary values to allow proper subsetting in next step 887 888 orig_attr <- attributes(x) 889 x <- x[new_pos] # re-arrange according to lagging 890 x[NApos] <- NA # set NAs where necessary 891 attributes(x) <- orig_attr # restore original names and 'pseries' class (lost by subsetting x) 892 } 893 return(x) 894} # END alagt 895 896 897## lagr: lagging row-wise 898lagr.pseries <- function(x, k = 1L, ...) { 899 index <- unclass(attr(x, "index")) # unclass for speed 900 id <- index[[1L]] 901 time <- index[[2L]] 902 903 # catch the case when an index of pdata.frame shall be lagged 904 # (index variables are always factors) NB: this catches - 905 # unintentionally - also the case when a factor variable is the 906 # same "on the character level" as one of the corresponding index 907 # variables but not the index variable itself 908 # 909 # -> shall we prevent lagging of index variables at all? -> turned 910 # off for now, 2016-03-03 if(is.factor(x)) if 911 # (all(as.character(x) == as.character(id)) | 912 # all(as.character(x)==as.character(time))) stop("Lagged vector 913 # cannot be index.") 914 915 alagr <- function(x, ak){ 916 if(round(ak) != ak) stop("Lagging value 'k' must be whole-numbered (positive, negative or zero)") 917 if(ak > 0L) { 918 919 # NB: this code does row-wise shifting 920 # delete first ak observations for each unit 921 isNAtime <- c(rep(TRUE, ak), (diff(as.numeric(time), lag = ak) != ak)) 922 isNAid <- c(rep(TRUE, ak), (diff(as.numeric(id), lag = ak) != 0L)) 923 isNA <- (isNAtime | isNAid) 924 925 result <- x # copy x first ... 926 result[1:ak] <- NA # ... then make first ak obs NA ... 927 result[(ak+1):length(result)] <- x[1:(length(x)-ak)] # ... shift and ... 928 result[isNA] <- NA # ... make more NAs in between: this way, we keep: all factor levels, names, classes 929 930 } else if(ak < 0L) { # => compute leading values 931 932 # delete last |ak| observations for each unit 933 num_time <- as.numeric(time) 934 num_id <- as.numeric(id) 935 isNAtime <- c(c((num_time[1:(length(num_time)+ak)] - num_time[(-ak+1):length(num_time)]) != ak), rep(TRUE, -ak)) 936 isNAid <- c(c((num_id[1:(length(num_id)+ak)] - num_id[(-ak+1):length(num_id)]) != 0L), rep(TRUE, -ak)) 937 isNA <- (isNAtime | isNAid) 938 939 result <- x # copy x first ... 940 result[(length(result)+ak+1):length(result)] <- NA # ... then make last |ak| obs NA ... 941 result[1:(length(result)+ak)] <- x[(1-ak):(length(x))] # ... shift and ... 942 result[isNA] <- NA # ... make more NAs in between: this way, we keep: all factor levels, names, classes 943 944 } else { # ak == 0 => nothing to do, return original pseries (no lagging/no leading) 945 result <- x 946 } 947 948 return(result) 949 } # END function alagr 950 951 if(length(k) > 1L) { 952 rval <- sapply(k, function(i) alagr(x, i)) 953 colnames(rval) <- k 954 } 955 else { 956 rval <- alagr(x, k) 957 } 958 return(rval) 959} 960 961 962# leadr.pseries(x, k) is a wrapper for lagr.pseries(x, -k) 963leadr.pseries <- function(x, k = 1L, ...) { 964 ret <- lagr.pseries(x, k = -k) 965 if(length(k) > 1L) colnames(ret) <- k 966 return(ret) 967} 968 969## diffr: lagging row-wise 970diffr.pseries <- function(x, lag = 1L, ...) { 971 islogi <- is.logical(x) 972 if(! (is.numeric(x) || islogi)) stop("diff is only relevant for numeric or logical series") 973 974 non.int <- vapply(lag, function(l) round(l) != l, FUN.VALUE = TRUE, USE.NAMES = FALSE) 975 if(any(non.int)) stop("Lagging value(s) in 'lag' must be whole-numbered (and non-negative)") 976 977 # prevent input of negative values, because it will most likely confuse users 978 # what diff would do in this case 979 neg <- vapply(lag, function(l) l < 0L, FUN.VALUE = TRUE, USE.NAMES = FALSE) 980 if(any(neg)) stop("diff is only relevant for non-negative values in 'lag'") 981 982 lagrx <- lagr.pseries(x, k = lag) 983 984 if(is.matrix(lagrx)) { 985 # if 'lagrx' is matrix (case length(lag) > 1): 986 # perform subtraction without pseries feature of 'x', because otherwise 987 # the result would be c("pseries", "matrix") which is not supported 988 res <- as.numeric(x) - lagrx 989 } else { 990 res <- x - lagrx 991 } 992 return(res) 993} 994 995## pdiff is (only) used in model.matrix.pFormula to calculate the 996## model.matrix for FD models, works for effect = "individual" only, 997## see model.matrix on how to call pdiff. Result is in order (id, 998## time) for both effects 999## 1000## Performs row-wise shifting 1001pdiff <- function(x, effect = c("individual", "time"), has.intercept = FALSE){ 1002 # NB: x is assumed to have an index attribute, e.g., a pseries 1003 # can check with has.index(x) 1004 effect <- match.arg(effect) 1005 cond <- as.numeric(unclass(attr(x, "index"))[[1L]]) # unclass for speed 1006 n <- if(is.matrix(x)) nrow(x) else length(x) 1007 cond <- c(NA, cond[2:n] - cond[1:(n-1)]) # this assumes a certain ordering 1008 cond[cond != 0] <- NA 1009 if(! is.matrix(x)){ 1010 result <- c(NA , x[2:n] - x[1:(n-1)]) 1011 result[is.na(cond)] <- NA 1012 result <- na.omit(result) 1013 } 1014 else{ 1015 result <- rbind(NA, x[2:n, , drop = FALSE] - x[1:(n-1), , drop = FALSE]) 1016 result[is.na(cond), ] <- NA 1017 result <- na.omit(result) 1018 result <- result[ , apply(result, 2, var) > 1E-12, drop = FALSE] 1019 if(has.intercept){ 1020 result <- cbind(1, result) 1021 colnames(result)[1L] <- "(Intercept)" 1022 } 1023 } 1024 attr(result, "na.action") <- NULL 1025 result 1026} 1027 1028