1## Structural changes made to plm's original data transformation functions 2## need to be mimicked in the *.collapse(.*) versions and vice versa. 3 4## 1) Give the base-R version of the functions defined in tool_transformations.R 5## a new name (*.baseR). 6## 2) Implement wrapper switched which call the *.baseR or *.collapse versions 7## based on the option plm.fast (a logical, can be set via R's regular option 8## mechanism: options("plm.fast" = TRUE). 9 10## ad 1) new name for base R functions defined in tool_transformations.R 11Sum.default.baseR <- plm:::Sum.default 12Sum.pseries.baseR <- plm:::Sum.pseries 13Sum.matrix.baseR <- plm:::Sum.matrix 14 15between.default.baseR <- plm:::between.default 16between.pseries.baseR <- plm:::between.pseries 17between.matrix.baseR <- plm:::between.matrix 18 19Between.default.baseR <- plm:::Between.default 20Between.pseries.baseR <- plm:::Between.pseries 21Between.matrix.baseR <- plm:::Between.matrix 22 23Within.default.baseR <- plm:::Within.default 24Within.pseries.baseR <- plm:::Within.pseries 25Within.matrix.baseR <- plm:::Within.matrix 26 27pseriesfy.baseR <- plm:::pseriesfy # ... in tool_pdata.frame.R: 28 29 30## ad 2) implement wrapper switches 31 32#### Sum wrapper switches #### 33Sum.default <- function(x, effect, ...) { 34 if(!isTRUE(getOption("plm.fast"))) { 35 Sum.default.baseR(x, effect, ...) } else { 36 if(!isTRUE(getOption("plm.fast.pkg.collapse"))) stop(txt.no.collapse, call. = FALSE) 37 Sum.default.collapse(x, effect, ...) } 38} 39 40Sum.pseries <- function(x, effect = c("individual", "time", "group"), ...) { 41 if(!isTRUE(getOption("plm.fast"))) { 42 Sum.pseries.baseR(x, effect, ...) } else { 43 if(!isTRUE(getOption("plm.fast.pkg.collapse"))) stop(txt.no.collapse, call. = FALSE) 44 Sum.pseries.collapse(x, effect, ...) } 45} 46 47Sum.matrix <- function(x, effect, ...) { 48 if(!isTRUE(getOption("plm.fast"))) { 49 Sum.matrix.baseR(x, effect, ...) } else { 50 if(!isTRUE(getOption("plm.fast.pkg.collapse"))) stop(txt.no.collapse, call. = FALSE) 51 Sum.matrix.collapse(x, effect, ...) } 52} 53 54#### Between wrapper switches #### 55Between.default <- function(x, effect, ...) { 56 if(!isTRUE(getOption("plm.fast"))) { 57 Between.default.baseR(x, effect, ...) } else { 58 if(!isTRUE(getOption("plm.fast.pkg.collapse"))) stop(txt.no.collapse, call. = FALSE) 59 Between.default.collapse(x, effect, ...) } 60} 61 62Between.pseries <- function(x, effect = c("individual", "time", "group"), ...) { 63 if(!isTRUE(getOption("plm.fast"))) { 64 Between.pseries.baseR(x, effect, ...) } else { 65 if(!isTRUE(getOption("plm.fast.pkg.collapse"))) stop(txt.no.collapse, call. = FALSE) 66 Between.pseries.collapse(x, effect, ...) } 67} 68 69Between.matrix <- function(x, effect, ...) { 70 if(!isTRUE(getOption("plm.fast"))) { 71 Between.matrix.baseR(x, effect, ...) } else { 72 if(!isTRUE(getOption("plm.fast.pkg.collapse"))) stop(txt.no.collapse, call. = FALSE) 73 Between.matrix.collapse(x, effect, ...) } 74} 75 76#### between wrapper switches #### 77between.default <- function(x, effect, ...) { 78 if(!isTRUE(getOption("plm.fast"))) { 79 between.default.baseR(x, effect, ...) } else { 80 if(!isTRUE(getOption("plm.fast.pkg.collapse"))) stop(txt.no.collapse, call. = FALSE) 81 between.default.collapse(x, effect, ...) } 82} 83 84between.pseries <- function(x, effect = c("individual", "time", "group"), ...) { 85 if(!isTRUE(getOption("plm.fast"))) { 86 between.pseries.baseR(x, effect, ...) } else { 87 if(!isTRUE(getOption("plm.fast.pkg.collapse"))) stop(txt.no.collapse, call. = FALSE) 88 between.pseries.collapse(x, effect, ...) } 89} 90 91between.matrix <- function(x, effect, ...) { 92 if(!isTRUE(getOption("plm.fast"))) { 93 between.matrix.baseR(x, effect, ...) } else { 94 if(!isTRUE(getOption("plm.fast.pkg.collapse"))) stop(txt.no.collapse, call. = FALSE) 95 between.matrix.collapse(x, effect, ...) } 96} 97 98#### Within wrapper switches #### 99Within.default <- function(x, effect, ...) { 100 if(!isTRUE(getOption("plm.fast"))) { 101 Within.default.baseR(x, effect, ...) } else { 102 if(!isTRUE(getOption("plm.fast.pkg.collapse"))) stop(txt.no.collapse, call. = FALSE) 103 Within.default.collapse(x, effect, ...) } 104} 105 106Within.pseries <- function(x, effect = c("individual", "time", "group", "twoways"), ...) { 107 if(!isTRUE(getOption("plm.fast"))) { 108 Within.pseries.baseR(x, effect, ...) 109 } else { 110 if(!isTRUE(getOption("plm.fast.pkg.collapse"))) stop(txt.no.collapse, call. = FALSE) 111 112 if(is.null(getOption("plm.fast.pkg.FE.tw"))) options("plm.fast.pkg.FE.tw" = "collapse") 113 switch(getOption("plm.fast.pkg.FE.tw"), 114 "collapse" = Within.pseries.collapse(x, effect, ...), # collapse only, 115 "fixest" = Within.pseries.collapse.fixest(x, effect, ...), # collapse for 1-way FE + fixest for 2-way FE, 116 "lfe" = Within.pseries.collapse.lfe(x, effect, ...), # collapse for 1-way FE + lfe for 2-way FE, 117 stop("unknown value of option 'plm.fast.pkg.FE.tw'")) 118 } 119} 120 121Within.matrix <- function(x, effect, rm.null = TRUE, ...) { 122 if(!isTRUE(getOption("plm.fast"))) { 123 Within.matrix.baseR(x, effect, ...) 124 } else { 125 if (!isTRUE(getOption("plm.fast.pkg.collapse"))) stop(txt.no.collapse, call. = FALSE) 126 127 if(is.null(getOption("plm.fast.pkg.FE.tw"))) options("plm.fast.pkg.FE.tw" = "collapse") 128 switch(getOption("plm.fast.pkg.FE.tw"), 129 "collapse" = Within.matrix.collapse(x, effect, ...), # collapse only, 130 "fixest" = Within.matrix.collapse.fixest(x, effect, ...), # collapse for 1-way FE + fixest for 2-way FE, 131 "lfe" = Within.matrix.collapse.lfe(x, effect, ...), # collapse for 1-way FE + lfe for 2-way FE, 132 stop("unknown value of option 'plm.fast.pkg.FE.tw'")) 133 } 134} 135 136 137#### Sum #### 138 139Sum.default.collapse <- function(x, effect, ...) { 140# print("Sum.default.collapse") 141# browser() 142 # argument 'effect' is assumed to be a factor 143 144 if(!is.numeric(x)) stop("The Sum function only applies to numeric vectors") 145 # check for presence of na.rm in dots, if not present set to FALSE 146 na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm 147 res <- collapse::fsum(x, g = effect, w = NULL, na.rm = na.rm, TRA = "replace") 148 names(res) <- as.character(effect) 149 return(res) 150} 151 152Sum.pseries.collapse <- function(x, effect = c("individual", "time", "group"), ...) { 153# print("Sum.pseries.collapse") 154# browser() 155 effect <- match.arg(effect) 156 # check for presence of na.rm in dots, if not present set to FALSE 157 na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm 158 eff.no <- switch(effect, 159 "individual" = 1L, 160 "time" = 2L, 161 "group" = 3L, 162 stop("unknown value of argument 'effect'")) 163 xindex <- unclass(attr(x, "index")) # unclass for speed 164 checkNA.index(xindex) # index may not contain any NA 165 eff.fac <- xindex[[eff.no]] 166 res <- collapse::fsum(x, g = eff.fac, w = NULL, na.rm = na.rm, TRA = "replace") 167 names(res) <- as.character(eff.fac) 168 res <- add_pseries_features(res, attr(x, "index")) 169 return(res) 170} 171 172Sum.matrix.collapse <- function(x, effect, ...) { 173# print("Sum.matrix.collapse") 174# browser() 175 # if no index attribute, argument 'effect' is assumed to be a factor 176 eff.fac <- if(is.null(xindex <- attr(x, "index"))) { 177 effect 178 } else { 179 if(!is.character(effect) && length(effect) > 1L) 180 stop("for matrices with index attributes, the effect argument must be a character") 181 if(! effect %in% c("individual", "time", "group")) 182 stop("irrelevant effect for a Sum transformation") 183 eff.no <- switch(effect, 184 "individual" = 1L, 185 "time" = 2L, 186 "group" = 3L, 187 stop("unknown value of argument 'effect'")) 188 xindex <- unclass(xindex) # unclass for speed 189 checkNA.index(xindex) # index may not contain any NA 190 xindex[[eff.no]] 191 } 192 # check for presence of na.rm in dots, if not present set to FALSE 193 na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm 194 res <- collapse::fsum(x, g = eff.fac, w = NULL, na.rm = na.rm, drop = FALSE, TRA = "replace") 195 rownames(res) <- as.character(eff.fac) 196 attr(res, "index") <- NULL 197 return(res) 198} 199 200#### B/between #### 201 202# Need separate implementations of Between.pseries and between.pseries due to different NA handling 203 204Between.default.collapse <- function(x, effect, ...) { 205# print("Between.default.collapse") 206# browser() 207 208 # argument 'effect' is assumed to be a factor 209 if(!is.numeric(x)) stop("The Between function only applies to numeric vectors") 210 # check for presence of na.rm in dots, if not present set to FALSE 211 na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm 212 nms <- as.character(effect) 213 res <- collapse::fbetween(x, g = effect, w = NULL, na.rm = na.rm) 214 names(res) <- nms 215 return(res) 216} 217 218between.default.collapse <- function(x, effect, ...) { 219# print("between.default.collapse") 220# browser() 221 222 # argument 'effect' is assumed to be a factor 223 if(!is.numeric(x)) stop("The Between function only applies to numeric vectors") 224 # check for presence of na.rm in dots, if not present set to FALSE 225 na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm 226 res <- collapse::fbetween(x, g = effect, w = NULL, na.rm = na.rm, fill = TRUE) 227 keep <- !duplicated(effect) 228 res <- res[keep] 229 names(res) <- as.character(effect[keep]) 230 # bring into factor level order (not order as appears in orig. data) 231 lvl <- levels(collapse::fdroplevels(effect)) 232 res <- res[lvl] 233 return(res) 234} 235 236Between.pseries.collapse <- function(x, effect = c("individual", "time", "group"), ...) { 237# print("Between.pseries.collapse") 238# browser() 239 240 # translate arguments 241 effect <- match.arg(effect) 242 # check for presence of na.rm in dots, if not present set to FALSE 243 na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm 244 eff.no <- switch(effect, 245 "individual" = 1L, 246 "time" = 2L, 247 "group" = 3L, 248 stop("unknown value of argument 'effect'")) 249 250 xindex <- unclass(attr(x, "index")) # unclass for speed 251 checkNA.index(xindex) # index may not contain any NA 252 nms <- as.character(xindex[[eff.no]]) 253 na.x <- is.na(x) 254 # must be fill = TRUE [to catch case when 1 obs of an individual is NA (otherwise result could contain non-intended NA)] 255 res <- collapse::fbetween(x, effect = eff.no, w = NULL, na.rm = na.rm, fill = TRUE) 256 names(res) <- nms 257 res[na.x] <- NA 258 return(res) 259} 260 261between.pseries.collapse <- function(x, effect = c("individual", "time", "group"), ...) { 262# print("between.pseries.collapse") 263# browser() 264 effect <- match.arg(effect) 265 # check for presence of na.rm in dots, if not present set to FALSE 266 na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm 267 eff.no <- switch(effect, 268 "individual" = 1L, 269 "time" = 2L, 270 "group" = 3L, 271 stop("unknown value of argument 'effect'")) 272 273 xindex <- unclass(attr(x, "index")) # unclass for speed 274 checkNA.index(xindex) # index may not contain any NA 275 i <- xindex[[eff.no]] 276 # must be fill = TRUE [to catch case when 1 obs of an individual is NA 277 # (otherwise result could contain non-intended NA)] 278 res <- collapse::fbetween(x, effect = eff.no, w = NULL, na.rm = na.rm, fill = TRUE) 279 res <- remove_pseries_features(res) 280 keep <- !duplicated(i) 281 res <- res[keep] 282 names(res) <- as.character(i[keep]) 283 # bring into factor level order (not order as appears in orig. data) 284 lvl <- levels(collapse::fdroplevels(i)) 285 res <- res[lvl] 286 return(res) 287} 288 289 290 291Between.matrix.collapse <- function(x, effect, ...) { 292# print("Between.matrix.collapse") 293# browser() 294 # if no index attribute, argument 'effect' is assumed to be a factor 295 eff.fac <- if(is.null(xindex <- attr(x, "index"))) { 296 effect 297 } else { 298 if(!is.character(effect) && length(effect) > 1L) 299 stop("for matrices with index attributes, the effect argument must be a character") 300 if(! effect %in% c("individual", "time", "group")) 301 stop("irrelevant effect for a between transformation") 302 eff.no <- switch(effect, 303 "individual" = 1L, 304 "time" = 2L, 305 "group" = 3L, 306 stop("unknown value of argument 'effect'")) 307 xindex <- unclass(xindex) # unclass for speed 308 checkNA.index(xindex) # index may not contain any NA 309 xindex[[eff.no]] 310 } 311 # check for presence of na.rm in dots, if not present set to FALSE 312 na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm 313 na.x <- is.na(x) 314 res <- collapse::fbetween(x, g = eff.fac, w = NULL, na.rm = na.rm, fill = TRUE) 315 attr(res, "index") <- NULL 316 rownames(res) <- as.character(eff.fac) 317 res[na.x] <- NA 318 return(res) 319} 320 321between.matrix.collapse <- function(x, effect, ...) { 322# print("between.matrix.collapse") 323# browser() 324 # if no index attribute, argument 'effect' is assumed to be a factor 325 eff.fac <- if(is.null(xindex <- attr(x, "index"))) { 326 effect 327 } else { 328 if(!is.character(effect) && length(effect) > 1L) 329 stop("for matrices with index attributes, the effect argument must be a character") 330 if(! effect %in% c("individual", "time", "group")) 331 stop("irrelevant effect for a between transformation") 332 eff.no <- switch(effect, 333 "individual" = 1L, 334 "time" = 2L, 335 "group" = 3L, 336 stop("unknown value of argument 'effect'")) 337 xindex <- unclass(xindex) # unclass for speed 338 checkNA.index(xindex) # index may not contain any NA 339 xindex[[eff.no]] 340 } 341 # check for presence of na.rm in dots, if not present set to FALSE 342 na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm 343 res <- collapse::fbetween(x, g = eff.fac, w = NULL, na.rm = na.rm, fill = TRUE) 344 rownames(res) <- as.character(eff.fac) 345 # compress data to number of unique individuals (or time periods) 346 res <- res[!duplicated(eff.fac), , drop = FALSE] 347 # bring into factor level order (not order as appears in orig. data) 348 lvl <- levels(collapse::fdroplevels(eff.fac)) 349 res <- res[lvl, , drop = FALSE] 350 return(res) 351} 352 353 354#### Within #### 355# Within - default 356 357Within.default.collapse <- function(x, effect, ...) { 358# print("Within.default.collapse") 359# browser() 360 361 # argument 'effect' is assumed to be a factor 362 if(!is.numeric(x)) stop("the within function only applies to numeric vectors") 363 # check for presence of na.rm in dots, if not present set to FALSE 364 na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm 365 res <- collapse::fwithin(x, g = effect, w = NULL, na.rm = na.rm) 366 # =(plm)= res <- x - Between(x, effect, ...) 367 names(res) <- as.character(effect) 368 return(res) 369} 370 371 372Within.pseries.collapse <- function(x, effect = c("individual", "time", "group", "twoways"), ...) { 373# print("Within.pseries.collapse") 374# browser() 375 effect <- match.arg(effect) 376 # check for presence of na.rm in dots, if not present set to FALSE 377 na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm 378 xindex <- unclass(attr(x, "index")) # unclass for speed 379 checkNA.index(xindex) # index may not contain any NA 380 if(effect != "twoways") { 381 eff.no <- switch(effect, 382 "individual" = 1L, 383 "time" = 2L, 384 "group" = 3L, 385 stop("unknown value of argument 'effect'")) 386 res <- collapse::fwithin(x, effect = eff.no, w = NULL, na.rm = na.rm, mean = 0) 387 } else { 388 eff.ind.fac <- xindex[[1L]] 389 eff.time.fac <- xindex[[2L]] 390 if(is.pbalanced(eff.ind.fac, eff.time.fac)) { 391 # effect = "twoways" - balanced 392 res <- collapse::fwithin( x, effect = 1L, w = NULL, na.rm = na.rm, mean = "overall.mean") - 393 collapse::fbetween(x, effect = 2L, w = NULL, na.rm = na.rm, fill = TRUE) 394 # =(plm)= res <- x - Between(x, "individual", ...) - Between(x, "time", ...) + mean(x, ...) 395 } else { 396 # effect = "twoways" - unbalanced 397 Dmu <- model.matrix(~ eff.time.fac - 1) 398 W1 <- collapse::fwithin(x, effect = 1L, w = NULL, na.rm = na.rm, mean = 0) # pseries interface 399 WDmu <- collapse::fwithin(Dmu, g = eff.ind.fac, w = NULL, na.rm = na.rm, mean = 0) # matrix interface 400 W2 <- lm.fit(WDmu, x)$fitted.values 401 res <- W1 - W2 402 } 403 } 404 return(res) 405} 406 407Within.matrix.collapse <- function(x, effect, rm.null = TRUE, ...) { 408# print("Within.matrix.collapse") 409# browser() 410 411 412 if(is.null(xindex <- attr(x, "index"))) { 413 # non-index case, 'effect' needs to be a factor 414 result <- Within.default(x, effect, ...) 415 othervar <- colSums(abs(x)) > sqrt(.Machine$double.eps) 416 if(rm.null) { 417 result <- result[ , othervar, drop = FALSE] 418 attr(result, "constant") <- character(0) 419 } 420 else attr(result, "constant") <- colnames(x)[! othervar] 421 return(result) 422 } 423 else { 424 # index case 425 xindex <- unclass(xindex) # unclass for speed 426 checkNA.index(xindex) # index may not contain any NA 427 428 # check for presence of na.rm in dots, if not present set to FALSE 429 na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm 430 431 if(effect != "twoways") { 432 eff.fac <- switch(effect, 433 "individual" = xindex[[1L]], 434 "time" = xindex[[2L]], 435 "group" = xindex[[3L]], 436 stop("unknown value of argument 'effect'")) 437 438 result <- collapse::fwithin(x, g = eff.fac, w = NULL, na.rm = na.rm, mean = 0) 439 # =(plm)= result <- x - Between(x, effect) 440 441 } else { 442 # effect = "twoways" 443 eff.ind.fac <- xindex[[1L]] 444 eff.time.fac <- xindex[[2L]] 445 if(is.pbalanced(eff.ind.fac, eff.time.fac)) { 446 # balanced twoways 447 result <- collapse::fwithin( x, g = eff.ind.fac, w = NULL, na.rm = na.rm, mean = "overall.mean") - 448 collapse::fbetween(x, g = eff.time.fac, w = NULL, na.rm = na.rm, fill = TRUE) 449 # =(plm)= result <- x - Between(x, "individual", ...) - Between(x, "time", ...) + 450 # matrix(colMeans(x, ...), nrow = nrow(x), ncol = ncol(x), byrow = TRUE) 451 } 452 else { # unbalanced twoways 453 # as factor is used twice below, make it a collapse::GRP object -> should give some speed-up 454 eff.ind.fac <- collapse::GRP(eff.ind.fac, group.sizes = FALSE, return.groups = FALSE, call = FALSE) 455 Dmu <- model.matrix(~ eff.time.fac - 1) 456 W1 <- collapse::fwithin(x, g = eff.ind.fac, w = NULL, na.rm = na.rm, mean = 0) 457 WDmu <- collapse::fwithin(Dmu, g = eff.ind.fac, w = NULL, na.rm = na.rm, mean = 0) 458 W2 <- lm.fit(WDmu, x)$fitted.values 459 result <- W1 - W2 460 } 461 } 462 } 463 return(result) 464} 465 466#### These functions use collpase::fhdwithin (using internally fixest::demean) 467#### or lfe::demeanlist respectively, for 468#### the 2-way within transformation which are dramatically faster than 469#### the implementation via separate collapse::fwithin calls (due to the special 470#### algorithms used to partial out the fixed effects) 471Within.pseries.collapse.fixest <- function(x, effect = c("individual", "time", "group", "twoways"), ...) { 472# print("Within.pseries.collapse.fixest") 473# browser() 474 effect <- match.arg(effect) 475 # check for presence of na.rm in dots, if not present set to FALSE 476 na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm 477 xindex <- unclass(attr(x, "index")) # unclass for speed 478 checkNA.index(xindex) # index may not contain any NA 479 if(effect != "twoways") { 480 eff.no <- switch(effect, 481 "individual" = 1L, 482 "time" = 2L, 483 "group" = 3L, 484 stop("unknown value of argument 'effect'")) 485 # in 1-way case fwithin seems faster than fhdwithin, so keep 1-way and 2-way 486 # cases separated 487 res <- collapse::fwithin(x, effect = eff.no, w = NULL, na.rm = na.rm, mean = 0) 488 } else { 489 # effect = "twoways" 490 491 # dispatches to pseries method 492 res <- collapse::fhdwithin(x, effect = 1:2, w = NULL, na.rm = na.rm) 493 } 494 return(res) 495} 496 497Within.matrix.collapse.fixest <- function(x, effect, rm.null = TRUE, ...) { 498# print("Within.matrix.collapse.fixest") 499# browser() 500 501 if(is.null(xindex <- attr(x, "index"))) { 502 # non-index case, 'effect' needs to be a factor 503 result <- Within.default(x, effect, ...) 504 othervar <- colSums(abs(x)) > sqrt(.Machine$double.eps) 505 if(rm.null) { 506 result <- result[ , othervar, drop = FALSE] 507 attr(result, "constant") <- character(0) 508 } 509 else attr(result, "constant") <- colnames(x)[! othervar] 510 return(result) 511 } 512 else { 513 # index case 514 xindex <- unclass(xindex) # unclass for speed 515 checkNA.index(xindex) # index may not contain any NA 516 # check for presence of na.rm in dots, if not present set to FALSE 517 na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm 518 519 if(effect != "twoways") { 520 eff.fac <- switch(effect, 521 "individual" = xindex[[1L]], 522 "time" = xindex[[2L]], 523 "group" = xindex[[3L]], 524 stop("unknown value of argument 'effect'")) 525 526 ## result <- collapse::fhdwithin(x, eff.fac) # needs pkg fixest 527 # --> for one-way effect, this seems slower than collapse::fwithin 528 result <- collapse::fwithin(x, g = eff.fac, w = NULL, na.rm = na.rm, mean = 0) 529 # =(plm)= result <- x - Between(x, effect) 530 } else { 531 # effect = "twoways" 532 # no need to distinguish between balanced/unbalanced 533 # as this is fully handled by collapse::fhdwithin() 534 # collapse::fhdwithin needs pkg fixest as it uses fixest::demean 535 result <- collapse::fhdwithin(x, fl = xindex[1:2], w = NULL, na.rm = na.rm) 536 } 537 } 538 return(result) 539} 540 541Within.pseries.collapse.lfe <- function(x, effect = c("individual", "time", "group", "twoways"), ...) { 542# print("Within.pseries.collapse.lfe") 543# browser() 544 545 effect <- match.arg(effect) 546 xindex <- unclass(attr(x, "index")) 547 checkNA.index(xindex) # index may not contain any NA 548 # check for presence of na.rm in dots, if not present set to FALSE 549 na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm 550 if(effect != "twoways") { 551 eff.no <- switch(effect, 552 "individual" = 1L, 553 "time" = 2L, 554 "group" = 3L, 555 stop("unknown value of argument 'effect'")) 556 # collapse::fwithin is faster in 1-ways case than lfe::demanlist, so 557 # keep cases separated 558 res <- collapse::fwithin(x, effect = eff.no, w = NULL, na.rm = na.rm, mean = 0) 559 } else { 560 # effect = "twoways" 561 # no need to distinguish between balanced/unbalanced 562 # as this is fully handled by lfe::dmeanlist() 563 res <- unlist(lfe::demeanlist(x, fl = xindex[1:2], na.rm = na.rm)) 564 res <- add_pseries_features(res, attr(x, "index")) # index needs to be a proper pindex here! 565 } 566 return(res) 567} 568 569Within.matrix.collapse.lfe <- function(x, effect, rm.null = TRUE, ...) { 570# print("Within.matrix.collapse.lfe") 571# browser() 572 573 574 if(is.null(xindex <- attr(x, "index"))) { 575 # non-index case, 'effect' needs to be a factor 576 result <- Within.default(x, effect, ...) 577 othervar <- colSums(abs(x)) > sqrt(.Machine$double.eps) 578 if(rm.null) { 579 result <- result[ , othervar, drop = FALSE] 580 attr(result, "constant") <- character(0) 581 } 582 else attr(result, "constant") <- colnames(x)[! othervar] 583 return(result) 584 } 585 else { 586 # index case 587 xindex <- unclass(xindex) 588 checkNA.index(xindex) # index may not contain any NA 589 # check for presence of na.rm in dots, if not present set to FALSE 590 na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm 591 592 if(effect != "twoways") { 593 eff.fac <- switch(effect, 594 "individual" = xindex[[1L]], 595 "time" = xindex[[2L]], 596 "group" = xindex[[3L]], 597 stop("unknown value of argument 'effect'")) 598 # collapse::fwithin is faster in 1-ways case than lfe::demanlist, so 599 # keep cases separated 600 result <- collapse::fwithin(x, g = eff.fac, w = NULL, na.rm = na.rm, mean = 0) 601 # =(plm)= result <- x - Between(x, effect) 602 } else { 603 # effect = "twoways" 604 # no need to distinguish between balanced/unbalanced 605 # as this is fully handled by lfe::dmeanlist() 606 # 607 # lfe::demeanlist (lfe vers. 2.8-6) return value for matrix input is 608 # inconsistent / depends on value of argument na.rm, 609 # see https://github.com/sgaure/lfe/issues/50. 610 result <- lfe::demeanlist(x, fl = xindex[1:2], na.rm = na.rm) 611 if(is.list(result)) result <- result[[1L]] 612 attr(result, "index") <- attr(x, "index") # index needs to be a proper pindex here! 613 } 614 } 615 return(result) 616} 617 618#### wrapper for pseriesfy #### 619# both pseriesfy functions are in file tool_pdata.frame.R 620pseriesfy <- function(x, ...) { 621 if(!isTRUE(getOption("plm.fast"))) { 622 pseriesfy.baseR(x, ...) } else { 623 if(!isTRUE(getOption("plm.fast.pkg.collapse"))) stop(txt.no.collapse, call. = FALSE) 624 pseriesfy.collapse(x, ...) } 625} 626 627.onAttach <- function(libname, pkgname) { 628 # options("plm.fast" = TRUE) # not yet the default, maybe in Q3/2021, 629 # would need pkg collapse as hard dependency 630 631 # determine when pkg plm is attached whether pkg collapse, fixest, and lfe are 632 # available and set (non-documented) options, which packages are available. 633 # These options are used to determine in the wrappers if fast mode can be used 634 # and if the speed up by fixest or lfe for the 2-way FE case can be used. 635 avail.collapse <- requireNamespace("collapse", quietly = TRUE) 636 avail.fixest <- requireNamespace("fixest", quietly = TRUE) 637 avail.lfe <- requireNamespace("lfe", quietly = TRUE) 638 639 if(avail.collapse) { 640 options("plm.fast.pkg.collapse" = TRUE) 641 options("plm.fast.pkg.FE.tw" = "collapse") 642 # fixest wins over lfe 643 if(avail.fixest) { 644 options("plm.fast.pkg.FE.tw" = "fixest") 645 } else { 646 if(avail.lfe) { 647 options("plm.fast.pkg.FE.tw" = "lfe") 648 } 649 } 650 } 651 else options("plm.fast.pkg.collapse" = FALSE) 652} 653 654 655#' Option to Switch On/Off Fast Data Transformations 656#' 657#' A significant speed up can be gained by using fast (panel) data transformation 658#' functions from package `collapse`. 659#' An additional significant speed up for the two-way fixed effects case can be 660#' achieved if package `fixest` or `lfe` is installed (package `collapse` 661#' needs to be installed for the fast mode in any case). 662#' 663#' @details By default, this speed up is not enabled. 664#' Option `plm.fast` can be used to enable/disable the speed up. The option is 665#' evaluated prior to execution of supported transformations (see below), so 666#' `option("plm.fast" = TRUE)` enables the speed up while 667#' `option("plm.fast" = FALSE)` disables the speed up. 668#' 669#' To have it always switched on, put `options("plm.fast" = TRUE)` in your 670#' .Rprofile file. 671#' 672#' See **Examples** for how to use the option and for a benchmarking example. 673#' 674#' By default, package `plm` uses base R implementations and R-based code. The 675#' package `collapse` provides fast data transformation functions written 676#' in C/C++, among them some especially suitable for panel data. 677#' Having package `collapse` installed is a requirement for the speed up. 678#' However, this package is currently not a hard dependency for package `plm` 679#' but a 'Suggests' dependency. 680#' 681#' Availability of packages `fixest` and `lfe` is checked for once when 682#' package plm is attached and the additional speed up for the two-way fixed 683#' effect case is enabled automatically (`fixest` wins over `lfe`), 684#' given one of the packages is detected and `options("plm.fast" = TRUE)` is set. 685#' If so, the packages' fast algorithms to partial out fixed effects are 686#' used (`fixest::demean` (via `collapse::fhdwithin`), `lfe::demeanlist`). 687#' Both packages are 'Suggests' dependencies. 688#' 689#' Currently, these functions benefit from the speed-up (more functions are 690#' under investigation): 691#' \itemize{ 692#' \item between, 693#' \item Between, 694#' \item Sum, 695#' \item Within, 696#' \item pseriesfy. 697#' } 698#' 699#' @name plm.fast 700#' @keywords sysdata manip 701#' @examples 702#' \dontrun{ 703#' ### A benchmark of plm without and with speed-up 704#' library("plm") 705#' library("collapse") 706#' library("microbenchmark") 707#' rm(list = ls()) 708#' data("wlddev", package = "collapse") 709#' form <- LIFEEX ~ PCGDP + GINI 710#' 711#' # produce big data set (taken from collapse's vignette) 712#' wlddevsmall <- get_vars(wlddev, c("iso3c","year","OECD","PCGDP","LIFEEX","GINI","ODA")) 713#' wlddevsmall$iso3c <- as.character(wlddevsmall$iso3c) 714#' data <- replicate(100, wlddevsmall, simplify = FALSE) 715#' rm(wlddevsmall) 716#' uniquify <- function(x, i) { 717#' x$iso3c <- paste0(x$iso3c, i) 718#' x 719#' } 720#' data <- unlist2d(Map(uniquify, data, as.list(1:100)), idcols = FALSE) 721#' data <- pdata.frame(data, index = c("iso3c", "year")) 722#' pdim(data) # Balanced Panel: n = 21600, T = 59, N = 1274400 // but many NAs 723#' # data <- na.omit(data) 724#' # pdim(data) # Unbalanced Panel: n = 13300, T = 1-31, N = 93900 725#' 726#' times <- 1 # no. of repetitions for benchmark - this takes quite long! 727#' 728#' onewayFE <- microbenchmark( 729#' {options("plm.fast" = FALSE); plm(form, data = data, model = "within")}, 730#' {options("plm.fast" = TRUE); plm(form, data = data, model = "within")}, 731#' times = times, unit = "relative") 732#' 733#' summary(onewayFE) 734#' 735#' ## two-ways FE benchmark requires pkg fixest and lfe 736#' ## (End-users shall only set option plm.fast. Option plm.fast.pkg.FE.tw shall 737#' ## _not_ be set by the end-user, it is determined automatically when pkg plm 738#' ## is attached; however, it needs to be set explicitly in this example for the 739#' ## benchmark.) 740#' if(requireNamespace("fixest", quietly = TRUE) && 741#' requireNamespace("lfe", quietly = TRUE)) { 742#' 743#' twowayFE <- microbenchmark( 744#' {options("plm.fast" = FALSE); 745#' plm(form, data = data, model = "within", effect = "twoways")}, 746#' {options("plm.fast" = TRUE, "plm.fast.pkg.FE.tw" = "collapse"); 747#' plm(form, data = data, model = "within", effect = "twoways")}, 748#' {options("plm.fast" = TRUE, "plm.fast.pkg.FE.tw" = "fixest"); 749#' plm(form, data = data, model = "within", effect = "twoways")}, 750#' {options("plm.fast" = TRUE, "plm.fast.pkg.FE.tw" = "lfe"); 751#' plm(form, data = data, model = "within", effect = "twoways")}, 752#' times = times, unit = "relative") 753#' 754#' summary(twowayFE) 755#' } 756#' 757#' onewayRE <- microbenchmark( 758#' {options("plm.fast" = FALSE); plm(form, data = data, model = "random")}, 759#' {options("plm.fast" = TRUE); plm(form, data = data, model = "random")}, 760#' times = times, unit = "relative") 761#' 762#' summary(onewayRE) 763#' 764#' twowayRE <- microbenchmark( 765#' {options("plm.fast" = FALSE); plm(form, data = data, model = "random", effect = "twoways")}, 766#' {options("plm.fast" = TRUE); plm(form, data = data, model = "random", effect = "twoways")}, 767#' times = times, unit = "relative") 768#' 769#' summary(twowayRE) 770#' } 771NULL 772 773 774txt.no.collapse <- paste0("options(\"plm.fast\") is set to TRUE but package 'collapse' ", 775 "is not available which is needed for fast data transformation functions. ", 776 "Either set 'options(\"plm.fast\" = FALSE)' or install the ", 777 "missing package, e.g., with 'install.packages(\"collapse\")'. \n", 778 "Having additionally package 'fixest' or 'lfe' installed ", 779 "will speed up the two-way fixed effect case further. \n", 780 "Availability of packages is determined only when ", 781 "plm is attached, so restart R/reload plm when mentioned ", 782 "packages have been installed.") 783