1## Structural changes made to plm's original data transformation functions
2## need to be mimicked in the *.collapse(.*) versions and vice versa.
3
4## 1) Give the base-R version of the functions defined in tool_transformations.R
5##    a new name (*.baseR).
6## 2) Implement wrapper switched which call the *.baseR or *.collapse versions
7##    based on the option plm.fast (a logical, can be set via R's regular option
8##    mechanism: options("plm.fast" = TRUE).
9
10## ad 1) new name for base R functions defined in tool_transformations.R
11Sum.default.baseR <- plm:::Sum.default
12Sum.pseries.baseR <- plm:::Sum.pseries
13Sum.matrix.baseR  <- plm:::Sum.matrix
14
15between.default.baseR <- plm:::between.default
16between.pseries.baseR <- plm:::between.pseries
17between.matrix.baseR  <- plm:::between.matrix
18
19Between.default.baseR <- plm:::Between.default
20Between.pseries.baseR <- plm:::Between.pseries
21Between.matrix.baseR  <- plm:::Between.matrix
22
23Within.default.baseR <- plm:::Within.default
24Within.pseries.baseR <- plm:::Within.pseries
25Within.matrix.baseR  <- plm:::Within.matrix
26
27pseriesfy.baseR      <- plm:::pseriesfy # ... in tool_pdata.frame.R:
28
29
30## ad 2) implement wrapper switches
31
32#### Sum wrapper switches ####
33Sum.default <- function(x, effect, ...) {
34  if(!isTRUE(getOption("plm.fast"))) {
35    Sum.default.baseR(x, effect, ...) } else {
36    if(!isTRUE(getOption("plm.fast.pkg.collapse"))) stop(txt.no.collapse, call. = FALSE)
37    Sum.default.collapse(x, effect, ...) }
38}
39
40Sum.pseries <- function(x, effect = c("individual", "time", "group"), ...) {
41  if(!isTRUE(getOption("plm.fast"))) {
42    Sum.pseries.baseR(x, effect, ...) } else {
43    if(!isTRUE(getOption("plm.fast.pkg.collapse"))) stop(txt.no.collapse, call. = FALSE)
44    Sum.pseries.collapse(x, effect, ...) }
45}
46
47Sum.matrix <- function(x, effect, ...) {
48  if(!isTRUE(getOption("plm.fast"))) {
49    Sum.matrix.baseR(x, effect, ...) } else {
50    if(!isTRUE(getOption("plm.fast.pkg.collapse"))) stop(txt.no.collapse, call. = FALSE)
51    Sum.matrix.collapse(x, effect, ...) }
52}
53
54#### Between wrapper switches ####
55Between.default <- function(x, effect, ...) {
56  if(!isTRUE(getOption("plm.fast"))) {
57    Between.default.baseR(x, effect, ...) } else {
58    if(!isTRUE(getOption("plm.fast.pkg.collapse"))) stop(txt.no.collapse, call. = FALSE)
59    Between.default.collapse(x, effect, ...) }
60}
61
62Between.pseries <- function(x, effect = c("individual", "time", "group"), ...) {
63  if(!isTRUE(getOption("plm.fast"))) {
64    Between.pseries.baseR(x, effect, ...) } else {
65    if(!isTRUE(getOption("plm.fast.pkg.collapse"))) stop(txt.no.collapse, call. = FALSE)
66    Between.pseries.collapse(x, effect, ...) }
67}
68
69Between.matrix <- function(x, effect, ...) {
70  if(!isTRUE(getOption("plm.fast"))) {
71    Between.matrix.baseR(x, effect, ...) } else {
72    if(!isTRUE(getOption("plm.fast.pkg.collapse"))) stop(txt.no.collapse, call. = FALSE)
73    Between.matrix.collapse(x, effect, ...) }
74}
75
76#### between wrapper switches ####
77between.default <- function(x, effect, ...) {
78  if(!isTRUE(getOption("plm.fast"))) {
79     between.default.baseR(x, effect, ...) } else {
80     if(!isTRUE(getOption("plm.fast.pkg.collapse"))) stop(txt.no.collapse, call. = FALSE)
81     between.default.collapse(x, effect, ...) }
82}
83
84between.pseries <- function(x, effect = c("individual", "time", "group"), ...) {
85  if(!isTRUE(getOption("plm.fast"))) {
86    between.pseries.baseR(x, effect, ...) } else {
87    if(!isTRUE(getOption("plm.fast.pkg.collapse"))) stop(txt.no.collapse, call. = FALSE)
88    between.pseries.collapse(x, effect, ...) }
89}
90
91between.matrix <- function(x, effect, ...) {
92  if(!isTRUE(getOption("plm.fast"))) {
93    between.matrix.baseR(x, effect, ...) } else {
94    if(!isTRUE(getOption("plm.fast.pkg.collapse"))) stop(txt.no.collapse, call. = FALSE)
95    between.matrix.collapse(x, effect, ...) }
96}
97
98#### Within wrapper switches ####
99Within.default <- function(x, effect, ...) {
100  if(!isTRUE(getOption("plm.fast"))) {
101    Within.default.baseR(x, effect, ...) } else {
102    if(!isTRUE(getOption("plm.fast.pkg.collapse"))) stop(txt.no.collapse, call. = FALSE)
103    Within.default.collapse(x, effect, ...) }
104}
105
106Within.pseries <- function(x, effect = c("individual", "time", "group", "twoways"), ...) {
107  if(!isTRUE(getOption("plm.fast"))) {
108    Within.pseries.baseR(x, effect, ...)
109  } else {
110    if(!isTRUE(getOption("plm.fast.pkg.collapse"))) stop(txt.no.collapse, call. = FALSE)
111
112    if(is.null(getOption("plm.fast.pkg.FE.tw"))) options("plm.fast.pkg.FE.tw" = "collapse")
113    switch(getOption("plm.fast.pkg.FE.tw"),
114           "collapse" = Within.pseries.collapse(x, effect, ...),        # collapse only,
115           "fixest"   = Within.pseries.collapse.fixest(x, effect, ...), # collapse for 1-way FE + fixest for 2-way FE,
116           "lfe"      = Within.pseries.collapse.lfe(x, effect, ...),    # collapse for 1-way FE + lfe for 2-way FE,
117           stop("unknown value of option 'plm.fast.pkg.FE.tw'"))
118  }
119}
120
121Within.matrix <- function(x, effect, rm.null = TRUE, ...) {
122  if(!isTRUE(getOption("plm.fast"))) {
123    Within.matrix.baseR(x, effect, ...)
124  } else {
125    if (!isTRUE(getOption("plm.fast.pkg.collapse"))) stop(txt.no.collapse, call. = FALSE)
126
127    if(is.null(getOption("plm.fast.pkg.FE.tw"))) options("plm.fast.pkg.FE.tw" = "collapse")
128    switch(getOption("plm.fast.pkg.FE.tw"),
129           "collapse" = Within.matrix.collapse(x, effect, ...),        # collapse only,
130           "fixest"   = Within.matrix.collapse.fixest(x, effect, ...), # collapse for 1-way FE + fixest for 2-way FE,
131           "lfe"      = Within.matrix.collapse.lfe(x, effect, ...),    # collapse for 1-way FE + lfe for 2-way FE,
132           stop("unknown value of option 'plm.fast.pkg.FE.tw'"))
133  }
134}
135
136
137#### Sum ####
138
139Sum.default.collapse <- function(x, effect, ...) {
140# print("Sum.default.collapse")
141# browser()
142  # argument 'effect' is assumed to be a factor
143
144  if(!is.numeric(x)) stop("The Sum function only applies to numeric vectors")
145  # check for presence of na.rm in dots, if not present set to FALSE
146  na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm
147  res <- collapse::fsum(x, g = effect, w = NULL, na.rm = na.rm, TRA = "replace")
148  names(res) <- as.character(effect)
149  return(res)
150}
151
152Sum.pseries.collapse <- function(x, effect = c("individual", "time", "group"), ...) {
153# print("Sum.pseries.collapse")
154# browser()
155  effect <- match.arg(effect)
156  # check for presence of na.rm in dots, if not present set to FALSE
157  na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm
158  eff.no <- switch(effect,
159                   "individual" = 1L,
160                   "time"       = 2L,
161                   "group"      = 3L,
162                   stop("unknown value of argument 'effect'"))
163  xindex <- unclass(attr(x, "index")) # unclass for speed
164  checkNA.index(xindex) # index may not contain any NA
165  eff.fac <- xindex[[eff.no]]
166  res <- collapse::fsum(x, g = eff.fac, w = NULL, na.rm = na.rm, TRA = "replace")
167  names(res) <- as.character(eff.fac)
168  res <- add_pseries_features(res, attr(x, "index"))
169  return(res)
170}
171
172Sum.matrix.collapse <- function(x, effect, ...) {
173# print("Sum.matrix.collapse")
174# browser()
175  # if no index attribute, argument 'effect' is assumed to be a factor
176  eff.fac <- if(is.null(xindex <- attr(x, "index"))) {
177    effect
178  } else {
179    if(!is.character(effect) && length(effect) > 1L)
180      stop("for matrices with index attributes, the effect argument must be a character")
181    if(! effect %in% c("individual", "time", "group"))
182      stop("irrelevant effect for a Sum transformation")
183    eff.no <- switch(effect,
184                     "individual" = 1L,
185                     "time"       = 2L,
186                     "group"      = 3L,
187                     stop("unknown value of argument 'effect'"))
188    xindex <- unclass(xindex) # unclass for speed
189    checkNA.index(xindex) # index may not contain any NA
190    xindex[[eff.no]]
191  }
192  # check for presence of na.rm in dots, if not present set to FALSE
193  na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm
194  res <- collapse::fsum(x, g = eff.fac, w = NULL, na.rm = na.rm, drop = FALSE, TRA = "replace")
195  rownames(res) <- as.character(eff.fac)
196  attr(res, "index") <- NULL
197  return(res)
198}
199
200#### B/between ####
201
202# Need separate implementations of Between.pseries and between.pseries due to different NA handling
203
204Between.default.collapse <- function(x, effect, ...) {
205# print("Between.default.collapse")
206# browser()
207
208  # argument 'effect' is assumed to be a factor
209  if(!is.numeric(x)) stop("The Between function only applies to numeric vectors")
210  # check for presence of na.rm in dots, if not present set to FALSE
211  na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm
212  nms <- as.character(effect)
213  res <- collapse::fbetween(x, g = effect, w = NULL, na.rm = na.rm)
214  names(res) <- nms
215  return(res)
216}
217
218between.default.collapse <- function(x, effect, ...) {
219# print("between.default.collapse")
220# browser()
221
222  # argument 'effect' is assumed to be a factor
223  if(!is.numeric(x)) stop("The Between function only applies to numeric vectors")
224  # check for presence of na.rm in dots, if not present set to FALSE
225  na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm
226  res <- collapse::fbetween(x, g = effect, w = NULL, na.rm = na.rm, fill = TRUE)
227  keep <- !duplicated(effect)
228  res <- res[keep]
229  names(res) <- as.character(effect[keep])
230  # bring into factor level order (not order as appears in orig. data)
231  lvl <- levels(collapse::fdroplevels(effect))
232  res <- res[lvl]
233  return(res)
234}
235
236Between.pseries.collapse <- function(x, effect = c("individual", "time", "group"), ...) {
237# print("Between.pseries.collapse")
238# browser()
239
240  # translate arguments
241  effect <- match.arg(effect)
242  # check for presence of na.rm in dots, if not present set to FALSE
243  na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm
244  eff.no <- switch(effect,
245            "individual" = 1L,
246            "time"       = 2L,
247            "group"      = 3L,
248            stop("unknown value of argument 'effect'"))
249
250  xindex <- unclass(attr(x, "index")) # unclass for speed
251  checkNA.index(xindex) # index may not contain any NA
252  nms <- as.character(xindex[[eff.no]])
253  na.x <- is.na(x)
254  # must be fill = TRUE [to catch case when 1 obs of an individual is NA (otherwise result could contain non-intended NA)]
255  res <- collapse::fbetween(x, effect = eff.no, w = NULL, na.rm = na.rm, fill = TRUE)
256  names(res) <- nms
257  res[na.x] <- NA
258  return(res)
259}
260
261between.pseries.collapse <- function(x, effect = c("individual", "time", "group"), ...) {
262# print("between.pseries.collapse")
263# browser()
264  effect <- match.arg(effect)
265  # check for presence of na.rm in dots, if not present set to FALSE
266  na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm
267  eff.no <-  switch(effect,
268           "individual" = 1L,
269           "time"       = 2L,
270           "group"      = 3L,
271           stop("unknown value of argument 'effect'"))
272
273  xindex <- unclass(attr(x, "index")) # unclass for speed
274  checkNA.index(xindex) # index may not contain any NA
275  i <- xindex[[eff.no]]
276  # must be fill = TRUE [to catch case when 1 obs of an individual is NA
277  # (otherwise result could contain non-intended NA)]
278  res <- collapse::fbetween(x, effect = eff.no, w = NULL, na.rm = na.rm, fill = TRUE)
279  res <- remove_pseries_features(res)
280  keep <- !duplicated(i)
281  res <- res[keep]
282  names(res) <- as.character(i[keep])
283  # bring into factor level order (not order as appears in orig. data)
284  lvl <- levels(collapse::fdroplevels(i))
285  res <- res[lvl]
286  return(res)
287}
288
289
290
291Between.matrix.collapse <- function(x, effect, ...) {
292# print("Between.matrix.collapse")
293# browser()
294  # if no index attribute, argument 'effect' is assumed to be a factor
295  eff.fac <- if(is.null(xindex <- attr(x, "index"))) {
296    effect
297  } else {
298    if(!is.character(effect) && length(effect) > 1L)
299      stop("for matrices with index attributes, the effect argument must be a character")
300    if(! effect %in% c("individual", "time", "group"))
301      stop("irrelevant effect for a between transformation")
302    eff.no <- switch(effect,
303                     "individual" = 1L,
304                     "time"       = 2L,
305                     "group"      = 3L,
306                     stop("unknown value of argument 'effect'"))
307    xindex <- unclass(xindex) # unclass for speed
308    checkNA.index(xindex) # index may not contain any NA
309    xindex[[eff.no]]
310  }
311  # check for presence of na.rm in dots, if not present set to FALSE
312  na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm
313  na.x <- is.na(x)
314  res <- collapse::fbetween(x, g = eff.fac, w = NULL, na.rm = na.rm, fill = TRUE)
315  attr(res, "index") <- NULL
316  rownames(res) <- as.character(eff.fac)
317  res[na.x] <- NA
318  return(res)
319}
320
321between.matrix.collapse <- function(x, effect, ...) {
322# print("between.matrix.collapse")
323# browser()
324  # if no index attribute, argument 'effect' is assumed to be a factor
325  eff.fac <- if(is.null(xindex <- attr(x, "index"))) {
326    effect
327  } else {
328    if(!is.character(effect) && length(effect) > 1L)
329      stop("for matrices with index attributes, the effect argument must be a character")
330    if(! effect %in% c("individual", "time", "group"))
331      stop("irrelevant effect for a between transformation")
332    eff.no <- switch(effect,
333                     "individual" = 1L,
334                     "time"       = 2L,
335                     "group"      = 3L,
336                     stop("unknown value of argument 'effect'"))
337    xindex <- unclass(xindex) # unclass for speed
338    checkNA.index(xindex) # index may not contain any NA
339    xindex[[eff.no]]
340  }
341  # check for presence of na.rm in dots, if not present set to FALSE
342  na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm
343  res <- collapse::fbetween(x, g = eff.fac, w = NULL, na.rm = na.rm, fill = TRUE)
344  rownames(res) <- as.character(eff.fac)
345  # compress data to number of unique individuals (or time periods)
346  res <- res[!duplicated(eff.fac), , drop = FALSE]
347  # bring into factor level order (not order as appears in orig. data)
348  lvl <- levels(collapse::fdroplevels(eff.fac))
349  res <- res[lvl, , drop = FALSE]
350  return(res)
351}
352
353
354#### Within ####
355# Within - default
356
357Within.default.collapse <- function(x, effect, ...) {
358# print("Within.default.collapse")
359# browser()
360
361  # argument 'effect' is assumed to be a factor
362  if(!is.numeric(x)) stop("the within function only applies to numeric vectors")
363  # check for presence of na.rm in dots, if not present set to FALSE
364  na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm
365  res <- collapse::fwithin(x, g = effect, w = NULL, na.rm = na.rm)
366  # =(plm)= res <- x - Between(x, effect, ...)
367  names(res) <- as.character(effect)
368  return(res)
369}
370
371
372Within.pseries.collapse <- function(x, effect = c("individual", "time", "group", "twoways"), ...) {
373# print("Within.pseries.collapse")
374# browser()
375  effect <- match.arg(effect)
376  # check for presence of na.rm in dots, if not present set to FALSE
377  na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm
378  xindex <- unclass(attr(x, "index")) # unclass for speed
379  checkNA.index(xindex) # index may not contain any NA
380  if(effect != "twoways") {
381    eff.no <- switch(effect,
382             "individual" = 1L,
383             "time"       = 2L,
384             "group"      = 3L,
385             stop("unknown value of argument 'effect'"))
386    res <- collapse::fwithin(x, effect = eff.no, w = NULL, na.rm = na.rm, mean = 0)
387  } else {
388    eff.ind.fac  <- xindex[[1L]]
389    eff.time.fac <- xindex[[2L]]
390    if(is.pbalanced(eff.ind.fac, eff.time.fac)) {
391      # effect = "twoways" - balanced
392      res <- collapse::fwithin(  x, effect = 1L, w = NULL, na.rm = na.rm, mean = "overall.mean") -
393              collapse::fbetween(x, effect = 2L, w = NULL, na.rm = na.rm, fill = TRUE)
394            # =(plm)= res <- x - Between(x, "individual", ...) - Between(x, "time", ...) + mean(x, ...)
395    } else {
396      # effect = "twoways" - unbalanced
397      Dmu <- model.matrix(~ eff.time.fac - 1)
398      W1   <- collapse::fwithin(x,   effect = 1L,          w = NULL, na.rm = na.rm, mean = 0) # pseries interface
399      WDmu <- collapse::fwithin(Dmu, g      = eff.ind.fac, w = NULL, na.rm = na.rm, mean = 0) # matrix interface
400      W2 <- lm.fit(WDmu, x)$fitted.values
401      res <- W1 - W2
402    }
403  }
404  return(res)
405}
406
407Within.matrix.collapse <- function(x, effect, rm.null = TRUE, ...) {
408# print("Within.matrix.collapse")
409# browser()
410
411
412  if(is.null(xindex <- attr(x, "index"))) {
413    # non-index case, 'effect' needs to be a factor
414    result <- Within.default(x, effect, ...)
415    othervar <- colSums(abs(x)) > sqrt(.Machine$double.eps)
416    if(rm.null) {
417      result <- result[ , othervar, drop = FALSE]
418      attr(result, "constant") <- character(0)
419    }
420    else attr(result, "constant") <- colnames(x)[! othervar]
421    return(result)
422  }
423  else {
424    # index case
425    xindex <- unclass(xindex) # unclass for speed
426    checkNA.index(xindex) # index may not contain any NA
427
428    # check for presence of na.rm in dots, if not present set to FALSE
429    na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm
430
431    if(effect != "twoways") {
432      eff.fac <- switch(effect,
433                       "individual" = xindex[[1L]],
434                       "time"       = xindex[[2L]],
435                       "group"      = xindex[[3L]],
436                       stop("unknown value of argument 'effect'"))
437
438      result <- collapse::fwithin(x, g = eff.fac, w = NULL, na.rm = na.rm, mean = 0)
439      # =(plm)= result <- x - Between(x, effect)
440
441    } else {
442      # effect = "twoways"
443      eff.ind.fac  <- xindex[[1L]]
444      eff.time.fac <- xindex[[2L]]
445      if(is.pbalanced(eff.ind.fac, eff.time.fac)) {
446        # balanced twoways
447        result <- collapse::fwithin(  x, g = eff.ind.fac,  w = NULL, na.rm = na.rm, mean = "overall.mean") -
448                   collapse::fbetween(x, g = eff.time.fac, w = NULL, na.rm = na.rm, fill = TRUE)
449        # =(plm)= result <- x - Between(x, "individual", ...) - Between(x, "time", ...) +
450        #                        matrix(colMeans(x, ...), nrow = nrow(x), ncol = ncol(x), byrow = TRUE)
451      }
452      else { # unbalanced twoways
453        # as factor is used twice below, make it a collapse::GRP object -> should give some speed-up
454        eff.ind.fac <- collapse::GRP(eff.ind.fac, group.sizes = FALSE, return.groups = FALSE, call = FALSE)
455        Dmu <- model.matrix(~ eff.time.fac - 1)
456        W1   <- collapse::fwithin(x,   g = eff.ind.fac, w = NULL, na.rm = na.rm, mean = 0)
457        WDmu <- collapse::fwithin(Dmu, g = eff.ind.fac, w = NULL, na.rm = na.rm, mean = 0)
458        W2 <- lm.fit(WDmu, x)$fitted.values
459        result <- W1 - W2
460      }
461    }
462  }
463  return(result)
464}
465
466#### These functions use collpase::fhdwithin (using internally fixest::demean)
467#### or lfe::demeanlist respectively, for
468#### the 2-way within transformation which are dramatically faster than
469#### the implementation via separate collapse::fwithin calls (due to the special
470#### algorithms used to partial out the fixed effects)
471Within.pseries.collapse.fixest <- function(x, effect = c("individual", "time", "group", "twoways"), ...) {
472# print("Within.pseries.collapse.fixest")
473# browser()
474  effect <- match.arg(effect)
475  # check for presence of na.rm in dots, if not present set to FALSE
476  na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm
477  xindex <- unclass(attr(x, "index")) # unclass for speed
478  checkNA.index(xindex) # index may not contain any NA
479  if(effect != "twoways") {
480    eff.no <- switch(effect,
481                     "individual" = 1L,
482                     "time"       = 2L,
483                     "group"      = 3L,
484                     stop("unknown value of argument 'effect'"))
485    # in 1-way case fwithin seems faster than fhdwithin, so keep 1-way and 2-way
486    # cases separated
487    res <- collapse::fwithin(x, effect = eff.no, w = NULL, na.rm = na.rm, mean = 0)
488  } else {
489    # effect = "twoways"
490
491    # dispatches to pseries method
492    res <- collapse::fhdwithin(x, effect = 1:2, w = NULL, na.rm = na.rm)
493  }
494  return(res)
495}
496
497Within.matrix.collapse.fixest <- function(x, effect, rm.null = TRUE, ...) {
498# print("Within.matrix.collapse.fixest")
499# browser()
500
501  if(is.null(xindex <- attr(x, "index"))) {
502    # non-index case, 'effect' needs to be a factor
503    result <- Within.default(x, effect, ...)
504    othervar <- colSums(abs(x)) > sqrt(.Machine$double.eps)
505    if(rm.null) {
506      result <- result[ , othervar, drop = FALSE]
507      attr(result, "constant") <- character(0)
508    }
509    else attr(result, "constant") <- colnames(x)[! othervar]
510    return(result)
511  }
512  else {
513    # index case
514    xindex <- unclass(xindex) # unclass for speed
515    checkNA.index(xindex) # index may not contain any NA
516    # check for presence of na.rm in dots, if not present set to FALSE
517    na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm
518
519    if(effect != "twoways") {
520      eff.fac <- switch(effect,
521                        "individual" = xindex[[1L]],
522                        "time"       = xindex[[2L]],
523                        "group"      = xindex[[3L]],
524                        stop("unknown value of argument 'effect'"))
525
526      ## result <- collapse::fhdwithin(x, eff.fac) # needs pkg fixest
527      # --> for one-way effect, this seems slower than collapse::fwithin
528      result <- collapse::fwithin(x, g = eff.fac, w = NULL, na.rm = na.rm, mean = 0)
529      # =(plm)= result <- x - Between(x, effect)
530    } else {
531      # effect = "twoways"
532      # no need to distinguish between balanced/unbalanced
533      # as this is fully handled by collapse::fhdwithin()
534      # collapse::fhdwithin needs pkg fixest as it uses fixest::demean
535      result <- collapse::fhdwithin(x, fl = xindex[1:2], w = NULL, na.rm = na.rm)
536    }
537  }
538  return(result)
539}
540
541Within.pseries.collapse.lfe <- function(x, effect = c("individual", "time", "group", "twoways"), ...) {
542# print("Within.pseries.collapse.lfe")
543# browser()
544
545  effect <- match.arg(effect)
546  xindex <- unclass(attr(x, "index"))
547  checkNA.index(xindex) # index may not contain any NA
548  # check for presence of na.rm in dots, if not present set to FALSE
549  na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm
550  if(effect != "twoways") {
551    eff.no <- switch(effect,
552                     "individual" = 1L,
553                     "time"       = 2L,
554                     "group"      = 3L,
555                     stop("unknown value of argument 'effect'"))
556    # collapse::fwithin is faster in 1-ways case than lfe::demanlist, so
557    # keep cases separated
558    res <- collapse::fwithin(x, effect = eff.no, w = NULL, na.rm = na.rm, mean = 0)
559  } else {
560    # effect = "twoways"
561    # no need to distinguish between balanced/unbalanced
562    # as this is fully handled by lfe::dmeanlist()
563      res <- unlist(lfe::demeanlist(x, fl = xindex[1:2], na.rm = na.rm))
564      res <- add_pseries_features(res, attr(x, "index")) # index needs to be a proper pindex here!
565    }
566  return(res)
567}
568
569Within.matrix.collapse.lfe <- function(x, effect, rm.null = TRUE, ...) {
570# print("Within.matrix.collapse.lfe")
571# browser()
572
573
574  if(is.null(xindex <- attr(x, "index"))) {
575    # non-index case, 'effect' needs to be a factor
576    result <- Within.default(x, effect, ...)
577    othervar <- colSums(abs(x)) > sqrt(.Machine$double.eps)
578    if(rm.null) {
579      result <- result[ , othervar, drop = FALSE]
580      attr(result, "constant") <- character(0)
581    }
582    else attr(result, "constant") <- colnames(x)[! othervar]
583    return(result)
584  }
585  else {
586    # index case
587    xindex <- unclass(xindex)
588    checkNA.index(xindex) # index may not contain any NA
589    # check for presence of na.rm in dots, if not present set to FALSE
590    na.rm <- if(missing(...) || is.null(na.rm <- list(...)$na.rm)) FALSE else na.rm
591
592    if(effect != "twoways") {
593      eff.fac <- switch(effect,
594                        "individual" = xindex[[1L]],
595                        "time"       = xindex[[2L]],
596                        "group"      = xindex[[3L]],
597                        stop("unknown value of argument 'effect'"))
598      # collapse::fwithin is faster in 1-ways case than lfe::demanlist, so
599      # keep cases separated
600      result <- collapse::fwithin(x, g = eff.fac, w = NULL, na.rm = na.rm, mean = 0)
601      # =(plm)= result <- x - Between(x, effect)
602    } else {
603      # effect = "twoways"
604      # no need to distinguish between balanced/unbalanced
605      # as this is fully handled by lfe::dmeanlist()
606      #
607      # lfe::demeanlist (lfe vers. 2.8-6) return value for matrix input is
608      # inconsistent / depends on value of argument na.rm,
609      # see https://github.com/sgaure/lfe/issues/50.
610      result <- lfe::demeanlist(x, fl = xindex[1:2], na.rm = na.rm)
611      if(is.list(result)) result <- result[[1L]]
612      attr(result, "index") <- attr(x, "index") # index needs to be a proper pindex here!
613    }
614  }
615  return(result)
616}
617
618#### wrapper for pseriesfy ####
619# both pseriesfy functions are in file tool_pdata.frame.R
620pseriesfy <- function(x,  ...) {
621  if(!isTRUE(getOption("plm.fast"))) {
622    pseriesfy.baseR(x, ...) } else {
623      if(!isTRUE(getOption("plm.fast.pkg.collapse"))) stop(txt.no.collapse, call. = FALSE)
624      pseriesfy.collapse(x, ...) }
625}
626
627.onAttach <- function(libname, pkgname) {
628  # options("plm.fast" = TRUE) # not yet the default, maybe in Q3/2021,
629                               # would need pkg collapse as hard dependency
630
631  # determine when pkg plm is attached whether pkg collapse, fixest, and lfe are
632  # available and set (non-documented) options, which packages are available.
633  # These options are used to determine in the wrappers if fast mode can be used
634  # and if the speed up by fixest or lfe for the 2-way FE case can be used.
635  avail.collapse <- requireNamespace("collapse", quietly = TRUE)
636  avail.fixest   <- requireNamespace("fixest",   quietly = TRUE)
637  avail.lfe      <- requireNamespace("lfe",      quietly = TRUE)
638
639  if(avail.collapse) {
640    options("plm.fast.pkg.collapse" = TRUE)
641    options("plm.fast.pkg.FE.tw" = "collapse")
642    # fixest wins over lfe
643    if(avail.fixest) {
644      options("plm.fast.pkg.FE.tw" = "fixest")
645    } else {
646      if(avail.lfe) {
647        options("plm.fast.pkg.FE.tw" = "lfe")
648      }
649    }
650  }
651  else options("plm.fast.pkg.collapse" = FALSE)
652}
653
654
655#' Option to Switch On/Off Fast Data Transformations
656#'
657#' A significant speed up can be gained by using fast (panel) data transformation
658#' functions from package `collapse`.
659#' An additional significant speed up for the two-way fixed effects case can be
660#' achieved if package `fixest` or `lfe` is installed (package `collapse`
661#' needs to be installed for the fast mode in any case).
662#'
663#' @details By default, this speed up is not enabled.
664#' Option `plm.fast` can be used to enable/disable the speed up. The option is
665#' evaluated prior to execution of supported transformations (see below), so
666#' `option("plm.fast" = TRUE)` enables the speed up while
667#' `option("plm.fast" = FALSE)` disables the speed up.
668#'
669#' To have it always switched on, put `options("plm.fast" = TRUE)` in your
670#' .Rprofile file.
671#'
672#' See **Examples** for how to use the option and for a benchmarking example.
673#'
674#' By default, package `plm` uses base R implementations and R-based code. The
675#' package `collapse` provides fast data transformation functions written
676#' in C/C++, among them some especially suitable for panel data.
677#' Having package `collapse` installed is a requirement for the speed up.
678#' However, this package is currently not a hard dependency for package `plm`
679#' but a 'Suggests' dependency.
680#'
681#' Availability of packages `fixest` and `lfe` is checked for once when
682#' package plm is attached and the additional speed up for the two-way fixed
683#' effect case is enabled automatically (`fixest` wins over `lfe`),
684#' given one of the packages is detected and `options("plm.fast" = TRUE)` is set.
685#' If so, the packages' fast algorithms to partial out fixed effects are
686#' used (`fixest::demean` (via `collapse::fhdwithin`), `lfe::demeanlist`).
687#' Both packages are 'Suggests' dependencies.
688#'
689#' Currently, these functions benefit from the speed-up (more functions are
690#' under investigation):
691#' \itemize{
692#'   \item between,
693#'   \item Between,
694#'   \item Sum,
695#'   \item Within,
696#'   \item pseriesfy.
697#' }
698#'
699#' @name plm.fast
700#' @keywords sysdata manip
701#' @examples
702#' \dontrun{
703#' ### A benchmark of plm without and with speed-up
704#' library("plm")
705#' library("collapse")
706#' library("microbenchmark")
707#' rm(list = ls())
708#' data("wlddev", package = "collapse")
709#' form <- LIFEEX ~ PCGDP + GINI
710#'
711#' # produce big data set (taken from collapse's vignette)
712#' wlddevsmall <- get_vars(wlddev, c("iso3c","year","OECD","PCGDP","LIFEEX","GINI","ODA"))
713#' wlddevsmall$iso3c <- as.character(wlddevsmall$iso3c)
714#' data <- replicate(100, wlddevsmall, simplify = FALSE)
715#' rm(wlddevsmall)
716#' uniquify <- function(x, i) {
717#'   x$iso3c <- paste0(x$iso3c, i)
718#'   x
719#' }
720#' data <- unlist2d(Map(uniquify, data, as.list(1:100)), idcols = FALSE)
721#' data <- pdata.frame(data, index = c("iso3c", "year"))
722#' pdim(data) # Balanced Panel: n = 21600, T = 59, N = 1274400 // but many NAs
723#' # data <- na.omit(data)
724#' # pdim(data) # Unbalanced Panel: n = 13300, T = 1-31, N = 93900
725#'
726#' times <- 1 # no. of repetitions for benchmark - this takes quite long!
727#'
728#' onewayFE <- microbenchmark(
729#'  {options("plm.fast" = FALSE); plm(form, data = data, model = "within")},
730#'  {options("plm.fast" = TRUE);  plm(form, data = data, model = "within")},
731#'   times = times, unit = "relative")
732#'
733#' summary(onewayFE)
734#'
735#' ## two-ways FE benchmark requires pkg fixest and lfe
736#' ## (End-users shall only set option plm.fast. Option plm.fast.pkg.FE.tw shall
737#' ##  _not_ be set by the end-user, it is determined automatically when pkg plm
738#' ## is attached; however, it needs to be set explicitly in this example for the
739#' ## benchmark.)
740#' if(requireNamespace("fixest", quietly = TRUE) &&
741#'    requireNamespace("lfe", quietly = TRUE)) {
742#'
743#' twowayFE <-  microbenchmark(
744#'  {options("plm.fast" = FALSE);
745#'     plm(form, data = data, model = "within", effect = "twoways")},
746#'  {options("plm.fast" = TRUE, "plm.fast.pkg.FE.tw" = "collapse");
747#'     plm(form, data = data, model = "within", effect = "twoways")},
748#'  {options("plm.fast" = TRUE, "plm.fast.pkg.FE.tw" = "fixest");
749#'     plm(form, data = data, model = "within", effect = "twoways")},
750#'  {options("plm.fast" = TRUE, "plm.fast.pkg.FE.tw" = "lfe");
751#'     plm(form, data = data, model = "within", effect = "twoways")},
752#'   times = times, unit = "relative")
753#'
754#' summary(twowayFE)
755#' }
756#'
757#' onewayRE <- microbenchmark(
758#'  {options("plm.fast" = FALSE); plm(form, data = data, model = "random")},
759#'  {options("plm.fast" = TRUE);  plm(form, data = data, model = "random")},
760#'   times = times, unit = "relative")
761#'
762#' summary(onewayRE)
763#'
764#' twowayRE <-  microbenchmark(
765#'  {options("plm.fast" = FALSE); plm(form, data = data, model = "random", effect = "twoways")},
766#'  {options("plm.fast" = TRUE);  plm(form, data = data, model = "random", effect = "twoways")},
767#'   times = times, unit = "relative")
768#'
769#' summary(twowayRE)
770#' }
771NULL
772
773
774txt.no.collapse <- paste0("options(\"plm.fast\") is set to TRUE but package 'collapse' ",
775                          "is not available which is needed for fast data transformation functions. ",
776                          "Either set 'options(\"plm.fast\" = FALSE)' or install the ",
777                          "missing package, e.g., with 'install.packages(\"collapse\")'. \n",
778                          "Having additionally package 'fixest' or 'lfe' installed ",
779                          "will speed up the two-way fixed effect case further. \n",
780                          "Availability of packages is determined only when ",
781                          "plm is attached, so restart R/reload plm when mentioned ",
782                          "packages have been installed.")
783