1## Function that are used in more than on place in plm (or likely to be used in more than one place in the future)
2
3## - bdiag : takes matrices as argument and returns the block-diagonal matrix (used in pgmm and plm.list)
4## - mylm : inner fitting func based on stats::lm with matrix inputs (used in plm.fit)
5## - my.lm.fit : like the barebone stats::lm.fit but with some extra information (e.g., SEs, sigma) used in purtest
6## - twosls : computes the 2SLS estimator (used in plm and ercomp)
7## - data.name : used in a lot tests to generate the 'data.name' entry for htest objects from the model object's call$formula
8## - has.intercept : tests the presence of an intercept
9## - pres : extract model residuals as pseries (used in several estimation functions)
10## - punbalancedness : measures for the unbalancedness of panel data
11## - myvar : calculates variance with NA removal, checks if input is constant (also for factor and character)
12## - pvar : checks if input varies in individual / time dimension
13
14bdiag <- function(...){
15  ## non-exported
16  if (nargs() == 1L)
17    x <- as.list(...)
18  else
19    x <- list(...)
20  n <- length(x)
21  if(n == 0L) return(NULL)
22  x <- lapply(x, function(y) if(length(y)) as.matrix(y) else
23              stop("Zero-length component in x"))
24  d <- array(unlist(lapply(x, dim)), c(2, n))
25  rr <- d[1L, ]
26  cc <- d[2L, ]
27  rsum <- sum(rr)
28  csum <- sum(cc)
29  out <- array(0, c(rsum, csum))
30  ind <- array(0, c(4, n))
31  rcum <- cumsum(rr)
32  ccum <- cumsum(cc)
33  ind[1, -1] <- rcum[-n]
34  ind[2,   ] <- rcum
35  ind[3, -1] <- ccum[-n]
36  ind[4,   ] <- ccum
37  imat <- array(1:(rsum * csum), c(rsum, csum))
38  iuse <- apply(ind, 2, function(y, imat) imat[(y[1L]+1):y[2L],
39                                               (y[3L]+1):y[4L]], imat = imat)
40  iuse <- as.vector(unlist(iuse))
41  out[iuse] <- unlist(x)
42  return(out)
43}
44
45# mylm is used in plm.fit()
46mylm <- function(y, X, W = NULL) {
47  ## non-exported
48  names.X <- colnames(X)
49  result <- if(is.null(W)) lm(y ~ X - 1) else twosls(y, X, W)
50  if(any(na.coef <- is.na(result$coefficients))) {
51    ## for debug purpose:
52    # warning("Coefficient(s) '", paste((names.X)[na.coef], collapse = ", "),
53    #"' could not be estimated and is (are) dropped.")
54    X <- X[ , !na.coef, drop = FALSE]
55    if(dim(X)[2L] == 0L) stop(paste("estimation not possible: all coefficients",
56                                    "omitted from estimation due to aliasing"))
57
58    ## re-estimate without the columns which resulted previously in NA-coefficients
59    result <- if(is.null(W)) lm(y ~ X - 1) else twosls(y, X, W)
60  }
61  result$vcov <- vcov(result)
62  result$X <- X
63  result$y <- y
64  result$W <- W
65  # aliased is an element of summary.lm-objects:
66  # since plm drops aliased coefs, store this info in plm object
67  # NB: this only sets coefs to NA that are detected/set to NA by mylm()/lm.fit();
68  #     covariates dropped earlier by model.matrix( , cstcovar.rm) are not included here anymore
69  result$aliased <- na.coef
70  names(result$aliased) <- names.X
71  names(result$coefficients) <- colnames(result$vcov) <-
72    rownames(result$vcov) <- colnames(X)
73  result
74}
75
76# my.lm.fit is used in purtest()
77my.lm.fit <- function(X, y, dfcor = TRUE, ...){
78  reg <- lm.fit(X, y)
79  ## 'as' summary method for lm.fit
80  p <- reg$rank
81  Qr <- reg$qr
82  n <- NROW(Qr$qr)
83  rdf <- n - p
84  p1 <- 1L:p
85  r <- reg$residuals
86  rss <- as.numeric(crossprod(r))
87  resvar <- if (dfcor) rss/rdf else rss/n
88  sigma <- sqrt(resvar)
89  R <- chol2inv(Qr$qr[p1, p1, drop = FALSE])
90  thecoef <- reg$coefficients[Qr$pivot[p1]] #[lags+1]
91  these <- sigma * sqrt(diag(R)) #[lags+1])
92  list(coef = thecoef, se = these, sigma = sigma,
93       rss = rss, n = n, K = p, rdf = rdf)
94}
95
96#' @importFrom stats .lm.fit
97twosls <- function(y, X, W, intercept = FALSE, lm.type = "lm"){
98  ## non-exported
99  # Return value can be controlled by argument lm.type. Often, a full lm model
100  # is needed for further processing but can select one of the fast but less
101  # rich objects produced by lm.fit or .lm.fit (the latter does not contain, e.g.,
102  # fitted.values and is to be used very carefully (e.g., coefs not in input order).
103
104  # As NA/NaN/(+/-)Inf-freeness needs to be guaranteed when functions call
105  # twosls(), so can use lm.fit to calc. Xhat.
106  Xhat <- lm.fit(cbind(1, W), X)$fitted.values
107  # old: Xhat <- lm(X ~ W)$fitted.values
108
109  if(!is.matrix(Xhat)){
110    Xhat <- matrix(Xhat, ncol = 1L)
111    colnames(Xhat) <- colnames(X)
112  }
113
114  if(intercept){
115    model <- switch(lm.type,
116                    "lm"      =  lm(y ~ Xhat),
117                    "lm.fit"  =  lm.fit(cbind(1, Xhat), y),
118                    ".lm.fit" = .lm.fit(cbind(1, Xhat), y))
119    yhat <- as.vector(crossprod(t(cbind(1, X)), coef(model)))
120  }
121  else{
122    model <- switch(lm.type,
123                    "lm"      =  lm(y ~ Xhat - 1),
124                    "lm.fit"  =  lm.fit(Xhat, y),
125                    ".lm.fit" = .lm.fit(Xhat, y))
126    yhat <- as.vector(crossprod(t(X), coef(model)))
127  }
128  model$residuals <- y - yhat
129  model
130}
131
132data.name <- function(x) {
133  ## non-exported, used in various tests
134  data.name <- paste(deparse(x$call$formula))
135  if (length(data.name) > 1L) paste(data.name[1L], "...")
136  else data.name
137}
138
139##### has.intercept methods #####
140
141#' Check for the presence of an intercept in a formula or in a fitted
142#' model
143#'
144#' The presence of an intercept is checked using the formula which is
145#' either provided as the argument of the function or extracted from
146#' a fitted model.
147#'
148#' @param object a `formula`, a `Formula` or a fitted model (of class
149#'     `plm` or `panelmodel`),
150#' @param rhs an integer (length > 1 is possible), indicating the parts of right
151#'      hand sides of the formula to be evaluated for the presence of an
152#'      intercept or NULL for all parts of the right hand side
153#'      (relevant for the `Formula` and the `plm` methods)
154#' @param \dots further arguments.
155#'
156#' @return a logical
157#' @export
158has.intercept <- function(object, ...) {
159  UseMethod("has.intercept")
160}
161
162#' @rdname has.intercept
163#' @export
164has.intercept.default <- function(object, ...) {
165  has.intercept(formula(object), ...)
166}
167
168#' @rdname has.intercept
169#' @export
170has.intercept.formula <- function(object, ...) {
171  attr(terms(object), "intercept") == 1L
172}
173
174#' @rdname has.intercept
175#' @export
176has.intercept.Formula <- function(object, rhs = NULL, ...) {
177  ## NOTE: returns a logical vector of the necessary length
178  ## (which might be > 1)
179  if (is.null(rhs)) rhs <- 1:length(attr(object, "rhs"))
180  res <- sapply(rhs, function(x) {
181    aform <- formula(object, lhs = 0, rhs = x)
182    # expand the dot if any in all the parts except the first
183    if (x > 1L) aform <- update(formula(object, lhs = 0, rhs = 1), aform)
184    has.intercept(aform)
185  })
186  return(res)
187}
188
189#' @rdname has.intercept
190#' @export
191has.intercept.panelmodel <- function(object, ...) {
192  object <- attr(model.frame(object), "formula")
193  has.intercept(object)
194}
195
196#' @rdname has.intercept
197#' @export
198has.intercept.plm <- function(object, rhs = 1L, ...) {
199
200  # catch deprecated argument "part": convert and warn / 2021-03-10
201  dots <- list(...)
202  if(!is.null(part <- dots[["part"]])) {
203    warning("has.intercept.plm: argument 'part' is deprecated and will soon be removed, use argument 'rhs' instead")
204    warning("has.intercept.plm: arguement 'rhs' (if present) overwritten by argument 'part'")
205    if(part[1L] == "first") {
206      rhs <- 1L
207      } else {
208        if(is.numeric(part)) {
209          rhs <- part
210          } else stop("unsupported value for argument 'part', only \"first\" or an integer allowed")
211      }
212  }
213  has.intercept(formula(object), rhs = rhs)
214}
215
216
217pres <- function(x) {  # pres.panelmodel
218  ## extracts model residuals as pseries
219  ## not necessary for plm models as residuals.plm returns a pseries,
220  ## but used in residuals.pggls, residuals.pcce, residuals.pmg
221
222  ## extract indices
223  xindex <- unclass(attr(x$model, "index")) # unclass for speed
224  groupind <- xindex[[1L]]
225  timeind  <- xindex[[2L]]
226
227  # fix to allow operation with pggls, pmg
228  # [TODO: one day, make this cleaner; with the describe framework?]
229  if (!is.null(x$args$model))                 maybe_fd <- x$args$model
230  if (!is.null(attr(x, "pmodel")$model.name)) maybe_fd <- attr(x, "pmodel")$model.name # this line is currently needed to detect pggls models
231
232  ## Achim's fix: reduce id and time index to accommodate first-differences model's number of observations
233  if(exists("maybe_fd") && maybe_fd == "fd") {
234    groupi <- as.numeric(groupind)
235    ## make vector =1 on first obs in each group, 0 elsewhere
236    selector <- groupi - c(0, groupi[-length(groupi)])
237    selector[1L] <- 1 # the first must always be 1
238    ## eliminate first obs in time for each group
239    groupind <- groupind[!selector]
240    timeind <- timeind[!selector]
241  }
242
243  resdata <- data.frame(ee = x$residuals, ind = groupind, tind = timeind)
244  pee <- pdata.frame(resdata, index = c("ind", "tind"))
245  pres <- pee$ee
246  return(pres)
247}
248
249
250# punbalancedness: measures for unbalancedness of a panel data set as
251# defined in Ahrens/Pincus (1981), p. 228 (gamma and
252# nu) and for nested panel structures as in Baltagi/Song/Jung (2001), pp. 368-369 .
253#
254# Ahrens/Pincus (1981), On Two Measures of Unbalancedness in a One-Way Model
255#  and Their Relation to Efficiency, Biometrical Journal, Vol. 23, pp. 227-235.
256#
257# Baltagi/Song/Jung (2001), The unbalanced nested error component regression model,
258#  Journal of Econometrics, Vol. 101, pp. 357-381
259
260
261#' Measures for Unbalancedness of Panel Data
262#'
263#' This function reports unbalancedness measures for panel data as
264#' defined in \insertCite{AHRE:PINC:81;textual}{plm} and
265#' \insertCite{BALT:SONG:JUNG:01;textual}{plm}.
266#'
267#' `punbalancedness` returns measures for the unbalancedness of a
268#' panel data set.
269#'
270#' - For two-dimensional data:\cr The two measures of
271#' \insertCite{AHRE:PINC:81;textual}{plm} are calculated, called
272#' "gamma" (\eqn{\gamma}) and "nu" (\eqn{\nu}).
273#'
274#' If the panel data are balanced, both measures equal 1. The more
275#' "unbalanced" the panel data, the lower the measures (but > 0). The
276#' upper and lower bounds as given in \insertCite{AHRE:PINC:81;textual}{plm}
277#' are:\cr
278#' \eqn{0 < \gamma, \nu \le 1}, and for \eqn{\nu} more precisely
279#' \eqn{\frac{1}{n} < \nu \le 1}{1/n < \nu \le 1}, with \eqn{n} being
280#' the number of individuals (as in `pdim(x)$nT$n`).
281#'
282#' - For nested panel data (meaning including a grouping variable):\cr
283#' The extension of the above measures by
284#' \insertCite{BALT:SONG:JUNG:01;textual}{plm}, p. 368, are
285#' calculated:\cr
286#'
287#'   - c1: measure of subgroup (individual) unbalancedness,
288#'   - c2: measure of time unbalancedness,
289#'   - c3: measure of group unbalancedness due to each group size.
290#'
291#' Values are 1 if the data are balanced and become smaller as the
292#' data become more unbalanced.
293#'
294#'
295#' An application of the measure "gamma" is found in e. g.
296#' \insertCite{BALT:SONG:JUNG:01;textual}{plm}, pp. 488-491, and
297#' \insertCite{BALT:CHAN:94;textual}{plm}, pp. 78--87, where it is
298#' used to measure the unbalancedness of various unbalanced data sets
299#' used for Monte Carlo simulation studies. Measures c1, c2, c3 are
300#' used for similar purposes in
301#' \insertCite{BALT:SONG:JUNG:01;textual}{plm}.
302#'
303#' In the two-dimensional case, `punbalancedness` uses output of
304#' [pdim()] to calculate the two unbalancedness measures, so inputs to
305#' `punbalancedness` can be whatever `pdim` works on. `pdim` returns
306#' detailed information about the number of individuals and time
307#' observations (see [pdim()]).
308#'
309#' @param x a `panelmodel`, a `data.frame`, or a `pdata.frame` object,
310#' @param index only relevant for `data.frame` interface, for details
311#'     see [pdata.frame()],
312#' @param \dots further arguments.
313#' @return A named numeric containing either two or three entries,
314#'     depending on the panel structure inputted:
315#'
316#' - For the two-dimensional panel structure, the entries are called
317#' `gamma` and `nu`,
318#'
319#' - For a nested panel structure, the entries are called `c1`, `c2`,
320#' `c3`.
321#'
322#' @note Calling `punbalancedness` on an estimated `panelmodel` object
323#'     and on the corresponding `(p)data.frame` used for this
324#'     estimation does not necessarily yield the same result (true
325#'     also for `pdim`). When called on an estimated `panelmodel`, the
326#'     number of observations (individual, time) actually used for
327#'     model estimation are taken into account. When called on a
328#'     `(p)data.frame`, the rows in the `(p)data.frame` are
329#'     considered, disregarding any NA values in the dependent or
330#'     independent variable(s) which would be dropped during model
331#'     estimation.
332#' @export
333#' @author Kevin Tappe
334#' @seealso [nobs()], [pdim()], [pdata.frame()]
335#' @references
336#'
337#' \insertRef{AHRE:PINC:81}{plm}
338#'
339#' \insertRef{BALT:CHAN:94}{plm}
340#'
341#' \insertRef{BALT:SONG:JUNG:01}{plm}
342#'
343#' \insertRef{BALT:SONG:JUNG:02}{plm}
344#'
345#' @keywords attribute
346#' @examples
347#'
348#' # Grunfeld is a balanced panel, Hedonic is an unbalanced panel
349#' data(list=c("Grunfeld", "Hedonic"), package="plm")
350#'
351#' # Grunfeld has individual and time index in first two columns
352#' punbalancedness(Grunfeld) # c(1,1) indicates balanced panel
353#' pdim(Grunfeld)$balanced   # TRUE
354#'
355#' # Hedonic has individual index in column "townid" (in last column)
356#' punbalancedness(Hedonic, index="townid") # c(0.472, 0.519)
357#' pdim(Hedonic, index="townid")$balanced   # FALSE
358#'
359#' # punbalancedness on estimated models
360#' plm_mod_pool <- plm(inv ~ value + capital, data = Grunfeld)
361#' punbalancedness(plm_mod_pool)
362#'
363#' plm_mod_fe <- plm(inv ~ value + capital, data = Grunfeld[1:99, ], model = "within")
364#' punbalancedness(plm_mod_fe)
365#'
366#' # replicate results for panel data design no. 1 in Ahrens/Pincus (1981), p. 234
367#' ind_d1  <- c(1,1,1,2,2,2,3,3,3,3,3,4,4,4,4,4,4,4,5,5,5,5,5,5,5)
368#' time_d1 <- c(1,2,3,1,2,3,1,2,3,4,5,1,2,3,4,5,6,7,1,2,3,4,5,6,7)
369#' df_d1 <- data.frame(individual = ind_d1, time = time_d1)
370#' punbalancedness(df_d1) # c(0.868, 0.887)
371#'
372#' # example for a nested panel structure with a third index variable
373#' # specifying a group (states are grouped by region) and without grouping
374#' data("Produc", package = "plm")
375#' punbalancedness(Produc, index = c("state", "year", "region"))
376#' punbalancedness(Produc, index = c("state", "year"))
377#'
378#' @rdname punbalancedness
379#' @export
380punbalancedness <- function(x, ...) {
381  UseMethod("punbalancedness")
382}
383
384
385punbalancedness.default <- function(x, ...) {
386
387  ii <- index(x)
388  if(!is.index(ii)) stop("no valid index found for input object 'x'")
389
390  if (ncol(ii) == 2L) {
391   ## original Ahrens/Pincus (1981)
392    pdim <- pdim(x, ...)
393    N <- pdim$nT$n # no. of individuals
394    Totalobs <- pdim$nT$N # no. of total observations
395    Ti <- pdim$Tint$Ti
396    Tavg <- sum(Ti)/N
397
398    r1 <- N / (Tavg * sum(1/Ti))
399    r2 <- 1 / (N * (sum( (Ti/Totalobs)^2)))
400    result <- c(gamma = r1, nu = r2)
401  } else {
402    if (ncol(ii) == 3L) {
403     ## extension to nested model with additional group variable
404     ## Baltagi/Song/Jung (2001), pp. 368-369
405      ii <- unclass(ii) # unclass for speed
406      ids <- ii[[1L]]
407      tss <- ii[[2L]]
408      gps <- ii[[3L]]
409      Tis <- unique(data.frame(tss, gps))
410      Tis <- table(Tis$gps)               # no of max time periods per group
411      Nis <- unique(data.frame(ids, gps))
412      Nis <- table(Nis$gps)               # no of individuals per group
413      M <- length(unique(gps))            # no of unique groups
414      Nbar <- sum(Nis)/M
415      Tbar <- sum(Tis)/M
416
417      c1 <- M / (Nbar * sum(1/Nis))
418      c2 <- M / (Tbar * sum(1/Tis))
419      c3 <- M / (sum(Nis * Tis)/M * sum(1/(Nis*Tis)))
420      result <- (c(c1 = c1, c2 = c2, c3 = c3))
421    } else stop(paste0("unsupported number of dimensions: ", ncol(ii)))
422  }
423  return(result)
424}
425
426#' @rdname punbalancedness
427#' @export
428punbalancedness.pdata.frame <- function(x, ...) {
429  punbalancedness.default(x, ...)
430}
431
432#' @rdname punbalancedness
433#' @export
434punbalancedness.data.frame <- function(x, index = NULL, ...) {
435  x <- pdata.frame(x, index = index, ...)
436  punbalancedness.default(x, ...)
437}
438
439#' @rdname punbalancedness
440#' @export
441punbalancedness.panelmodel <- function(x, ...) {
442  punbalancedness.default(x, ...)
443}
444
445
446
447myvar <- function(x){
448  ## non-exported
449  x.na <- is.na(x)
450  if(anyNA(x.na)) x <- x[!x.na]
451  n <- length(x)
452
453  if(n <= 1L) {
454    if(n == 0L) z <- NA
455    if(n == 1L) z <- 0
456  } else {
457    z <- if(!(is.factor(x) || is.character(x))) var(x)
458         else !all(duplicated(x)[-1L])
459  }
460  z
461}
462
463
464
465#' Check for Cross-Sectional and Time Variation
466#'
467#' This function checks for each variable of a panel if it varies
468#' cross-sectionally and over time.
469#'
470#' For (p)data.frame and matrix interface: All-NA columns are removed
471#' prior to calculation of variation due to coercing to pdata.frame
472#' first.
473#'
474#' @aliases pvar
475#' @param x a `(p)data.frame` or a `matrix`,
476#' @param index see [pdata.frame()],
477#' @param \dots further arguments.
478#' @return An object of class `pvar` containing the following
479#'     elements:
480#'
481#' \item{id.variation}{a logical vector with `TRUE` values if the
482#' variable has individual variation, `FALSE` if not,}
483#'
484#' \item{time.variation}{a logical vector with `TRUE` values if the
485#' variable has time variation, `FALSE` if not,}
486#'
487#' \item{id.variation_anyNA}{a logical vector with `TRUE` values if
488#' the variable has at least one individual-time combination with all
489#' NA values in the individual dimension for at least one time period,
490#' `FALSE` if not,}
491#'
492#' \item{time.variation_anyNA}{a logical vector with `TRUE` values if
493#' the variable has at least one individual-time combination with all
494#' NA values in the time dimension for at least one individual,
495#' `FALSE` if not.}
496#'
497#' @note `pvar` can be time consuming for ``big'' panels. As a fast alternative
498#' [collapse::varying()] from package \CRANpkg{collapse} could be used.
499#' @export
500#' @author Yves Croissant
501#' @seealso [pdim()] to check the dimensions of a 'pdata.frame' (and
502#'     other objects),
503#' @keywords attribute
504#' @examples
505#'
506#'
507#' # Gasoline contains two variables which are individual and time
508#' # indexes and are the first two variables
509#' data("Gasoline", package = "plm")
510#' pvar(Gasoline)
511#'
512#' # Hedonic is an unbalanced panel, townid is the individual index;
513#' # the drop.index argument is passed to pdata.frame
514#' data("Hedonic", package = "plm")
515#' pvar(Hedonic, "townid", drop.index = TRUE)
516#'
517#' # same using pdata.frame
518#' Hed <- pdata.frame(Hedonic, "townid", drop.index = TRUE)
519#' pvar(Hed)
520#'
521#' # Gasoline with pvar's matrix interface
522#' Gasoline_mat <- as.matrix(Gasoline)
523#' pvar(Gasoline_mat)
524#' pvar(Gasoline_mat, index=c("country", "year"))
525#'
526pvar <- function(x, ...){
527  UseMethod("pvar")
528}
529
530pvar.default <- function(x, id, time, ...){
531  name.var <- names(x)
532  len <- length(x)
533  time.variation <- rep(TRUE, len)
534  id.variation   <- rep(TRUE, len)
535  time.variation_anyNA <- rep(FALSE, len)
536  id.variation_anyNA   <- rep(FALSE, len)
537  lid   <- split(x, id)   # these split() functions seem particularly slow
538  ltime <- split(x, time)
539  if(is.list(x)){
540    if(len == 1L){
541      # time variation
542      temp_time.var          <- sapply(lid, function(x) sapply(x, myvar))
543      temp_time.var_sumNoVar <- sum(temp_time.var == 0, na.rm = TRUE) # number of non-varying id-time comb. (without all NA groups)
544      temp_time.var_sumNA    <- sum(is.na(temp_time.var))             # number of all-NA groups
545      temp_time.varResult    <- temp_time.var_sumNoVar + temp_time.var_sumNA
546      time.variation         <- temp_time.varResult != length(lid) # no variation if (no. non-varying + no. all-NA) == number of groups
547      time.variation_anyNA   <- temp_time.var_sumNA > 0            # indicates if at least one id-time comb is all NA
548
549      # id variation
550      temp_id.var          <- sapply(ltime, function(x) sapply(x, myvar))
551      temp_id.var_sumNoVar <- sum(temp_id.var == 0, na.rm = TRUE)
552      temp_id.var_sumNA    <- sum(is.na(temp_id.var))
553      temp_id.varResult    <- temp_id.var_sumNoVar + temp_id.var_sumNA
554      id.variation         <- temp_id.varResult != length(ltime)
555      id.variation_anyNA   <- temp_id.var_sumNA > 0
556    }
557    else{
558      # time variation
559      temp_time.var          <- sapply(lid, function(x) sapply(x, myvar))
560      temp_time.var_sumNoVar <- apply(temp_time.var == 0, 1, sum, na.rm = TRUE)
561      temp_time.var_sumNA    <- apply(is.na(temp_time.var), 1, sum)
562      temp_time.varResult    <- temp_time.var_sumNoVar + temp_time.var_sumNA
563      time.variation         <- temp_time.varResult != length(lid)
564      time.variation_anyNA   <- temp_time.var_sumNA > 0
565
566      # id variation
567      temp_id.var          <- sapply(ltime, function(x) sapply(x, myvar))
568      temp_id.var_sumNoVar <- apply(temp_id.var == 0, 1, sum, na.rm = TRUE)
569      temp_id.var_sumNA    <- apply(is.na(temp_id.var), 1, sum)
570      temp_id.varResult    <- temp_id.var_sumNoVar + temp_id.var_sumNA
571      id.variation         <- temp_id.varResult != length(ltime)
572      id.variation_anyNA   <- temp_id.var_sumNA > 0
573    }
574  }
575  else{ # not a list (not a data.frame, pdata.frame) - try our best for that unknown data structure
576    # time variation
577    temp_time.var          <- sapply(lid, function(x) sapply(x, myvar))
578    temp_time.var_sumNoVar <- sum(temp_time.var == 0, na.rm = TRUE)
579    temp_time.var_sumNA    <- sum(is.na(temp_time.var))
580    temp_time.varResult    <- temp_time.var_sumNoVar + temp_time.var_sumNA
581    time.variation         <- temp_time.varResult != length(lid)
582    time.variation_anyNA   <- temp_time.var_sumNA > 0
583
584    # id variation
585    temp_id.var          <- sapply(ltime, function(x) sapply(x, myvar))
586    temp_id.var_sumNoVar <- sum(temp_id.var == 0, na.rm = TRUE)
587    temp_id.var_sumNA    <- sum(is.na(temp_id.var))
588    temp_id.varResult    <- temp_id.var_sumNoVar + temp_id.var_sumNA
589    id.variation         <- temp_id.varResult != length(ltime)
590    id.variation_anyNA   <- temp_id.var_sumNA > 0
591  }
592
593  # make 'pvar' object
594  names(id.variation) <- names(time.variation) <- names(id.variation_anyNA) <- names(time.variation_anyNA) <- name.var
595  dim.var <- list(id.variation         = id.variation,
596                  time.variation       = time.variation,
597                  id.variation_anyNA   = id.variation_anyNA,
598                  time.variation_anyNA = time.variation_anyNA)
599  class(dim.var) <- "pvar"
600  return(dim.var)
601}
602
603#' @rdname pvar
604#' @export
605pvar.matrix <- function(x, index = NULL, ...){
606  x <- pdata.frame(as.data.frame(x), index, ...)
607  pvar(x)
608}
609
610#' @rdname pvar
611#' @export
612pvar.data.frame <- function(x, index = NULL, ...){
613  x <- pdata.frame(x, index, ...)
614  pvar(x)
615}
616
617#' @rdname pvar
618#' @export
619pvar.pdata.frame <- function(x, ...){
620  index <- unclass(attr(x, "index")) # unclass for speed
621  pvar.default(x, index[[1L]], index[[2L]])
622}
623
624#' @rdname pvar
625#' @export
626pvar.pseries <- function(x, ...){
627  # use drop.index = TRUE so that the index columns'
628  # variations are not evaluated:
629  pdfx <- pseries2pdataframe(x, drop.index = TRUE)
630  pvar.pdata.frame(pdfx)
631}
632
633#' @rdname pvar
634#' @export
635print.pvar <- function(x, ...){
636  varnames <- names(x$time.variation)
637  if(any(!x$time.variation)){
638    var <- varnames[x$time.variation == FALSE]
639    #    if (!is.null(y)) var <- var[-which(var==y$id)]
640    if(length(var)!=0) cat(paste("no time variation:      ", paste(var,collapse=" "),"\n"))
641  }
642  if(any(!x$id.variation)){
643    var <- varnames[x$id.variation == FALSE]
644    #    if (!is.null(y)) var <- var[-which(var==y$time)]
645    if(length(var)!=0) cat(paste("no individual variation:", paste(var,collapse=" "),"\n"))
646  }
647
648  # any individual-time combinations all NA?
649  if(any(x$time.variation_anyNA)){
650    var_anyNA <- varnames[x$time.variation_anyNA]
651    if(length(var_anyNA)!=0) cat(paste("all NA in time dimension for at least one individual: ", paste(var_anyNA,collapse=" "),"\n"))
652  }
653  if(any(x$id.variation_anyNA)){
654    var_anyNA <- varnames[x$id.variation_anyNA]
655    if(length(var_anyNA)!=0) cat(paste("all NA in ind. dimension for at least one time period:", paste(var_anyNA,collapse=" "),"\n"))
656  }
657  invisible(x)
658}
659
660
661