1## Function that are used in more than on place in plm (or likely to be used in more than one place in the future) 2 3## - bdiag : takes matrices as argument and returns the block-diagonal matrix (used in pgmm and plm.list) 4## - mylm : inner fitting func based on stats::lm with matrix inputs (used in plm.fit) 5## - my.lm.fit : like the barebone stats::lm.fit but with some extra information (e.g., SEs, sigma) used in purtest 6## - twosls : computes the 2SLS estimator (used in plm and ercomp) 7## - data.name : used in a lot tests to generate the 'data.name' entry for htest objects from the model object's call$formula 8## - has.intercept : tests the presence of an intercept 9## - pres : extract model residuals as pseries (used in several estimation functions) 10## - punbalancedness : measures for the unbalancedness of panel data 11## - myvar : calculates variance with NA removal, checks if input is constant (also for factor and character) 12## - pvar : checks if input varies in individual / time dimension 13 14bdiag <- function(...){ 15 ## non-exported 16 if (nargs() == 1L) 17 x <- as.list(...) 18 else 19 x <- list(...) 20 n <- length(x) 21 if(n == 0L) return(NULL) 22 x <- lapply(x, function(y) if(length(y)) as.matrix(y) else 23 stop("Zero-length component in x")) 24 d <- array(unlist(lapply(x, dim)), c(2, n)) 25 rr <- d[1L, ] 26 cc <- d[2L, ] 27 rsum <- sum(rr) 28 csum <- sum(cc) 29 out <- array(0, c(rsum, csum)) 30 ind <- array(0, c(4, n)) 31 rcum <- cumsum(rr) 32 ccum <- cumsum(cc) 33 ind[1, -1] <- rcum[-n] 34 ind[2, ] <- rcum 35 ind[3, -1] <- ccum[-n] 36 ind[4, ] <- ccum 37 imat <- array(1:(rsum * csum), c(rsum, csum)) 38 iuse <- apply(ind, 2, function(y, imat) imat[(y[1L]+1):y[2L], 39 (y[3L]+1):y[4L]], imat = imat) 40 iuse <- as.vector(unlist(iuse)) 41 out[iuse] <- unlist(x) 42 return(out) 43} 44 45# mylm is used in plm.fit() 46mylm <- function(y, X, W = NULL) { 47 ## non-exported 48 names.X <- colnames(X) 49 result <- if(is.null(W)) lm(y ~ X - 1) else twosls(y, X, W) 50 if(any(na.coef <- is.na(result$coefficients))) { 51 ## for debug purpose: 52 # warning("Coefficient(s) '", paste((names.X)[na.coef], collapse = ", "), 53 #"' could not be estimated and is (are) dropped.") 54 X <- X[ , !na.coef, drop = FALSE] 55 if(dim(X)[2L] == 0L) stop(paste("estimation not possible: all coefficients", 56 "omitted from estimation due to aliasing")) 57 58 ## re-estimate without the columns which resulted previously in NA-coefficients 59 result <- if(is.null(W)) lm(y ~ X - 1) else twosls(y, X, W) 60 } 61 result$vcov <- vcov(result) 62 result$X <- X 63 result$y <- y 64 result$W <- W 65 # aliased is an element of summary.lm-objects: 66 # since plm drops aliased coefs, store this info in plm object 67 # NB: this only sets coefs to NA that are detected/set to NA by mylm()/lm.fit(); 68 # covariates dropped earlier by model.matrix( , cstcovar.rm) are not included here anymore 69 result$aliased <- na.coef 70 names(result$aliased) <- names.X 71 names(result$coefficients) <- colnames(result$vcov) <- 72 rownames(result$vcov) <- colnames(X) 73 result 74} 75 76# my.lm.fit is used in purtest() 77my.lm.fit <- function(X, y, dfcor = TRUE, ...){ 78 reg <- lm.fit(X, y) 79 ## 'as' summary method for lm.fit 80 p <- reg$rank 81 Qr <- reg$qr 82 n <- NROW(Qr$qr) 83 rdf <- n - p 84 p1 <- 1L:p 85 r <- reg$residuals 86 rss <- as.numeric(crossprod(r)) 87 resvar <- if (dfcor) rss/rdf else rss/n 88 sigma <- sqrt(resvar) 89 R <- chol2inv(Qr$qr[p1, p1, drop = FALSE]) 90 thecoef <- reg$coefficients[Qr$pivot[p1]] #[lags+1] 91 these <- sigma * sqrt(diag(R)) #[lags+1]) 92 list(coef = thecoef, se = these, sigma = sigma, 93 rss = rss, n = n, K = p, rdf = rdf) 94} 95 96#' @importFrom stats .lm.fit 97twosls <- function(y, X, W, intercept = FALSE, lm.type = "lm"){ 98 ## non-exported 99 # Return value can be controlled by argument lm.type. Often, a full lm model 100 # is needed for further processing but can select one of the fast but less 101 # rich objects produced by lm.fit or .lm.fit (the latter does not contain, e.g., 102 # fitted.values and is to be used very carefully (e.g., coefs not in input order). 103 104 # As NA/NaN/(+/-)Inf-freeness needs to be guaranteed when functions call 105 # twosls(), so can use lm.fit to calc. Xhat. 106 Xhat <- lm.fit(cbind(1, W), X)$fitted.values 107 # old: Xhat <- lm(X ~ W)$fitted.values 108 109 if(!is.matrix(Xhat)){ 110 Xhat <- matrix(Xhat, ncol = 1L) 111 colnames(Xhat) <- colnames(X) 112 } 113 114 if(intercept){ 115 model <- switch(lm.type, 116 "lm" = lm(y ~ Xhat), 117 "lm.fit" = lm.fit(cbind(1, Xhat), y), 118 ".lm.fit" = .lm.fit(cbind(1, Xhat), y)) 119 yhat <- as.vector(crossprod(t(cbind(1, X)), coef(model))) 120 } 121 else{ 122 model <- switch(lm.type, 123 "lm" = lm(y ~ Xhat - 1), 124 "lm.fit" = lm.fit(Xhat, y), 125 ".lm.fit" = .lm.fit(Xhat, y)) 126 yhat <- as.vector(crossprod(t(X), coef(model))) 127 } 128 model$residuals <- y - yhat 129 model 130} 131 132data.name <- function(x) { 133 ## non-exported, used in various tests 134 data.name <- paste(deparse(x$call$formula)) 135 if (length(data.name) > 1L) paste(data.name[1L], "...") 136 else data.name 137} 138 139##### has.intercept methods ##### 140 141#' Check for the presence of an intercept in a formula or in a fitted 142#' model 143#' 144#' The presence of an intercept is checked using the formula which is 145#' either provided as the argument of the function or extracted from 146#' a fitted model. 147#' 148#' @param object a `formula`, a `Formula` or a fitted model (of class 149#' `plm` or `panelmodel`), 150#' @param rhs an integer (length > 1 is possible), indicating the parts of right 151#' hand sides of the formula to be evaluated for the presence of an 152#' intercept or NULL for all parts of the right hand side 153#' (relevant for the `Formula` and the `plm` methods) 154#' @param \dots further arguments. 155#' 156#' @return a logical 157#' @export 158has.intercept <- function(object, ...) { 159 UseMethod("has.intercept") 160} 161 162#' @rdname has.intercept 163#' @export 164has.intercept.default <- function(object, ...) { 165 has.intercept(formula(object), ...) 166} 167 168#' @rdname has.intercept 169#' @export 170has.intercept.formula <- function(object, ...) { 171 attr(terms(object), "intercept") == 1L 172} 173 174#' @rdname has.intercept 175#' @export 176has.intercept.Formula <- function(object, rhs = NULL, ...) { 177 ## NOTE: returns a logical vector of the necessary length 178 ## (which might be > 1) 179 if (is.null(rhs)) rhs <- 1:length(attr(object, "rhs")) 180 res <- sapply(rhs, function(x) { 181 aform <- formula(object, lhs = 0, rhs = x) 182 # expand the dot if any in all the parts except the first 183 if (x > 1L) aform <- update(formula(object, lhs = 0, rhs = 1), aform) 184 has.intercept(aform) 185 }) 186 return(res) 187} 188 189#' @rdname has.intercept 190#' @export 191has.intercept.panelmodel <- function(object, ...) { 192 object <- attr(model.frame(object), "formula") 193 has.intercept(object) 194} 195 196#' @rdname has.intercept 197#' @export 198has.intercept.plm <- function(object, rhs = 1L, ...) { 199 200 # catch deprecated argument "part": convert and warn / 2021-03-10 201 dots <- list(...) 202 if(!is.null(part <- dots[["part"]])) { 203 warning("has.intercept.plm: argument 'part' is deprecated and will soon be removed, use argument 'rhs' instead") 204 warning("has.intercept.plm: arguement 'rhs' (if present) overwritten by argument 'part'") 205 if(part[1L] == "first") { 206 rhs <- 1L 207 } else { 208 if(is.numeric(part)) { 209 rhs <- part 210 } else stop("unsupported value for argument 'part', only \"first\" or an integer allowed") 211 } 212 } 213 has.intercept(formula(object), rhs = rhs) 214} 215 216 217pres <- function(x) { # pres.panelmodel 218 ## extracts model residuals as pseries 219 ## not necessary for plm models as residuals.plm returns a pseries, 220 ## but used in residuals.pggls, residuals.pcce, residuals.pmg 221 222 ## extract indices 223 xindex <- unclass(attr(x$model, "index")) # unclass for speed 224 groupind <- xindex[[1L]] 225 timeind <- xindex[[2L]] 226 227 # fix to allow operation with pggls, pmg 228 # [TODO: one day, make this cleaner; with the describe framework?] 229 if (!is.null(x$args$model)) maybe_fd <- x$args$model 230 if (!is.null(attr(x, "pmodel")$model.name)) maybe_fd <- attr(x, "pmodel")$model.name # this line is currently needed to detect pggls models 231 232 ## Achim's fix: reduce id and time index to accommodate first-differences model's number of observations 233 if(exists("maybe_fd") && maybe_fd == "fd") { 234 groupi <- as.numeric(groupind) 235 ## make vector =1 on first obs in each group, 0 elsewhere 236 selector <- groupi - c(0, groupi[-length(groupi)]) 237 selector[1L] <- 1 # the first must always be 1 238 ## eliminate first obs in time for each group 239 groupind <- groupind[!selector] 240 timeind <- timeind[!selector] 241 } 242 243 resdata <- data.frame(ee = x$residuals, ind = groupind, tind = timeind) 244 pee <- pdata.frame(resdata, index = c("ind", "tind")) 245 pres <- pee$ee 246 return(pres) 247} 248 249 250# punbalancedness: measures for unbalancedness of a panel data set as 251# defined in Ahrens/Pincus (1981), p. 228 (gamma and 252# nu) and for nested panel structures as in Baltagi/Song/Jung (2001), pp. 368-369 . 253# 254# Ahrens/Pincus (1981), On Two Measures of Unbalancedness in a One-Way Model 255# and Their Relation to Efficiency, Biometrical Journal, Vol. 23, pp. 227-235. 256# 257# Baltagi/Song/Jung (2001), The unbalanced nested error component regression model, 258# Journal of Econometrics, Vol. 101, pp. 357-381 259 260 261#' Measures for Unbalancedness of Panel Data 262#' 263#' This function reports unbalancedness measures for panel data as 264#' defined in \insertCite{AHRE:PINC:81;textual}{plm} and 265#' \insertCite{BALT:SONG:JUNG:01;textual}{plm}. 266#' 267#' `punbalancedness` returns measures for the unbalancedness of a 268#' panel data set. 269#' 270#' - For two-dimensional data:\cr The two measures of 271#' \insertCite{AHRE:PINC:81;textual}{plm} are calculated, called 272#' "gamma" (\eqn{\gamma}) and "nu" (\eqn{\nu}). 273#' 274#' If the panel data are balanced, both measures equal 1. The more 275#' "unbalanced" the panel data, the lower the measures (but > 0). The 276#' upper and lower bounds as given in \insertCite{AHRE:PINC:81;textual}{plm} 277#' are:\cr 278#' \eqn{0 < \gamma, \nu \le 1}, and for \eqn{\nu} more precisely 279#' \eqn{\frac{1}{n} < \nu \le 1}{1/n < \nu \le 1}, with \eqn{n} being 280#' the number of individuals (as in `pdim(x)$nT$n`). 281#' 282#' - For nested panel data (meaning including a grouping variable):\cr 283#' The extension of the above measures by 284#' \insertCite{BALT:SONG:JUNG:01;textual}{plm}, p. 368, are 285#' calculated:\cr 286#' 287#' - c1: measure of subgroup (individual) unbalancedness, 288#' - c2: measure of time unbalancedness, 289#' - c3: measure of group unbalancedness due to each group size. 290#' 291#' Values are 1 if the data are balanced and become smaller as the 292#' data become more unbalanced. 293#' 294#' 295#' An application of the measure "gamma" is found in e. g. 296#' \insertCite{BALT:SONG:JUNG:01;textual}{plm}, pp. 488-491, and 297#' \insertCite{BALT:CHAN:94;textual}{plm}, pp. 78--87, where it is 298#' used to measure the unbalancedness of various unbalanced data sets 299#' used for Monte Carlo simulation studies. Measures c1, c2, c3 are 300#' used for similar purposes in 301#' \insertCite{BALT:SONG:JUNG:01;textual}{plm}. 302#' 303#' In the two-dimensional case, `punbalancedness` uses output of 304#' [pdim()] to calculate the two unbalancedness measures, so inputs to 305#' `punbalancedness` can be whatever `pdim` works on. `pdim` returns 306#' detailed information about the number of individuals and time 307#' observations (see [pdim()]). 308#' 309#' @param x a `panelmodel`, a `data.frame`, or a `pdata.frame` object, 310#' @param index only relevant for `data.frame` interface, for details 311#' see [pdata.frame()], 312#' @param \dots further arguments. 313#' @return A named numeric containing either two or three entries, 314#' depending on the panel structure inputted: 315#' 316#' - For the two-dimensional panel structure, the entries are called 317#' `gamma` and `nu`, 318#' 319#' - For a nested panel structure, the entries are called `c1`, `c2`, 320#' `c3`. 321#' 322#' @note Calling `punbalancedness` on an estimated `panelmodel` object 323#' and on the corresponding `(p)data.frame` used for this 324#' estimation does not necessarily yield the same result (true 325#' also for `pdim`). When called on an estimated `panelmodel`, the 326#' number of observations (individual, time) actually used for 327#' model estimation are taken into account. When called on a 328#' `(p)data.frame`, the rows in the `(p)data.frame` are 329#' considered, disregarding any NA values in the dependent or 330#' independent variable(s) which would be dropped during model 331#' estimation. 332#' @export 333#' @author Kevin Tappe 334#' @seealso [nobs()], [pdim()], [pdata.frame()] 335#' @references 336#' 337#' \insertRef{AHRE:PINC:81}{plm} 338#' 339#' \insertRef{BALT:CHAN:94}{plm} 340#' 341#' \insertRef{BALT:SONG:JUNG:01}{plm} 342#' 343#' \insertRef{BALT:SONG:JUNG:02}{plm} 344#' 345#' @keywords attribute 346#' @examples 347#' 348#' # Grunfeld is a balanced panel, Hedonic is an unbalanced panel 349#' data(list=c("Grunfeld", "Hedonic"), package="plm") 350#' 351#' # Grunfeld has individual and time index in first two columns 352#' punbalancedness(Grunfeld) # c(1,1) indicates balanced panel 353#' pdim(Grunfeld)$balanced # TRUE 354#' 355#' # Hedonic has individual index in column "townid" (in last column) 356#' punbalancedness(Hedonic, index="townid") # c(0.472, 0.519) 357#' pdim(Hedonic, index="townid")$balanced # FALSE 358#' 359#' # punbalancedness on estimated models 360#' plm_mod_pool <- plm(inv ~ value + capital, data = Grunfeld) 361#' punbalancedness(plm_mod_pool) 362#' 363#' plm_mod_fe <- plm(inv ~ value + capital, data = Grunfeld[1:99, ], model = "within") 364#' punbalancedness(plm_mod_fe) 365#' 366#' # replicate results for panel data design no. 1 in Ahrens/Pincus (1981), p. 234 367#' ind_d1 <- c(1,1,1,2,2,2,3,3,3,3,3,4,4,4,4,4,4,4,5,5,5,5,5,5,5) 368#' time_d1 <- c(1,2,3,1,2,3,1,2,3,4,5,1,2,3,4,5,6,7,1,2,3,4,5,6,7) 369#' df_d1 <- data.frame(individual = ind_d1, time = time_d1) 370#' punbalancedness(df_d1) # c(0.868, 0.887) 371#' 372#' # example for a nested panel structure with a third index variable 373#' # specifying a group (states are grouped by region) and without grouping 374#' data("Produc", package = "plm") 375#' punbalancedness(Produc, index = c("state", "year", "region")) 376#' punbalancedness(Produc, index = c("state", "year")) 377#' 378#' @rdname punbalancedness 379#' @export 380punbalancedness <- function(x, ...) { 381 UseMethod("punbalancedness") 382} 383 384 385punbalancedness.default <- function(x, ...) { 386 387 ii <- index(x) 388 if(!is.index(ii)) stop("no valid index found for input object 'x'") 389 390 if (ncol(ii) == 2L) { 391 ## original Ahrens/Pincus (1981) 392 pdim <- pdim(x, ...) 393 N <- pdim$nT$n # no. of individuals 394 Totalobs <- pdim$nT$N # no. of total observations 395 Ti <- pdim$Tint$Ti 396 Tavg <- sum(Ti)/N 397 398 r1 <- N / (Tavg * sum(1/Ti)) 399 r2 <- 1 / (N * (sum( (Ti/Totalobs)^2))) 400 result <- c(gamma = r1, nu = r2) 401 } else { 402 if (ncol(ii) == 3L) { 403 ## extension to nested model with additional group variable 404 ## Baltagi/Song/Jung (2001), pp. 368-369 405 ii <- unclass(ii) # unclass for speed 406 ids <- ii[[1L]] 407 tss <- ii[[2L]] 408 gps <- ii[[3L]] 409 Tis <- unique(data.frame(tss, gps)) 410 Tis <- table(Tis$gps) # no of max time periods per group 411 Nis <- unique(data.frame(ids, gps)) 412 Nis <- table(Nis$gps) # no of individuals per group 413 M <- length(unique(gps)) # no of unique groups 414 Nbar <- sum(Nis)/M 415 Tbar <- sum(Tis)/M 416 417 c1 <- M / (Nbar * sum(1/Nis)) 418 c2 <- M / (Tbar * sum(1/Tis)) 419 c3 <- M / (sum(Nis * Tis)/M * sum(1/(Nis*Tis))) 420 result <- (c(c1 = c1, c2 = c2, c3 = c3)) 421 } else stop(paste0("unsupported number of dimensions: ", ncol(ii))) 422 } 423 return(result) 424} 425 426#' @rdname punbalancedness 427#' @export 428punbalancedness.pdata.frame <- function(x, ...) { 429 punbalancedness.default(x, ...) 430} 431 432#' @rdname punbalancedness 433#' @export 434punbalancedness.data.frame <- function(x, index = NULL, ...) { 435 x <- pdata.frame(x, index = index, ...) 436 punbalancedness.default(x, ...) 437} 438 439#' @rdname punbalancedness 440#' @export 441punbalancedness.panelmodel <- function(x, ...) { 442 punbalancedness.default(x, ...) 443} 444 445 446 447myvar <- function(x){ 448 ## non-exported 449 x.na <- is.na(x) 450 if(anyNA(x.na)) x <- x[!x.na] 451 n <- length(x) 452 453 if(n <= 1L) { 454 if(n == 0L) z <- NA 455 if(n == 1L) z <- 0 456 } else { 457 z <- if(!(is.factor(x) || is.character(x))) var(x) 458 else !all(duplicated(x)[-1L]) 459 } 460 z 461} 462 463 464 465#' Check for Cross-Sectional and Time Variation 466#' 467#' This function checks for each variable of a panel if it varies 468#' cross-sectionally and over time. 469#' 470#' For (p)data.frame and matrix interface: All-NA columns are removed 471#' prior to calculation of variation due to coercing to pdata.frame 472#' first. 473#' 474#' @aliases pvar 475#' @param x a `(p)data.frame` or a `matrix`, 476#' @param index see [pdata.frame()], 477#' @param \dots further arguments. 478#' @return An object of class `pvar` containing the following 479#' elements: 480#' 481#' \item{id.variation}{a logical vector with `TRUE` values if the 482#' variable has individual variation, `FALSE` if not,} 483#' 484#' \item{time.variation}{a logical vector with `TRUE` values if the 485#' variable has time variation, `FALSE` if not,} 486#' 487#' \item{id.variation_anyNA}{a logical vector with `TRUE` values if 488#' the variable has at least one individual-time combination with all 489#' NA values in the individual dimension for at least one time period, 490#' `FALSE` if not,} 491#' 492#' \item{time.variation_anyNA}{a logical vector with `TRUE` values if 493#' the variable has at least one individual-time combination with all 494#' NA values in the time dimension for at least one individual, 495#' `FALSE` if not.} 496#' 497#' @note `pvar` can be time consuming for ``big'' panels. As a fast alternative 498#' [collapse::varying()] from package \CRANpkg{collapse} could be used. 499#' @export 500#' @author Yves Croissant 501#' @seealso [pdim()] to check the dimensions of a 'pdata.frame' (and 502#' other objects), 503#' @keywords attribute 504#' @examples 505#' 506#' 507#' # Gasoline contains two variables which are individual and time 508#' # indexes and are the first two variables 509#' data("Gasoline", package = "plm") 510#' pvar(Gasoline) 511#' 512#' # Hedonic is an unbalanced panel, townid is the individual index; 513#' # the drop.index argument is passed to pdata.frame 514#' data("Hedonic", package = "plm") 515#' pvar(Hedonic, "townid", drop.index = TRUE) 516#' 517#' # same using pdata.frame 518#' Hed <- pdata.frame(Hedonic, "townid", drop.index = TRUE) 519#' pvar(Hed) 520#' 521#' # Gasoline with pvar's matrix interface 522#' Gasoline_mat <- as.matrix(Gasoline) 523#' pvar(Gasoline_mat) 524#' pvar(Gasoline_mat, index=c("country", "year")) 525#' 526pvar <- function(x, ...){ 527 UseMethod("pvar") 528} 529 530pvar.default <- function(x, id, time, ...){ 531 name.var <- names(x) 532 len <- length(x) 533 time.variation <- rep(TRUE, len) 534 id.variation <- rep(TRUE, len) 535 time.variation_anyNA <- rep(FALSE, len) 536 id.variation_anyNA <- rep(FALSE, len) 537 lid <- split(x, id) # these split() functions seem particularly slow 538 ltime <- split(x, time) 539 if(is.list(x)){ 540 if(len == 1L){ 541 # time variation 542 temp_time.var <- sapply(lid, function(x) sapply(x, myvar)) 543 temp_time.var_sumNoVar <- sum(temp_time.var == 0, na.rm = TRUE) # number of non-varying id-time comb. (without all NA groups) 544 temp_time.var_sumNA <- sum(is.na(temp_time.var)) # number of all-NA groups 545 temp_time.varResult <- temp_time.var_sumNoVar + temp_time.var_sumNA 546 time.variation <- temp_time.varResult != length(lid) # no variation if (no. non-varying + no. all-NA) == number of groups 547 time.variation_anyNA <- temp_time.var_sumNA > 0 # indicates if at least one id-time comb is all NA 548 549 # id variation 550 temp_id.var <- sapply(ltime, function(x) sapply(x, myvar)) 551 temp_id.var_sumNoVar <- sum(temp_id.var == 0, na.rm = TRUE) 552 temp_id.var_sumNA <- sum(is.na(temp_id.var)) 553 temp_id.varResult <- temp_id.var_sumNoVar + temp_id.var_sumNA 554 id.variation <- temp_id.varResult != length(ltime) 555 id.variation_anyNA <- temp_id.var_sumNA > 0 556 } 557 else{ 558 # time variation 559 temp_time.var <- sapply(lid, function(x) sapply(x, myvar)) 560 temp_time.var_sumNoVar <- apply(temp_time.var == 0, 1, sum, na.rm = TRUE) 561 temp_time.var_sumNA <- apply(is.na(temp_time.var), 1, sum) 562 temp_time.varResult <- temp_time.var_sumNoVar + temp_time.var_sumNA 563 time.variation <- temp_time.varResult != length(lid) 564 time.variation_anyNA <- temp_time.var_sumNA > 0 565 566 # id variation 567 temp_id.var <- sapply(ltime, function(x) sapply(x, myvar)) 568 temp_id.var_sumNoVar <- apply(temp_id.var == 0, 1, sum, na.rm = TRUE) 569 temp_id.var_sumNA <- apply(is.na(temp_id.var), 1, sum) 570 temp_id.varResult <- temp_id.var_sumNoVar + temp_id.var_sumNA 571 id.variation <- temp_id.varResult != length(ltime) 572 id.variation_anyNA <- temp_id.var_sumNA > 0 573 } 574 } 575 else{ # not a list (not a data.frame, pdata.frame) - try our best for that unknown data structure 576 # time variation 577 temp_time.var <- sapply(lid, function(x) sapply(x, myvar)) 578 temp_time.var_sumNoVar <- sum(temp_time.var == 0, na.rm = TRUE) 579 temp_time.var_sumNA <- sum(is.na(temp_time.var)) 580 temp_time.varResult <- temp_time.var_sumNoVar + temp_time.var_sumNA 581 time.variation <- temp_time.varResult != length(lid) 582 time.variation_anyNA <- temp_time.var_sumNA > 0 583 584 # id variation 585 temp_id.var <- sapply(ltime, function(x) sapply(x, myvar)) 586 temp_id.var_sumNoVar <- sum(temp_id.var == 0, na.rm = TRUE) 587 temp_id.var_sumNA <- sum(is.na(temp_id.var)) 588 temp_id.varResult <- temp_id.var_sumNoVar + temp_id.var_sumNA 589 id.variation <- temp_id.varResult != length(ltime) 590 id.variation_anyNA <- temp_id.var_sumNA > 0 591 } 592 593 # make 'pvar' object 594 names(id.variation) <- names(time.variation) <- names(id.variation_anyNA) <- names(time.variation_anyNA) <- name.var 595 dim.var <- list(id.variation = id.variation, 596 time.variation = time.variation, 597 id.variation_anyNA = id.variation_anyNA, 598 time.variation_anyNA = time.variation_anyNA) 599 class(dim.var) <- "pvar" 600 return(dim.var) 601} 602 603#' @rdname pvar 604#' @export 605pvar.matrix <- function(x, index = NULL, ...){ 606 x <- pdata.frame(as.data.frame(x), index, ...) 607 pvar(x) 608} 609 610#' @rdname pvar 611#' @export 612pvar.data.frame <- function(x, index = NULL, ...){ 613 x <- pdata.frame(x, index, ...) 614 pvar(x) 615} 616 617#' @rdname pvar 618#' @export 619pvar.pdata.frame <- function(x, ...){ 620 index <- unclass(attr(x, "index")) # unclass for speed 621 pvar.default(x, index[[1L]], index[[2L]]) 622} 623 624#' @rdname pvar 625#' @export 626pvar.pseries <- function(x, ...){ 627 # use drop.index = TRUE so that the index columns' 628 # variations are not evaluated: 629 pdfx <- pseries2pdataframe(x, drop.index = TRUE) 630 pvar.pdata.frame(pdfx) 631} 632 633#' @rdname pvar 634#' @export 635print.pvar <- function(x, ...){ 636 varnames <- names(x$time.variation) 637 if(any(!x$time.variation)){ 638 var <- varnames[x$time.variation == FALSE] 639 # if (!is.null(y)) var <- var[-which(var==y$id)] 640 if(length(var)!=0) cat(paste("no time variation: ", paste(var,collapse=" "),"\n")) 641 } 642 if(any(!x$id.variation)){ 643 var <- varnames[x$id.variation == FALSE] 644 # if (!is.null(y)) var <- var[-which(var==y$time)] 645 if(length(var)!=0) cat(paste("no individual variation:", paste(var,collapse=" "),"\n")) 646 } 647 648 # any individual-time combinations all NA? 649 if(any(x$time.variation_anyNA)){ 650 var_anyNA <- varnames[x$time.variation_anyNA] 651 if(length(var_anyNA)!=0) cat(paste("all NA in time dimension for at least one individual: ", paste(var_anyNA,collapse=" "),"\n")) 652 } 653 if(any(x$id.variation_anyNA)){ 654 var_anyNA <- varnames[x$id.variation_anyNA] 655 if(length(var_anyNA)!=0) cat(paste("all NA in ind. dimension for at least one time period:", paste(var_anyNA,collapse=" "),"\n")) 656 } 657 invisible(x) 658} 659 660 661