plm/R/est_ldv.R

#' Panel estimators for limited dependent variables
#'
#' Fixed and random effects estimators for truncated or censored
#' limited dependent variable
#'
#' `pldv` computes two kinds of models: a LSQ/LAD estimator for the
#' first-difference model (`model = "fd"`) and a maximum likelihood estimator
#' with an assumed normal distribution for the individual effects
#' (`model = "random"` or `"pooling"`).
#'
#' For maximum-likelihood estimations, `pldv` uses internally function
#' [maxLik::maxLik()] (from package \CRANpkg{maxLik}).
#'
#' @aliases pldv
#' @param formula a symbolic description for the model to be
#'     estimated,
#' @param data a `data.frame`,
#' @param subset see `lm`,
#' @param weights see `lm`,
#' @param na.action see `lm`,
#' @param model one of `"fd"`, `"random"`, or `"pooling"`,
#' @param index the indexes, see [pdata.frame()],
#' @param R the number of points for the gaussian quadrature,
#' @param start a vector of starting values,
#' @param lower the lower bound for the censored/truncated dependent
#'     variable,
#' @param upper the upper bound for the censored/truncated dependent
#'     variable,
#' @param objfun the objective function for the fixed effect model (`model = "fd"`,
#'     irrelevant for other values of the `model` argument ):
#'     one of `"lsq"` for least squares (minimise sum of squares of the residuals)
#'     and `"lad"` for least absolute deviations (minimise sum of absolute values
#'     of the residuals),
#' @param sample `"cens"` for a censored (tobit-like) sample,
#'     `"trunc"` for a truncated sample,
#' @param \dots further arguments.
#' @return For `model = "fd"`, an object of class `c("plm", "panelmodel")`, for
#'  `model = "random"` and `model = "pooling"` an object of class `c("maxLik", "maxim")`.
#'
#' @export
#' @importFrom maxLik maxLik
#' @author Yves Croissant
#' @references
#'
#' \insertRef{HONO:92}{plm}
#'
#' @keywords regression
#' @examples
#' ## as these examples take a bit of time, do not run them automatically
#' \dontrun{
#' data("Donors", package = "pder")
#' library("plm")
#' pDonors <- pdata.frame(Donors, index = "id")
#'
#' # replicate Landry/Lange/List/Price/Rupp (2010), online appendix, table 5a, models A and B
#' modA <- pldv(donation ~ treatment +  prcontr, data = pDonors,
#'             model = "random", method = "bfgs")
#' summary(modA)
#' modB <- pldv(donation ~ treatment * prcontr - prcontr, data = pDonors,
#'             model = "random", method = "bfgs")
#' summary(modB)
#' }
#'
#
# TODO: check if argument method = "bfgs" is needed in example (and why)
#   -> seems strange as it is no direct argument of pldv

pldv <- function(formula, data, subset, weights, na.action,
                 model = c("fd", "random", "pooling"), index = NULL,
                 R = 20, start = NULL, lower = 0, upper = +Inf,
                 objfun = c("lsq", "lad"), sample = c("cens", "trunc"), ...){

## Due to the eval() construct with maxLik::maxLik we import maxLik::maxLik
## and re-export it via NAMESPACE as plm::maxLik with a minimal documentation
## pointing to the original documentation.
## This way, we can keep the flexibility of eval() [evalutate in parent frame]
## and can lessen the dependency burden by placing pkg maxLik in 'Imports'
## rather than 'Depends' in DESCRIPTION.

    # use the plm interface to compute the model.frame
    sample <- match.arg(sample)
    model <- match.arg(model)
    cl <- match.call()
    mf <- match.call(expand.dots = FALSE)
    mf <- cl
    m <- match(c("formula", "data", "subset", "weights", "na.action", "index"), names(mf), 0)
    mf <- mf[c(1L, m)]
    mf$model <- NA
    mf[[1L]] <- as.name("plm")
    mf <- eval(mf, parent.frame())
    formula <- attr(mf, "formula")

    # extract the relevant arguments for maxLik
    maxl <- cl
    m <- match(c("print.level", "ftol", "tol", "reltol",
                 "gradtol", "steptol", "lambdatol", "qrtol",
                 "iterlim", "fixed", "activePar", "method"), names(maxl), 0)
    maxl <- maxl[c(1L, m)]
    maxl[[1L]] <- as.name("maxLik")

    # The within model -> Bo Honore (1992)
    if (model == "fd"){
        objfun <- match.arg(objfun)
        # create a data.frame containing y_t and y_{t-1}
        y <- as.character(formula[[2L]])
        y <- mf[[y]]
        ly <- c(NA, y[1:(length(y) - 1)])
        id <- as.integer(index(mf, "id"))
        lid <- c(NA, id[1:(nrow(mf) - 1)])
        keep <- id == lid
        keep[1L] <- FALSE
        Y <- data.frame(y, ly)
        Y <- Y[keep, ]
        yt <- Y$y
        ytm1 <- Y$ly
        # create the matrix of first differenced covariates
        X <- model.matrix(mf, model = "fd")
        start <- rep(.1, ncol(X))
        names(start) <- colnames(X)
        if (sample == "trunc"){
            if (objfun == "lad") fm <- function(x) abs(x)
            if (objfun == "lsq") fm <- function(x) x ^ 2
            psi <- function(a1, a2, b){
                fm( (a2 <= b) * a1 +
                    (b > - a1 & b < a2) * (a2 - a1 - b) +
                    (a1 <= - b) * a2
                   )
            }
        }
        if (sample == "cens"){
            if (objfun == "lad"){
                psi <- function(a1, a2, b){
                    (a1 <= pmax(0, - b) & a2 <= pmax(0, b)) * 0 +
                        (! (a1 <= pmax(0, - b) & a2 <= pmax(0, b)) ) * abs(a2 - a1 - b)
                }
            }
            if (objfun == "lsq"){
                psi <- function(a1, a2, b){
                    (a2 <= b) * (a1 ^ 2 - 2 * a1 * (a2 - b)) +
                        (b > - a1 & b < a2) * (a2 - a1 - b) ^ 2 +
                        (a1 <= - b) * (a2 ^ 2 - 2 * a2 * (b + a1))
                }
            }
        }
        BO <- function(param){
            bdx <- as.numeric(X %*% param)
            lnl <- - psi(ytm1, yt, bdx)
            selobs <- (bdx > - ytm1 & bdx < yt)
            if (objfun == "lsq" && sample == "cens"){
                attr(lnl, "gradient") <- -
                    ( (ytm1 > - bdx & yt > bdx) * (- 2 * (yt - ytm1 - bdx)) +
                      (ytm1 > - bdx & yt < bdx) * (  2 * ytm1) +
                      (ytm1 < - bdx & yt > bdx) * (- 2 * yt) ) * X
                attr(lnl, "hessian") <-  - crossprod( (ytm1 > - bdx & yt > bdx) * X)
            }
            lnl
        }
        maxl[c("logLik", "start")] <- list(BO, start)
        result <- eval(maxl, parent.frame())
        if (objfun == "lsq" && sample == "cens"){
            bdx <- as.numeric((crossprod(t(X), coef(result))))
            V4 <- yt ^ 2 * (bdx <= - ytm1) + ytm1 ^ 2 * (yt <= bdx) +
                (yt - ytm1 - bdx) ^ 2 * (bdx > - ytm1 & bdx < yt)
            V4 <- crossprod(X, V4 * X) / length(V4)
            T4 <- crossprod((bdx > - ytm1 & bdx < yt) * X, X) / length(V4)
            solve_T4 <- solve(T4)
            vcov <- solve_T4 %*% V4 %*% solve_T4
            result$vcov <- V4
        }
        if (is.null(result$vcov)) result$vcov <- solve(- result$hessian)
        resid <- yt - as.numeric(crossprod(t(X), coef(result)))
        result <- list(coefficients = coef(result),
                       vcov         = result$vcov,
                       formula      = formula,
                       model        = mf,
                       df.residual  = nrow(X) - ncol(X),
                       residuals    = resid,
                       args         = list(model = "fd", effect = "individual"),
                       call         = cl)
        class(result) <- c("plm", "panelmodel")
    }
    else{ # model != "fd" => cases model = "random" / "pooling"

        # old pglm stuff for the pooling and the random model, with
        # update to allow upper and lower bonds
        X <- model.matrix(mf, rhs = 1, model = "pooling", effect = "individual")

        if (ncol(X) == 0L) stop("empty model")
        y <- pmodel.response(mf, model = "pooling", effect = "individual")
        id <- attr(mf, "index")[[1L]]

          # The following is the only instance of statmod::gauss.quad, so check for
          # the package's availability. (We placed 'statmod' in 'Suggests' rather
          # than 'Imports' so that it is not an absolutely required dependency.)
          ## Procedure for pkg check for pkg in 'Suggests' as recommended in
          ## Wickham, R packages (http://r-pkgs.had.co.nz/description.html).
          if (!requireNamespace("statmod", quietly = TRUE)) {
            stop(paste("Function 'gauss.quad' from package 'statmod' needed for this function to work.",
                       "Please install it, e.g., with 'install.packages(\"statmod\")"),
                 call. = FALSE)
          }
        # compute the nodes and the weights for the gaussian quadrature
        rn <- statmod::gauss.quad(R, kind = 'hermite')
        # compute the starting values
        ls <- length(start)
        if (model == "pooling"){
            K <- ncol(X)
            if (! ls %in% c(0, K + 1)) stop("irrelevant length for the start vector")
            if (ls == 0L){
                m <- match(c("formula", "data", "subset", "na.action"), names(cl), 0)
                lmcl <- cl[c(1,m)]
                lmcl[[1L]] <- as.name("lm")
                lmcl <- eval(lmcl, parent.frame()) # eval stats::lm()
                sig2 <- deviance(lmcl) / df.residual(lmcl)
                sigma <- sqrt(sig2)
                start <- c(coef(lmcl), sd.nu = sigma)
            }
        }
        else{ # case model != "pooling" and != "fd" => model ="random"
            if (ls <= 1L){
                startcl <- cl
                startcl$model <- "pooling"
                startcl$method <- "bfgs"
                pglmest <- eval(startcl, parent.frame()) # eval pldv() with updated args
                thestart <- coef(pglmest)
                if (ls == 1L){
                    start <- c(thestart, start)
                }
                else{
                   # case ls = 0
                    resid <- y -  as.numeric(tcrossprod(X, t(coef(pglmest)[1:ncol(X)])))
                    eta <- tapply(resid, id, mean)[as.character(id)]
                    nu <- resid - eta
                    start <- c(thestart[1:ncol(X)], sd.nu = sd(nu), sd.eta = sd(eta))
                }
            }
        }
        # call to maxLik with the relevant arguments
        argschar <- function(args){
            paste(as.character(names(args)), as.character(args),
                  sep= "=", collapse= ",")
        }
        args <- list(param = "start",
                     y = "y", X = "X", id = "id", model = "model",
                     rn = "rn", lower = lower, upper = upper)
        thefunc <- paste("function(start) lnl.tobit", "(", argschar(args), ")", sep = "")
        maxl$logLik <- eval(parse(text = thefunc))
        maxl$start <- start
        result <- eval(maxl, parent.frame())
        result[c('call', 'args', 'model')] <- list(cl, args, data)
    } # end cases model = "random" / "pooling"
    result
}


lnl.tobit <- function(param, y, X, id, lower = 0, upper = +Inf, model = "pooling", rn = NULL){
    compute.gradient <- TRUE
    compute.hessian <- FALSE
    mills <- function(x) exp(dnorm(x, log = TRUE) - pnorm(x, log.p = TRUE))
    O <- length(y)
    K <- ncol(X)
    beta <- param[1L:K]
    sigma <- param[K + 1L]
    Xb <- as.numeric(crossprod(t(X), beta))
    YLO <- (y == lower)
    YUT <- (y > lower) & (y < upper)
    YUP <- y == upper
    if (model == "random"){
        R <- length(rn$nodes)
        seta <- param[K + 2L]
    }
    else seta <- 0

    f <- function(i = NA){
        result <- numeric(length = length(y))
        if (is.na(i)) z <- 0 else z <- rn$nodes[i]
        e <- (y - Xb - sqrt(2) * seta * z) / sigma
        result[YLO] <- pnorm(  e[YLO], log.p = TRUE)
        result[YUT] <- dnorm(  e[YUT], log = TRUE) - log(sigma)
        result[YUP] <- pnorm(- e[YUP], log.p = TRUE)
        result
    }

    g <- function(i = NA){
        if (is.na(i)) z <- 0 else z <- rn$nodes[i]
        e <- (y - Xb - sqrt(2) * seta * z) / sigma
        mz <-  mills(e)
        mmz <- mills(- e)
        gradi <- matrix(0, nrow = nrow(X), ncol = ncol(X) + 1L)
        gradi[YLO, 1L:K]   <- - mz[YLO] * X[YLO, , drop = FALSE]
        gradi[YLO, K + 1L] <- -  e[YLO] * mz[YLO]
        gradi[YUT, 1L:K]   <-    e[YUT] * X[YUT, , drop = FALSE]
        gradi[YUT, K + 1L] <- - (1 - e[YUT] ^ 2)
        gradi[YUP, 1L:K]   <-  mmz[YUP] *  X[YUP, , drop = FALSE]
        gradi[YUP, K + 1L] <-    e[YUP] * mmz[YUP]
        if (! is.na(i)){
            gradi <- cbind(gradi, NA)
            gradi[YLO, K + 2L] <- - mz[YLO] * sqrt(2) * z
            gradi[YUT, K + 2L] <-    e[YUT] * sqrt(2) * z
            gradi[YUP, K + 2L] < - mmz[YUP] * sqrt(2) * z
        }
        gradi / sigma
    }

    h <- function(i = NA, pwnt = NULL){
        if (is.na(i)){
            z <- 0
            seta <- 0
            pw <- 1
        }
        else{
            z <- rn$nodes[i]
            pw <- pwnt[[i]]
        }
        e <- (y - Xb - sqrt(2) * seta * z) / sigma
        mz <-  mills(e)
        mmz <- mills(- e)
        hbb <- hbs <- hss <- numeric(length = nrow(X)) # pre-allocate
        hbb[YLO] <- - (e[YLO] + mz[YLO]) * mz[YLO]
        hbs[YLO] <-          mz[YLO] * (1 - (e[YLO] + mz[YLO]) * e[YLO])
        hss[YLO] <- e[YLO] * mz[YLO] * (2 - (e[YLO] + mz[YLO]) * e[YLO])
        hbb[YUT] <- - 1
        hbs[YUT] <- - 2 * e[YUT]
        hss[YUT] <- (1 - 3 * e[YUT] ^ 2)
        hbb[YUP] <- - (- e[YUP] + mmz[YUP]) * mmz[YUP]
        hbs[YUP] <-          - mmz[YUP] * (1 + (mmz[YUP] - e[YUP]) * e[YUP])
        hss[YUP] <- - e[YUP] * mmz[YUP] * (2 + (mmz[YUP] - e[YUP]) * e[YUP])
        hbb <- crossprod(hbb * X * pw, X)
        hbs <- apply(hbs * X * pw, 2, sum) # TODO: can use colSums -> faster
        hss <- sum(hss * pw)
        H <- rbind(cbind(hbb, hbs), c(hbs, hss))
        if (! is.na(i)){
            hba <- hsa <- haa <- numeric(length = nrow(X))
            hba[YLO] <- - (e[YLO] + mz[YLO]) * mz[YLO] * sqrt(2) * z
            hsa[YLO] <-   mz[YLO] * sqrt(2) * z * (1 - (e[YLO] + mz[YLO]) * e[YLO])
            haa[YLO] <- - (e[YLO] + mz[YLO]) * mz[YLO] * 2 * z ^ 2
            hba[YUT] <- - sqrt(2) * z
            hsa[YUT] <- - 2 * sqrt(2) * z * e[YUT]
            haa[YUT] <- - 2 * z ^ 2
            hba[YUP] <- - (- e[YUP] + mmz[YUP]) * mmz[YUP] * sqrt(2) * z
            hsa[YUP] <- - mmz[YUP] * sqrt(2) * z * (1 + (- e[YUP] + mmz[YUP]) * e[YUP])
            haa[YUP] <- - (- e[YUP] + mmz[YUP]) * mmz[YUP] * 2 * z ^ 2
            hba <- apply(hba * X * pw, 2, sum) # TODO: can use colSums -> faster
            haa <- sum(haa * pw)
            hsa <- sum(hsa * pw)
            H <- rbind(cbind(H, c(hba, hsa)), c(hba, hsa, haa))
        }
        H / sigma ^ 2
    }

    if (model == "pooling"){
        lnL <- sum(f(i = NA))
        if (compute.gradient) attr(lnL, "gradient") <- g(i = NA)
        if (compute.hessian) attr(lnL, "hessian") <- h(i = NA)
    }
    if (model == "random"){
        lnPntr <- lapply(1:R, function(i)  f(i = i))
        lnPnr <- lapply(lnPntr, function(x){
            result <- tapply(x, id, sum)
            ids <- names(result)
            result <- as.numeric(result)
            names(result) <- ids
            result
        }
        )
        lnPn <- lapply(1:R, function(i) rn$weights[i] * exp(lnPnr[[i]]))
        lnPn <- log(Reduce("+", lnPn)) - 0.5 * log(pi)
        lnL <- sum(lnPn)
        if (compute.gradient || compute.hessian){
            glnPnr  <- lapply(1:R, function(i) g(i = i))
            pwn     <- lapply(1:R, function(i) exp(lnPnr[[i]] - lnPn))
            pwnt    <- lapply(1:R, function(i) pwn[[i]][as.character(id)])
            glnPnr2 <- lapply(1:R, function(i) rn$weights[i] * pwnt[[i]]  * glnPnr[[i]])
            gradi <- Reduce("+", glnPnr2) / sqrt(pi)
            attr(lnL, "gradient") <- gradi
        }
        if (compute.hessian){
            hlnPnr <- lapply(1:R, function(i) h(i = i, pwnt = pwnt))
            daub <- lapply(1:R, function(i) apply(glnPnr[[i]], 2, tapply, id, sum) * pwn[[i]] * rn$weights[i])
            daub <- Reduce("+", daub) / sqrt(pi)
            DD1 <- - crossprod(daub)
            DD2 <- lapply(1:R, function(i) rn$weights[i] * hlnPnr[[i]])
            DD2 <- Reduce("+", DD2) / sqrt(pi)
            DD3 <- lapply(1:R, function(i) rn$weights[i] * crossprod(sqrt(pwn[[i]]) * apply(glnPnr[[i]], 2, tapply, id, sum)))
            DD3 <- Reduce("+", DD3) / sqrt(pi)
            H <- (DD1 + DD2 + DD3)
            attr(lnL, "hessian") <- H
        }
    }
    lnL
}