1########################################################################## 2## tobit regression model 3## 4## This software is distributed under the terms of the GNU GENERAL 5## PUBLIC LICENSE Version 2, June 1991. See the package LICENSE 6## file for more information. 7## 8## Copyright (C) 2003-2007 Andrew D. Martin and Kevin M. Quinn 9## Copyright (C) 2007-present Andrew D. Martin, Kevin M. Quinn, 10## and Jong Hee Park 11########################################################################## 12 13 14#' Markov Chain Monte Carlo for Gaussian Linear Regression with a Censored 15#' Dependent Variable 16#' 17#' This function generates a sample from the posterior distribution of a linear 18#' regression model with Gaussian errors using Gibbs sampling (with a 19#' multivariate Gaussian prior on the beta vector, and an inverse Gamma prior 20#' on the conditional error variance). The dependent variable may be censored 21#' from below, from above, or both. The user supplies data and priors, and a 22#' sample from the posterior distribution is returned as an mcmc object, which 23#' can be subsequently analyzed with functions provided in the coda package. 24#' 25#' \code{MCMCtobit} simulates from the posterior distribution using standard 26#' Gibbs sampling (a multivariate Normal draw for the betas, and an inverse 27#' Gamma draw for the conditional error variance). \code{MCMCtobit} differs 28#' from \code{MCMCregress} in that the dependent variable may be censored from 29#' below, from above, or both. The simulation proper is done in compiled C++ 30#' code to maximize efficiency. Please consult the coda documentation for a 31#' comprehensive list of functions that can be used to analyze the posterior 32#' sample. 33#' 34#' The model takes the following form: 35#' 36#' \deqn{y_i = x_i ' \beta + \varepsilon_{i},} 37#' 38#' where the errors are assumed 39#' to be Gaussian: 40#' 41#' \deqn{\varepsilon_{i} \sim \mathcal{N}(0, \sigma^2).} 42#' 43#' Let \eqn{c_1} and \eqn{c_2} be the two censoring points, and let 44#' \eqn{y_i^\ast} be the partially observed dependent variable. Then, 45#' 46#' \deqn{y_i = y_i^{\ast} \texttt{ if } c_1 < y_i^{\ast} < c_2,} 47#' 48#' \deqn{y_i = c_1 \texttt{ if } c_1 \geq y_i^{\ast},} 49#' 50#' \deqn{y_i = c_2 \texttt{ if } c_2 \leq y_i^{\ast}.} 51#' 52#' We assume standard, semi-conjugate priors: 53#' 54#' \deqn{\beta \sim \mathcal{N}(b_0,B_0^{-1}),} 55#' 56#' and: 57#' 58#' \deqn{\sigma^{-2} \sim \mathcal{G}amma(c_0/2, d_0/2),} 59#' 60#' where \eqn{\beta} and \eqn{\sigma^{-2}} are 61#' assumed \emph{a priori} independent. Note that only starting 62#' values for \eqn{\beta} are allowed because simulation is done 63#' using Gibbs sampling with the conditional error variance as the 64#' first block in the sampler. 65#' 66#' @param formula A model formula. 67#' 68#' @param data A dataframe. 69#' 70#' @param below The point at which the dependent variable is censored from 71#' below. The default is zero. To censor from above only, specify that below = 72#' -Inf. 73#' @param above The point at which the dependent variable is censored from 74#' above. To censor from below only, use the default value of Inf. 75#' 76#' @param burnin The number of burn-in iterations for the sampler. 77#' 78#' @param mcmc The number of MCMC iterations after burnin. 79#' 80#' @param thin The thinning interval used in the simulation. The number of 81#' MCMC iterations must be divisible by this value. 82#' 83#' @param verbose A switch which determines whether or not the progress of the 84#' sampler is printed to the screen. If \code{verbose} is greater than 0 the 85#' iteration number, the \eqn{\beta} vector, and the error variance is 86#' printed to the screen every \code{verbose}th iteration. 87#' 88#' @param seed The seed for the random number generator. If NA, the Mersenne 89#' Twister generator is used with default seed 12345; if an integer is passed 90#' it is used to seed the Mersenne twister. The user can also pass a list of 91#' length two to use the L'Ecuyer random number generator, which is suitable 92#' for parallel computation. The first element of the list is the L'Ecuyer 93#' seed, which is a vector of length six or NA (if NA a default seed of 94#' \code{rep(12345,6)} is used). The second element of list is a positive 95#' substream number. See the MCMCpack specification for more details. 96#' 97#' @param beta.start The starting values for the \eqn{\beta} vector. 98#' This can either be a scalar or a column vector with dimension equal to the 99#' number of betas. The default value of of NA will use the OLS estimate of 100#' \eqn{\beta} as the starting value. If this is a scalar, that value 101#' will serve as the starting value mean for all of the betas. 102#' 103#' @param b0 The prior mean of \eqn{\beta}. This can either be a scalar 104#' or a column vector with dimension equal to the number of betas. If this 105#' takes a scalar value, then that value will serve as the prior mean for all 106#' of the betas. 107#' 108#' @param B0 The prior precision of \eqn{\beta}. This can either be a 109#' scalar or a square matrix with dimensions equal to the number of betas. If 110#' this takes a scalar value, then that value times an identity matrix serves 111#' as the prior precision of beta. Default value of 0 is equivalent to an 112#' improper uniform prior for beta. 113#' 114#' @param c0 \eqn{c_0/2} is the shape parameter for the inverse Gamma 115#' prior on \eqn{\sigma^2} (the variance of the disturbances). The 116#' amount of information in the inverse Gamma prior is something like that from 117#' \eqn{c_0} pseudo-observations. 118#' 119#' @param d0 \eqn{d_0/2} is the scale parameter for the inverse Gamma 120#' prior on \eqn{\sigma^2} (the variance of the disturbances). In 121#' constructing the inverse Gamma prior, \eqn{d_0} acts like the sum of 122#' squared errors from the \eqn{c_0} pseudo-observations. 123#' 124#' @param ... further arguments to be passed 125#' 126#' @return An mcmc object that contains the posterior sample. This object can 127#' be summarized by functions provided by the coda package. 128#' 129#' @export 130#' 131#' @author Ben Goodrich, \email{goodrich.ben@gmail.com}, 132#' \url{http://www.columbia.edu/~bg2382/} 133#' 134#' @seealso \code{\link[coda]{plot.mcmc}}, \code{\link[coda]{summary.mcmc}}, 135#' \code{\link[survival]{survreg}}, \code{\link[MCMCpack]{MCMCregress}} 136#' 137#' @references Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park. 2011. 138#' ``MCMCpack: Markov Chain Monte Carlo in R.'', \emph{Journal of Statistical 139#' Software}. 42(9): 1-21. \doi{10.18637/jss.v042.i09}. 140#' 141#' Daniel Pemstein, Kevin M. Quinn, and Andrew D. Martin. 2007. \emph{Scythe 142#' Statistical Library 1.0.} \url{http://scythe.lsa.umich.edu}. 143#' 144#' Martyn Plummer, Nicky Best, Kate Cowles, and Karen Vines. 2006. ``Output 145#' Analysis and Diagnostics for MCMC (CODA)'', \emph{R News}. 6(1): 7-11. 146#' \url{https://CRAN.R-project.org/doc/Rnews/Rnews_2006-1.pdf}. 147#' 148#' Siddhartha Chib. 1992. ``Bayes inference in the Tobit censored regression 149#' model." \emph{Journal of Econometrics}. 51:79-99. 150#' 151#' James Tobin. 1958. ``Estimation of relationships for limited dependent 152#' variables." \emph{Econometrica.} 26:24-36. 153#' 154#' @keywords models 155#' 156#' @examples 157#' 158#' \dontrun{ 159#' library(survival) 160#' example(tobin) 161#' summary(tfit) 162#' tfit.mcmc <- MCMCtobit(durable ~ age + quant, data=tobin, mcmc=30000, 163#' verbose=1000) 164#' plot(tfit.mcmc) 165#' raftery.diag(tfit.mcmc) 166#' summary(tfit.mcmc) 167#' } 168#' 169"MCMCtobit" <- 170function(formula, data=NULL, below = 0.0, above = Inf, 171 burnin = 1000, mcmc = 10000, 172 thin=1, verbose = 0, seed = NA, beta.start = NA, 173 b0 = 0, B0 = 0, c0 = 0.001, d0 = 0.001, ...) { 174 175 # checks 176 check.offset(list(...)) 177 check.mcmc.parameters(burnin, mcmc, thin) 178 if (!is.numeric(below) | !is.numeric(above)) { 179 cat("Error: Censoring points must be numeric, which includes +-Inf.\n") 180 stop("Please respecify and call ", calling.function(), " again.", 181 call.=FALSE) 182 } 183 if (below >= above) { 184 cat("Error: Truncation points are logically inconsistent.\n") 185 stop("Please respecify and call ", calling.function(), " again.", 186 call.=FALSE) 187 } 188 189 # convert infinite values to finite approximations 190 if(is.infinite(below)) below <- .Machine$double.xmax*-1 191 if(is.infinite(above)) above <- .Machine$double.xmax 192 193 # seeds 194 seeds <- form.seeds(seed) 195 lecuyer <- seeds[[1]] 196 seed.array <- seeds[[2]] 197 lecuyer.stream <- seeds[[3]] 198 199 # form response and model matrices 200 holder <- parse.formula(formula, data=data) 201 Y <- holder[[1]] 202 X <- holder[[2]] 203 xnames <- holder[[3]] 204 K <- ncol(X) # number of covariates 205 206 # starting values and priors 207 beta.start <- coef.start(beta.start, K, formula, family=gaussian, data) 208 mvn.prior <- form.mvn.prior(b0, B0, K) 209 b0 <- mvn.prior[[1]] 210 B0 <- mvn.prior[[2]] 211 check.ig.prior(c0, d0) 212 213 # define holder for posterior sample 214 sample <- matrix(data=0, mcmc/thin, K+1) 215 posterior <- NULL 216 217 # call C++ code to draw sample 218 auto.Scythe.call(output.object="posterior", cc.fun.name="cMCMCtobit", 219 sample.nonconst=sample, Y=Y, X=X, below=as.double(below), 220 above=as.double(above), 221 burnin=as.integer(burnin), mcmc=as.integer(mcmc), 222 thin=as.integer(thin), 223 lecuyer=as.integer(lecuyer), 224 seedarray=as.integer(seed.array), 225 lecuyerstream=as.integer(lecuyer.stream), 226 verbose=as.integer(verbose), betastart=beta.start, 227 b0=b0, B0=B0, c0=as.double(c0), d0=as.double(d0)) 228 229 # pull together matrix and build MCMC object to return 230 output <- form.mcmc.object(posterior, 231 names=c(xnames, "sigma2"), 232 title="MCMCtobit Posterior Sample") 233 return(output) 234 } 235