1\name{crq} 2\alias{crq} 3\alias{crq.fit.por} 4\alias{crq.fit.por2} 5\alias{crq.fit.pow} 6\alias{crq.fit.pen} 7\alias{print.crq} 8\alias{print.crq} 9\alias{coef.crq} 10\alias{predict.crq} 11\alias{predict.crqs} 12\alias{Curv} 13\title{Functions to fit censored quantile regression models} 14\description{Fits a conditional quantile regression model for censored data. There 15are three distinct methods: the first is the fixed censoring method 16of Powell (1986) as implemented by Fitzenberger (1996), the second is the random 17censoring method of Portnoy (2003). The third method is based on Peng and Huang (2008).} 18\usage{ 19crq(formula, taus, data, subset, weights, na.action, 20 method = c("Powell", "Portnoy", "Portnoy2", "PengHuang"), contrasts = NULL, ...) 21crq.fit.pow(x, y, yc, tau=0.5, weights=NULL, start, left=TRUE, maxit = 500) 22crq.fit.pen(x, y, cen, weights=NULL, grid, ctype = "right") 23crq.fit.por(x, y, cen, weights=NULL, grid, ctype = "right") 24crq.fit.por2(x, y, cen, weights=NULL, grid, ctype = "right") 25Curv(y, yc, ctype=c("left","right")) 26\method{print}{crq}(x, ...) 27\method{print}{crq}(x, ...) 28\method{predict}{crq}(object, newdata, ...) 29\method{predict}{crqs}(object, newdata, type = NULL, ...) 30\method{coef}{crq}(object,taus = 1:4/5,...) 31} 32\arguments{ 33 \item{formula}{A formula object, with the response on the left of the `~' 34 operator, and the terms on the right. The response must be a 35 \code{Surv} object as returned by either the \code{Curv} or \code{Surv} 36 function. For the Powell method, the Surv object should 37 be created by \code{Curv} and have arguments (event time, censoring time,type), 38 where "type" can take values either "left" or "right". 39 The default (for historical reasons) for type in this case is "left". 40 For the Portnoy and Peng and Huang methods the \code{Surv} should be created 41 with the usual \code{Surv} function and have (event time, censoring indicator).} 42 \item{y}{The event time.} 43 \item{newdata}{An optional data frame in which to look for variables with which 44 to predict. If omitted, the fitted values are used.} 45 \item{grid}{A vector of taus on which the quantile process should be evaluated. This 46 should be monotonic, and take values in (0,1). For the "Portnoy" method, 47 grid = "pivot" computes the full solution for all distinct taus. The "Portnoy" 48 method also enforces an equally spaced grid, see the code for details.} 49 \item{x}{An object of class \code{crq} or \code{crq}.} 50 \item{object}{An object of class \code{crq} or \code{crq}.} 51 \item{yc}{The censoring times for the "Powell" method.} 52 \item{ctype}{Censoring type: for the "Powell" method, used in \code{Curv}, by 53 default "left". If you don't like "left", maybe you will like "right". 54 Note that for fixed censoring assumed in the "Powell" method, censoring 55 times \code{yc} must be provided for all observations and the event 56 times \code{y} must satisfy the (respective) inequality constraints. 57 For the Portnoy and Peng-Huang methods ctype is determined by the 58 specification of the response as specified in \code{Surv}. 59 } 60 \item{type}{specifies either "left" or "right" as the form of censoring 61 in the \code{Surv} function for the "Portnoy" and "PengHuang" methods.} 62 \item{cen}{The censoring indicator for the "Portnoy" and "PengHuang" methods.} 63 \item{maxit}{Maximum number of iterations allowed for the "Powell" methods.} 64 \item{start}{The starting value for the coefs for the "Powell" method. Because 65 the Fitzenberger algorithm stops when it achieves a local minimum 66 of the Powell objective function, the starting value acts as an 67 a priori "preferred point". This is advantageous in some instances 68 since the global Powell solution can be quite extreme. By default the 69 starting value is the "naive rq" solution that treats all the censored 70 observations as uncensored. If \code{start} is equal to "global" 71 then an attempt is made to compute to global optimum of the Powell 72 objective. This entails an exhaustive evaluation of all n choose p 73 distinct basic solution so is rather impractical for moderately large 74 problems. Otherwise, the starting value can specify a set of p indices 75 from 1:n defining an initial basic solution, or it may specify a p-vector 76 of initial regression coefficients. In the latter case the initial basic 77 solution is the one closest to the specified parameter vector.} 78 \item{left}{A logical indicator for left censoring for the "Powell" method.} 79 \item{taus}{The quantile(s) at which the model is to be estimated.} 80 \item{tau}{The quantile at which the model is to be estimated.} 81 \item{data}{A data.frame in which to interpret the variables named in the 82 `formula', in the `subset', and the `weights' argument.} 83 \item{subset}{an optional vector specifying a subset of observations to be 84 used in the fitting process.} 85 \item{weights}{vector of observation weights; if supplied, the algorithm 86 fits to minimize the sum of the weights multiplied into the 87 absolute residuals. The length of weights vector must be the same as 88 the number of observations. The weights must be nonnegative 89 and it is strongly recommended that they be strictly 90 positive, since zero weights are ambiguous.} 91 \item{na.action}{a function to filter missing data. This is applied to the 92 model.frame after any subset argument has been used. The 93 default (with 'na.fail') is to create an error if any missing 94 values are found. A possible alternative is 'na.omit', 95 which deletes observations that contain one or more missing 96 values. } 97 \item{method}{The method used for fitting. There are currently 98 two options: method "Powell" computes the Powell estimator using 99 the algorithm of Fitzenberger (1996), method "Portnoy" computes the 100 Portnoy (2003) estimator. The method is "PengHuang" uses the method 101 of Peng and Huang (2007), in this case the variable "grid" 102 can be passed to specify the vector of quantiles at which the solution 103 is desired.} 104 \item{contrasts}{a list giving contrasts for some or all of the factors 105 default = 'NULL' appearing in the model formula. The 106 elements of the list should have the same name as the 107 variable and should be either a contrast matrix 108 (specifically, any full-rank matrix with as many rows as 109 there are levels in the factor), or else a function to 110 compute such a matrix given the number of levels.} 111 \item{...}{additional arguments for the fitting routine, for method "Powell" 112 it may be useful to pass starting values of the regression parameter 113 via the argument "start", while for methods "Portnoy" or "PengHuang" 114 one may wish to specify an alternative to the default grid for evaluating 115 the fit.} 116} 117\details{The Fitzenberger algorithm uses a variant of the Barrodale and Roberts 118 simplex method. Exploiting the fact that the solution must be characterized 119 by an exact fit to p points when there are p parameters to be estimated, 120 at any trial basic solution it computes the directional derivatives in the 121 2p distinct directions 122 and choses the direction that (locally) gives steepest descent. It then 123 performs a one-dimensional line search to choose the new basic observation 124 and continues until it reaches a local mimumum. By default it starts at 125 the naive \code{rq} solution ignoring the censoring; this has the (slight) 126 advantage that the estimator is consequently equivariant to canonical 127 transformations of the data. Since the objective function is no longer convex 128 there can be no guarantee that this produces a global minimum estimate. 129 In small problems exhaustive search over solutions defined by p-element 130 subsets of the n observations can be used, but this quickly becomes 131 impractical for large p and n. This global version of the Powell 132 estimator can be invoked by specifying \code{start = "global"}. Users 133 interested in this option would be well advised to compute \code{choose(n,p)} 134 for their problems before trying it. The method operates by pivoting 135 through this many distinct solutions and choosing the one that gives the 136 minimal Powell objective. The algorithm used for the Portnoy 137 method is described in considerable detail in Portnoy (2003). 138 There is a somewhat simplified version of the Portnoy method that is 139 written in R and iterates over a discrete grid. This version should 140 be considered somewhat experimental at this stage, but it is known to 141 avoid some difficulties with the more complicated fortran version of 142 the algorithm that can occur in degenerate problems. 143 Both the Portnoy and Peng-Huang estimators may be unable to compute 144 estimates of the conditional quantile parameters in the upper tail of 145 distribution. Like the Kaplan-Meier estimator, when censoring is heavy 146 in the upper tail the estimated distribution is defective and quantiles 147 are only estimable on a sub-interval of (0,1). 148 The Peng and Huang estimator can be 149 viewed as a generalization of the Nelson Aalen estimator of the cumulative 150 hazard function, and can be formulated as a variant of the conventional 151 quantile regression dual problem. See Koenker (2008) for further details. 152 This paper is available from the package with \code{vignette("crq")}.} 153\value{An object of class \code{crq}.} 154 155\references{ 156 157Fitzenberger, B. (1996): ``A Guide to Censored Quantile 158Regressions,'' in \emph{Handbook of Statistics}, ed. by C.~Rao, and 159G.~Maddala. North-Holland: New York. 160 161Fitzenberger, B. and P. Winker (2007): ``Improving the Computation of 162Censored Quantile Regression Estimators,'' CSDA, 52, 88-108. 163 164Koenker, R. (2008): ``Censored Quantile Regression Redux,'' \emph{J. 165Statistical Software}, 27, \url{https://www.jstatsoft.org/v27/i06}. 166 167Peng, L and Y Huang, (2008) Survival Analysis with Quantile Regression Models, 168\emph{J. Am. Stat. Assoc.}, 103, 637-649. 169 170Portnoy, S. (2003) ``Censored Quantile Regression,'' \emph{JASA}, 17198,1001-1012. 172 173Powell, J. (1986) ``Censored Regression Quantiles,'' \emph{J. 174Econometrics}, 32, 143--155. 175} 176 177 178\author{Steve Portnoy and Roger Koenker} 179 180\seealso{\code{\link{summary.crq}}} 181 182\examples{ 183# An artificial Powell example 184set.seed(2345) 185x <- sqrt(rnorm(100)^2) 186y <- -0.5 + x +(.25 + .25*x)*rnorm(100) 187plot(x,y, type="n") 188s <- (y > 0) 189points(x[s],y[s],cex=.9,pch=16) 190points(x[!s],y[!s],cex=.9,pch=1) 191yLatent <- y 192y <- pmax(0,y) 193yc <- rep(0,100) 194for(tau in (1:4)/5){ 195 f <- crq(Curv(y,yc) ~ x, tau = tau, method = "Pow") 196 xs <- sort(x) 197 lines(xs,pmax(0,cbind(1,xs)\%*\%f$coef),col="red") 198 abline(rq(y ~ x, tau = tau), col="blue") 199 abline(rq(yLatent ~ x, tau = tau), col="green") 200 } 201legend(.15,2.5,c("Naive QR","Censored QR","Omniscient QR"), 202 lty=rep(1,3),col=c("blue","red","green")) 203 204# crq example with left censoring 205set.seed(1968) 206n <- 200 207x <-rnorm(n) 208y <- 5 + x + rnorm(n) 209plot(x,y,cex = .5) 210c <- 4 + x + rnorm(n) 211d <- (y > c) 212points(x[!d],y[!d],cex = .5, col = 2) 213f <- crq(survival::Surv(pmax(y,c), d, type = "left") ~ x, method = "Portnoy") 214g <- summary(f) 215for(i in 1:4) abline(coef(g[[i]])[,1]) 216} 217 218\keyword{survival} 219\keyword{regression} 220