1\name{crq}
2\alias{crq}
3\alias{crq.fit.por}
4\alias{crq.fit.por2}
5\alias{crq.fit.pow}
6\alias{crq.fit.pen}
7\alias{print.crq}
8\alias{print.crq}
9\alias{coef.crq}
10\alias{predict.crq}
11\alias{predict.crqs}
12\alias{Curv}
13\title{Functions to fit censored quantile regression models}
14\description{Fits a conditional quantile regression model for censored data. There
15are three distinct methods:  the first is the fixed censoring method
16of Powell (1986) as implemented by Fitzenberger (1996), the second is the random
17censoring method of Portnoy (2003).  The third method is based on Peng and Huang (2008).}
18\usage{
19crq(formula, taus, data, subset, weights, na.action,
20	method = c("Powell", "Portnoy", "Portnoy2", "PengHuang"), contrasts = NULL, ...)
21crq.fit.pow(x, y, yc, tau=0.5, weights=NULL, start, left=TRUE, maxit = 500)
22crq.fit.pen(x, y, cen, weights=NULL, grid, ctype = "right")
23crq.fit.por(x, y, cen, weights=NULL, grid, ctype = "right")
24crq.fit.por2(x, y, cen, weights=NULL, grid, ctype = "right")
25Curv(y, yc, ctype=c("left","right"))
26\method{print}{crq}(x, ...)
27\method{print}{crq}(x, ...)
28\method{predict}{crq}(object, newdata,  ...)
29\method{predict}{crqs}(object, newdata, type = NULL, ...)
30\method{coef}{crq}(object,taus = 1:4/5,...)
31}
32\arguments{
33  \item{formula}{A formula object, with the response on the left of the `~'
34        operator, and the terms on the right.  The response must be a
35        \code{Surv} object as returned by either the \code{Curv} or \code{Surv}
36	function. For the Powell method, the Surv object should
37	be created by \code{Curv} and have arguments (event time, censoring time,type),
38	where "type" can take values either "left" or "right".
39	The default (for historical reasons) for type in this case is "left".
40	For the Portnoy and Peng and Huang  methods the \code{Surv}  should be created
41	with the usual \code{Surv} function and have (event time, censoring indicator).}
42  \item{y}{The event time.}
43  \item{newdata}{An optional data frame in which to look for variables with which
44	to predict. If omitted, the fitted values are used.}
45  \item{grid}{A vector of taus on which the quantile process should be evaluated. This
46	should be monotonic, and take values in (0,1).  For the "Portnoy"  method,
47	grid = "pivot" computes the full solution for all distinct taus.  The "Portnoy"
48	method also enforces an equally spaced grid, see the code for details.}
49  \item{x}{An object of class \code{crq} or \code{crq}.}
50  \item{object}{An object of class \code{crq} or \code{crq}.}
51  \item{yc}{The censoring times for the "Powell" method.}
52  \item{ctype}{Censoring type: for the "Powell" method, used in \code{Curv}, by
53	default "left".  If you don't like "left", maybe you will like "right".
54	Note that for fixed censoring assumed in the "Powell" method, censoring
55	times \code{yc} must be provided for all observations and the event
56	times \code{y} must satisfy the (respective) inequality constraints.
57	For the Portnoy and Peng-Huang methods ctype is determined by the
58	specification of the response as specified in \code{Surv}.
59        }
60  \item{type}{specifies either "left" or "right" as the form of censoring
61	 in the \code{Surv} function for the "Portnoy" and "PengHuang"  methods.}
62  \item{cen}{The censoring indicator for the "Portnoy" and "PengHuang"  methods.}
63  \item{maxit}{Maximum number of iterations allowed for the "Powell" methods.}
64  \item{start}{The starting value for the coefs for the "Powell" method.  Because
65	the Fitzenberger algorithm stops when it achieves a local minimum
66	of the Powell objective function, the starting value acts as an
67	a priori "preferred point".  This is advantageous in some instances
68	since the global Powell solution can be quite extreme. By default the
69	starting value is the "naive rq" solution that  treats all the censored
70	observations as uncensored.  If \code{start} is equal to "global"
71	then an attempt is made to compute to global optimum of the Powell
72	objective.  This entails an exhaustive evaluation of all n choose p
73	distinct basic solution so is rather impractical for moderately large
74	problems. Otherwise, the starting value can specify a set of p indices
75	from 1:n defining an initial basic solution, or it may specify a p-vector
76	of initial regression coefficients.  In the latter case the initial basic
77	solution is the one closest to the specified parameter vector.}
78  \item{left}{A logical indicator for left censoring for the "Powell" method.}
79  \item{taus}{The quantile(s) at which the model is to be estimated.}
80  \item{tau}{The quantile at which the model is to be estimated.}
81  \item{data}{A data.frame in which to interpret the variables named in the
82          `formula',  in the `subset', and the `weights' argument.}
83  \item{subset}{an optional vector specifying a subset of observations to be
84          used in the fitting process.}
85  \item{weights}{vector of observation weights; if supplied, the algorithm
86          fits to minimize the sum of the weights multiplied into the
87          absolute residuals. The length of weights vector must be the same as
88          the number of observations.  The weights must be nonnegative
89          and it is strongly recommended that they be strictly
90          positive, since zero weights are ambiguous.}
91  \item{na.action}{a function to filter missing data.  This is applied to the
92          model.frame after any subset argument has been used.  The
93          default (with 'na.fail') is to create an error if any missing
94          values are   found.  A possible alternative is 'na.omit',
95          which  deletes observations that contain one or more missing
96          values. }
97  \item{method}{The method used for fitting.  There are currently
98	two options: method "Powell" computes the Powell estimator using
99	the algorithm of Fitzenberger (1996), method "Portnoy" computes the
100	Portnoy (2003) estimator.  The  method is "PengHuang" uses the method
101	of Peng and Huang (2007), in this case the variable "grid"
102	can be passed to specify the vector of quantiles at which the solution
103	is desired.}
104  \item{contrasts}{a list giving contrasts for some or all of the factors
105          default = 'NULL' appearing in the model formula.  The
106          elements of the list should have the same name as the
107          variable  and should be either a contrast matrix
108          (specifically, any full-rank  matrix with as many rows as
109          there are levels in the factor),  or else a function to
110          compute such a matrix given the number of levels.}
111  \item{...}{additional arguments for the fitting routine, for method "Powell"
112	it may be useful to pass starting values of the regression parameter
113	via the argument "start", while for methods "Portnoy" or "PengHuang"
114	one may wish to specify an alternative to the default grid for evaluating
115	the fit.}
116}
117\details{The Fitzenberger algorithm uses a variant of the Barrodale and Roberts
118	simplex method.  Exploiting the fact that the solution must be characterized
119	by an exact fit to p points when there are p parameters to be estimated,
120	at any trial basic solution it computes the directional derivatives in the
121	2p distinct directions
122	and choses the direction that (locally) gives steepest descent.  It then
123	performs a one-dimensional line search to choose the new basic observation
124	and continues until it reaches a local mimumum.  By default it starts at
125	the naive \code{rq} solution ignoring the censoring;  this has the (slight)
126	advantage that the estimator is consequently equivariant to canonical
127	transformations of the data.  Since the objective function is no longer convex
128	there can be no guarantee that this  produces a global minimum estimate.
129	In small problems exhaustive search over solutions defined by p-element
130	subsets of the n observations can be used, but this quickly becomes
131	impractical for large p and n.   This global version of the Powell
132	estimator can be invoked by specifying \code{start = "global"}. Users
133	interested in this option would be well advised to compute \code{choose(n,p)}
134	for their problems before trying it.  The method operates by pivoting
135	through this many distinct solutions and choosing the one that gives the
136	minimal Powell objective.  The algorithm used for the Portnoy
137	method is described in considerable detail in Portnoy (2003).
138	There is a somewhat simplified version of the Portnoy method that is
139	written in R and iterates over a discrete grid.  This version should
140	be considered somewhat experimental at this stage, but it is known to
141	avoid some difficulties with the more complicated fortran version of
142	the algorithm that can occur in degenerate problems.
143	Both the Portnoy and Peng-Huang estimators may be unable to compute
144	estimates of the conditional quantile parameters in the upper tail of
145	distribution.  Like the Kaplan-Meier estimator, when censoring is heavy
146	in the upper tail the estimated distribution is defective  and quantiles
147	are only estimable on a sub-interval of (0,1).
148	The Peng and Huang estimator can be
149	viewed as a generalization of the Nelson Aalen estimator of the cumulative
150	hazard function,  and can be formulated as a variant of the conventional
151	quantile regression dual problem.  See Koenker (2008) for further details.
152	This paper is available from the package with \code{vignette("crq")}.}
153\value{An object of class \code{crq}.}
154
155\references{
156
157Fitzenberger, B.  (1996): ``A Guide to Censored Quantile
158Regressions,'' in \emph{Handbook of Statistics}, ed. by C.~Rao,   and
159G.~Maddala. North-Holland: New York.
160
161Fitzenberger, B.  and P. Winker (2007): ``Improving the Computation of
162Censored Quantile Regression Estimators,'' CSDA, 52,  88-108.
163
164Koenker, R. (2008): ``Censored Quantile Regression Redux,'' \emph{J.
165Statistical Software}, 27, \url{https://www.jstatsoft.org/v27/i06}.
166
167Peng, L and Y Huang, (2008) Survival Analysis with Quantile Regression Models,
168\emph{J. Am. Stat. Assoc.}, 103, 637-649.
169
170Portnoy, S. (2003) ``Censored Quantile Regression,'' \emph{JASA},
17198,1001-1012.
172
173Powell, J. (1986) ``Censored Regression Quantiles,'' \emph{J.
174Econometrics}, 32, 143--155.
175}
176
177
178\author{Steve Portnoy and  Roger Koenker}
179
180\seealso{\code{\link{summary.crq}}}
181
182\examples{
183# An artificial Powell example
184set.seed(2345)
185x <- sqrt(rnorm(100)^2)
186y <-  -0.5 + x +(.25 + .25*x)*rnorm(100)
187plot(x,y, type="n")
188s <- (y > 0)
189points(x[s],y[s],cex=.9,pch=16)
190points(x[!s],y[!s],cex=.9,pch=1)
191yLatent <- y
192y <- pmax(0,y)
193yc <- rep(0,100)
194for(tau in (1:4)/5){
195        f <- crq(Curv(y,yc) ~ x, tau = tau, method = "Pow")
196        xs <- sort(x)
197        lines(xs,pmax(0,cbind(1,xs)\%*\%f$coef),col="red")
198        abline(rq(y ~ x, tau = tau), col="blue")
199        abline(rq(yLatent ~ x, tau = tau), col="green")
200        }
201legend(.15,2.5,c("Naive QR","Censored QR","Omniscient QR"),
202        lty=rep(1,3),col=c("blue","red","green"))
203
204# crq example with left censoring
205set.seed(1968)
206n <- 200
207x <-rnorm(n)
208y <- 5 + x + rnorm(n)
209plot(x,y,cex = .5)
210c <- 4 + x + rnorm(n)
211d <- (y > c)
212points(x[!d],y[!d],cex = .5, col = 2)
213f <- crq(survival::Surv(pmax(y,c), d, type = "left") ~ x, method = "Portnoy")
214g <- summary(f)
215for(i in 1:4) abline(coef(g[[i]])[,1])
216}
217
218\keyword{survival}
219\keyword{regression}
220