1\name{partialPlot}
2\alias{partialPlot}
3\alias{partialPlot.default}
4\alias{partialPlot.randomForest}
5
6\title{Partial dependence plot}
7
8\description{
9Partial dependence plot gives a graphical depiction of the marginal
10effect of a variable on the class probability (classification) or
11response (regression).
12}
13
14\usage{
15\method{partialPlot}{randomForest}(x, pred.data, x.var, which.class,
16      w, plot = TRUE, add = FALSE,
17      n.pt = min(length(unique(pred.data[, xname])), 51),
18      rug = TRUE, xlab=deparse(substitute(x.var)), ylab="",
19      main=paste("Partial Dependence on", deparse(substitute(x.var))),
20      ...)
21}
22
23\arguments{
24  \item{x}{an object of class \code{randomForest}, which contains a
25    \code{forest} component.}
26  \item{pred.data}{a data frame used for constructing the plot, usually
27    the training data used to construct the random forest.}
28  \item{x.var}{name of the variable for which partial
29    dependence is to be examined.}
30  \item{which.class}{For classification data, the class to focus on
31    (default the first class).}
32  \item{w}{weights to be used in averaging; if not supplied, the mean is
33  not weighted.}
34  \item{plot}{whether the plot should be shown on the graphic device.}
35  \item{add}{whether to add to an existing plot (\code{TRUE}).}
36  \item{n.pt}{if \code{x.var} is continuous, the number of points on the
37    grid for evaluating partial dependence.}
38  \item{rug}{whether to draw hash marks at the bottom of the plot
39    indicating the deciles of \code{x.var}.}
40  \item{xlab}{label for the x-axis.}
41  \item{ylab}{label for the y-axis.}
42  \item{main}{main title for the plot.}
43  \item{...}{other graphical parameters to be passed on to \code{plot}
44    or \code{lines}.}
45}
46
47\value{
48A list with two components: \code{x} and \code{y}, which are the values
49used in the plot.
50}
51
52\details{
53  The function being plotted is defined as:
54  \deqn{
55    \tilde{f}(x) = \frac{1}{n} \sum_{i=1}^n f(x, x_{iC}),
56  }
57  where \eqn{x} is the variable for which partial dependence is sought,
58  and \eqn{x_{iC}} is the other variables in the data.  The summand is
59  the predicted regression function for regression, and logits
60  (i.e., log of fraction of votes) for \code{which.class} for
61  classification:
62\deqn{ f(x) = \log p_k(x) - \frac{1}{K} \sum_{j=1}^K \log p_j(x),}
63where \eqn{K} is the number of classes, \eqn{k} is \code{which.class},
64and \eqn{p_j} is the proportion of votes for class \eqn{j}.
65}
66\note{
67  The \code{randomForest} object must contain the \code{forest}
68  component; i.e., created with \code{randomForest(...,
69    keep.forest=TRUE)}.
70
71  This function runs quite slowly for large data sets.
72}
73\references{
74Friedman, J. (2001). Greedy function approximation: a gradient
75boosting machine, \emph{Annals of Statistics}, \bold{29}(5), 1189--1232.}
76
77\seealso{\code{\link{randomForest}}}
78
79\author{Andy Liaw \email{andy\_liaw@merck.com}}
80
81\examples{
82data(iris)
83set.seed(543)
84iris.rf <- randomForest(Species~., iris)
85partialPlot(iris.rf, iris, Petal.Width, "versicolor")
86
87## Looping over variables ranked by importance:
88data(airquality)
89airquality <- na.omit(airquality)
90set.seed(131)
91ozone.rf <- randomForest(Ozone ~ ., airquality, importance=TRUE)
92imp <- importance(ozone.rf)
93impvar <- rownames(imp)[order(imp[, 1], decreasing=TRUE)]
94op <- par(mfrow=c(2, 3))
95for (i in seq_along(impvar)) {
96    partialPlot(ozone.rf, airquality, impvar[i], xlab=impvar[i],
97                main=paste("Partial Dependence on", impvar[i]),
98                ylim=c(30, 70))
99}
100par(op)
101}
102\keyword{classif}
103\keyword{regression}
104\keyword{tree}
105
106