\name{partialPlot}
\alias{partialPlot}
\alias{partialPlot.default}
\alias{partialPlot.randomForest}

\title{Partial dependence plot}

\description{
Partial dependence plot gives a graphical depiction of the marginal
effect of a variable on the class probability (classification) or
response (regression).
}

\usage{
\method{partialPlot}{randomForest}(x, pred.data, x.var, which.class,
      w, plot = TRUE, add = FALSE,
      n.pt = min(length(unique(pred.data[, xname])), 51),
      rug = TRUE, xlab=deparse(substitute(x.var)), ylab="",
      main=paste("Partial Dependence on", deparse(substitute(x.var))),
      ...)
}

\arguments{
  \item{x}{an object of class \code{randomForest}, which contains a
    \code{forest} component.}
  \item{pred.data}{a data frame used for constructing the plot, usually
    the training data used to construct the random forest.}
  \item{x.var}{name of the variable for which partial
    dependence is to be examined.}
  \item{which.class}{For classification data, the class to focus on
    (default the first class).}
  \item{w}{weights to be used in averaging; if not supplied, mean is not
    weighted.}
  \item{plot}{whether the plot should be shown on the graphic device.}
  \item{add}{whether to add to an existing plot (\code{TRUE}).}
  \item{n.pt}{if \code{x.var} is continuous, the number of points on the
    grid for evaluating partial dependence.}
  \item{rug}{whether to draw hash marks at the bottom of the plot
    indicating the deciles of \code{x.var}.}
  \item{xlab}{label for the x-axis.}
  \item{ylab}{label for the y-axis.}
  \item{main}{main title for the plot.}
  \item{...}{other graphical parameters to be passed on to \code{plot}
    or \code{lines}.}
}

\value{
A list with two components: \code{x} and \code{y}, which are the values
used in the plot.
}

\details{
  The function being plotted is defined as:
  \deqn{
    \tilde{f}(x) = \frac{1}{n} \sum_{i=1}^n f(x, x_{iC}),
  }
  where \eqn{x} is the variable for which partial dependence is sought,
  and \eqn{x_{iC}} are the other variables in the data. The summand is
  the predicted regression function for regression, and logits
  (i.e., log of fraction of votes) for \code{which.class} for
  classification:
\deqn{ f(x) = \log p_k(x) - \frac{1}{K} \sum_{j=1}^K \log p_j(x),}
where \eqn{K} is the number of classes, \eqn{k} is \code{which.class},
and \eqn{p_j} is the proportion of votes for class \eqn{j}.
}
\note{
  The \code{randomForest} object must contain the \code{forest}
  component; i.e., created with \code{randomForest(...,
  keep.forest=TRUE)}.

  This function runs quite slowly for large data sets.
}
\references{
Friedman, J. (2001). Greedy function approximation: the gradient
boosting machine, \emph{Ann. of Stat.}}

\seealso{\code{\link{randomForest}}}

\author{Andy Liaw \email{andy\_liaw@merck.com}}

\examples{
data(iris)
set.seed(543)
iris.rf <- randomForest(Species~., iris)
partialPlot(iris.rf, iris, Petal.Width, "versicolor")

## Looping over variables ranked by importance:
data(airquality)
airquality <- na.omit(airquality)
set.seed(131)
ozone.rf <- randomForest(Ozone ~ ., airquality, importance=TRUE)
imp <- importance(ozone.rf)
impvar <- rownames(imp)[order(imp[, 1], decreasing=TRUE)]
op <- par(mfrow=c(2, 3))
for (i in seq_along(impvar)) {
    partialPlot(ozone.rf, airquality, impvar[i], xlab=impvar[i],
                main=paste("Partial Dependence on", impvar[i]),
                ylim=c(30, 70))
}
par(op)
}
\keyword{classif}
\keyword{regression}
\keyword{tree}