% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgb.plot.shap.R
\name{xgb.plot.shap}
\alias{xgb.plot.shap}
\title{SHAP contribution dependency plots}
\usage{
xgb.plot.shap(
  data,
  shap_contrib = NULL,
  features = NULL,
  top_n = 1,
  model = NULL,
  trees = NULL,
  target_class = NULL,
  approxcontrib = FALSE,
  subsample = NULL,
  n_col = 1,
  col = rgb(0, 0, 1, 0.2),
  pch = ".",
  discrete_n_uniq = 5,
  discrete_jitter = 0.01,
  ylab = "SHAP",
  plot_NA = TRUE,
  col_NA = rgb(0.7, 0, 1, 0.6),
  pch_NA = ".",
  pos_NA = 1.07,
  plot_loess = TRUE,
  col_loess = 2,
  span_loess = 0.5,
  which = c("1d", "2d"),
  plot = TRUE,
  ...
)
}
\arguments{
\item{data}{data as a \code{matrix} or \code{dgCMatrix}.}

\item{shap_contrib}{a matrix of SHAP contributions that was computed earlier for the above
\code{data}. When it is NULL, it is computed internally using \code{model} and \code{data}.}

\item{features}{a vector of either column indices or feature names to plot. When it is NULL,
feature importance is calculated, and the \code{top_n} highest-ranked features are taken.}

\item{top_n}{when \code{features} is NULL, the \code{top_n} most important features in the model
are taken; must be between 1 and 100.}

\item{model}{an \code{xgb.Booster} model. It has to be provided when either \code{shap_contrib}
or \code{features} is missing.}

\item{trees}{passed to \code{\link{xgb.importance}} when \code{features = NULL}.}

\item{target_class}{only relevant for multiclass models. When it is set to a 0-based class index,
only SHAP contributions for that specific class are used.
If it is not set, SHAP importances are averaged over all classes.}

\item{approxcontrib}{passed to \code{\link{predict.xgb.Booster}} when \code{shap_contrib = NULL}.}

\item{subsample}{a random fraction of data points to use for plotting. When it is NULL,
it is set so that up to 100K data points are used.}

\item{n_col}{the number of columns in a grid of plots.}

\item{col}{color of the scatterplot markers.}

\item{pch}{scatterplot marker.}

\item{discrete_n_uniq}{the maximal number of unique values in a feature for it to be considered discrete.}

\item{discrete_jitter}{the \code{amount} parameter of jitter added to discrete features' positions.}

\item{ylab}{the y-axis label in 1D plots.}

\item{plot_NA}{whether the contributions of cases with missing values should also be plotted.}

\item{col_NA}{the marker color for missing-value contributions.}

\item{pch_NA}{the marker type for NA values.}

\item{pos_NA}{the relative x-location at which NA values are shown:
\code{min(x) + (max(x) - min(x)) * pos_NA}.}

\item{plot_loess}{whether to plot loess-smoothed curves. Smoothing is only done for features with
more than 5 distinct values.}

\item{col_loess}{the color to use for the loess curves.}

\item{span_loess}{the \code{span} parameter of the \code{\link[stats]{loess}} call.}

\item{which}{whether to do univariate or bivariate plotting. NOTE: only "1d" is implemented so far.}

\item{plot}{whether a plot should be drawn. If FALSE, only a list of matrices is returned.}

\item{...}{other parameters passed to \code{plot}.}
}
\value{
In addition to producing plots (when \code{plot = TRUE}), it silently returns a list of two matrices:
\itemize{
 \item \code{data} the values of selected features;
 \item \code{shap_contrib} the contributions of selected features.
}
}
\description{
Visualize how SHAP feature contributions to the model prediction depend on feature values.
}
\details{
These scatterplots show how SHAP feature contributions depend on feature values.
They are similar to partial dependence plots in that they also give an idea of how feature values
affect predictions. However, while partial dependence plots usually show the marginal dependence
of the model prediction on a feature's value, SHAP contribution dependency plots display the
estimated contribution of a feature to the model prediction for each individual case.

When \code{plot_loess = TRUE} is set, feature values are rounded to 3 significant digits, and
a weighted LOESS curve is computed and plotted, where the weights are the numbers of data points
at each rounded value.
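
The following is a minimal sketch of that weighting scheme, not the function's internal
implementation, assuming \code{x} holds one feature's values and \code{shap} its contributions:
\preformatted{
x_round <- signif(x, 3)                           # round to 3 significant digits
d <- data.frame(x = x_round, shap = shap)
agg <- aggregate(shap ~ x, data = d, FUN = mean)  # mean contribution per rounded value
agg$w <- as.vector(table(d$x))                    # weights = counts at each rounded value
fit <- stats::loess(shap ~ x, data = agg, weights = w, span = 0.5)
lines(agg$x, predict(fit), col = 2)               # assumes a scatterplot is already drawn
}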

Note: SHAP contributions are shown on the scale of the model margin. E.g., for a logistic binomial
objective, the margin is the prediction before the sigmoid transform into probability-like values.
Also, since SHAP stands for "SHapley Additive exPlanations" (model prediction = sum of SHAP
contributions for all features + bias), depending on the objective used, transforming SHAP
contributions for a feature from the margin scale to the prediction space is not necessarily
a meaningful thing to do.
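
Since the contributions are additive on the margin scale, one way to sanity-check a computed
\code{shap_contrib} matrix (a sketch, assuming a binary-logistic booster \code{bst} fitted as in
the examples below) is to compare its row sums, which include the \code{BIAS} column, against
the transformed predictions:
\preformatted{
contr <- predict(bst, agaricus.test$data, predcontrib = TRUE)
margin <- rowSums(contr)        # SHAP contributions + bias = margin prediction
prob <- 1 / (1 + exp(-margin))  # sigmoid transform back to probabilities
all.equal(prob, predict(bst, agaricus.test$data), tolerance = 1e-5)
}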
}
\examples{

data(agaricus.train, package = "xgboost")
data(agaricus.test, package = "xgboost")

bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = 50,
               eta = 0.1, max_depth = 3, subsample = 0.5,
               tree_method = "hist", objective = "binary:logistic", nthread = 2, verbose = 0)

xgb.plot.shap(agaricus.test$data, model = bst, features = "odor=none")
contr <- predict(bst, agaricus.test$data, predcontrib = TRUE)
xgb.plot.shap(agaricus.test$data, contr, model = bst, top_n = 12, n_col = 3)
xgb.ggplot.shap.summary(agaricus.test$data, contr, model = bst, top_n = 12)  # Summary plot
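
# With plot = FALSE, no plot is drawn, but the list of matrices documented in the
# Value section is still (silently) returned, so the selected feature values and
# their contributions can be inspected directly:
shap_data <- xgb.plot.shap(agaricus.test$data, contr, model = bst, top_n = 4, plot = FALSE)
str(shap_data)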

# Multiclass example - plots for each class separately:
nclass <- 3
nrounds <- 20
x <- as.matrix(iris[, -5])
set.seed(123)
is.na(x[sample(nrow(x) * 4, 30)]) <- TRUE  # introduce some missing values
mbst <- xgboost(data = x, label = as.numeric(iris$Species) - 1, nrounds = nrounds,
                max_depth = 2, eta = 0.3, subsample = 0.5, nthread = 2,
                objective = "multi:softprob", num_class = nclass, verbose = 0)
# In multiclass models, trees are interleaved by class: the trees of class N are
# those with indices N, N + nclass, N + 2 * nclass, ...
trees0 <- seq(from = 0, by = nclass, length.out = nrounds)
col <- rgb(0, 0, 1, 0.5)
xgb.plot.shap(x, model = mbst, trees = trees0, target_class = 0, top_n = 4,
              n_col = 2, col = col, pch = 16, pch_NA = 17)
xgb.plot.shap(x, model = mbst, trees = trees0 + 1, target_class = 1, top_n = 4,
              n_col = 2, col = col, pch = 16, pch_NA = 17)
xgb.plot.shap(x, model = mbst, trees = trees0 + 2, target_class = 2, top_n = 4,
              n_col = 2, col = col, pch = 16, pch_NA = 17)
xgb.ggplot.shap.summary(x, model = mbst, target_class = 0, top_n = 4)  # Summary plot

}
\references{
Scott M. Lundberg, Su-In Lee, "A Unified Approach to Interpreting Model Predictions", NIPS Proceedings 2017, \url{https://arxiv.org/abs/1705.07874}

Scott M. Lundberg, Su-In Lee, "Consistent feature attribution for tree ensembles", \url{https://arxiv.org/abs/1706.06060}
}