1% Generated by roxygen2: do not edit by hand
2% Please edit documentation in R/xgb.train.R, R/xgboost.R
3\name{xgb.train}
4\alias{xgb.train}
5\alias{xgboost}
6\title{eXtreme Gradient Boosting Training}
7\usage{
8xgb.train(
9  params = list(),
10  data,
11  nrounds,
12  watchlist = list(),
13  obj = NULL,
14  feval = NULL,
15  verbose = 1,
16  print_every_n = 1L,
17  early_stopping_rounds = NULL,
18  maximize = NULL,
19  save_period = NULL,
20  save_name = "xgboost.model",
21  xgb_model = NULL,
22  callbacks = list(),
23  ...
24)
25
26xgboost(
27  data = NULL,
28  label = NULL,
29  missing = NA,
30  weight = NULL,
31  params = list(),
32  nrounds,
33  verbose = 1,
34  print_every_n = 1L,
35  early_stopping_rounds = NULL,
36  maximize = NULL,
37  save_period = NULL,
38  save_name = "xgboost.model",
39  xgb_model = NULL,
40  callbacks = list(),
41  ...
42)
43}
44\arguments{
45\item{params}{the list of parameters. The complete list of parameters is
46  available in the \href{http://xgboost.readthedocs.io/en/latest/parameter.html}{online documentation}. Below
47  is a shorter summary:
48
491. General Parameters
50
51\itemize{
52  \item \code{booster} which booster to use, can be \code{gbtree} or \code{gblinear}. Default: \code{gbtree}.
53}
54
552. Booster Parameters
56
572.1. Parameters for Tree Booster
58
59\itemize{
60  \item \code{eta} controls the learning rate: scales the contribution of each tree by a factor of \code{0 < eta < 1} when it is added to the current approximation. Used to prevent overfitting by making the boosting process more conservative. A lower value for \code{eta} implies a larger value for \code{nrounds}: a low \code{eta} value makes the model more robust to overfitting but slower to compute. Default: 0.3
61  \item \code{gamma} minimum loss reduction required to make a further partition on a leaf node of the tree. The larger, the more conservative the algorithm will be.
62  \item \code{max_depth} maximum depth of a tree. Default: 6
63  \item \code{min_child_weight} minimum sum of instance weight (hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, then the building process will give up further partitioning. In linear regression mode, this simply corresponds to minimum number of instances needed to be in each node. The larger, the more conservative the algorithm will be. Default: 1
64  \item \code{subsample} subsample ratio of the training instances. Setting it to 0.5 means that xgboost randomly samples half of the data instances to grow trees, which helps prevent overfitting. It also makes computation shorter (because there is less data to analyse). It is advised to use this parameter with \code{eta} and increase \code{nrounds}. Default: 1
65  \item \code{colsample_bytree} subsample ratio of columns when constructing each tree. Default: 1
66  \item \code{lambda} L2 regularization term on weights. Default: 1
67  \item \code{alpha} L1 regularization term on weights. (there is no L1 reg on bias because it is not important). Default: 0
68  \item \code{num_parallel_tree} Experimental parameter. Number of trees to grow per round. Useful to test Random Forest through XGBoost (set \code{colsample_bytree < 1}, \code{subsample < 1} and \code{nrounds = 1} accordingly). Default: 1
69  \item \code{monotone_constraints} A numerical vector consisting of \code{1}, \code{0} and \code{-1}, with its length equal to the number of features in the training data. \code{1} is increasing, \code{-1} is decreasing and \code{0} is no constraint.
70  \item \code{interaction_constraints} A list of vectors specifying feature indices of permitted interactions. Each item of the list represents one permitted interaction where specified features are allowed to interact with each other. Feature index values should start from \code{0} (\code{0} references the first column).  Leave argument unspecified for no interaction constraints.
71}
72
732.2. Parameters for Linear Booster
74
75\itemize{
76  \item \code{lambda} L2 regularization term on weights. Default: 0
77  \item \code{lambda_bias} L2 regularization term on bias. Default: 0
78  \item \code{alpha} L1 regularization term on weights. (there is no L1 reg on bias because it is not important). Default: 0
79}
80
813. Task Parameters
82
83\itemize{
84\item \code{objective} specifies the learning task and the corresponding learning objective. Users can pass a self-defined function to it. The default objective options are below:
85  \itemize{
86    \item \code{reg:squarederror} Regression with squared loss (Default).
87    \item \code{reg:squaredlogerror}: regression with squared log loss \eqn{1/2 * (log(pred + 1) - log(label + 1))^2}. All inputs are required to be greater than -1. Also, see metric rmsle for possible issue with this objective.
88    \item \code{reg:logistic} logistic regression.
89    \item \code{reg:pseudohubererror}: regression with Pseudo Huber loss, a twice differentiable alternative to absolute loss.
90    \item \code{binary:logistic} logistic regression for binary classification. Output probability.
91    \item \code{binary:logitraw} logistic regression for binary classification, output score before logistic transformation.
92    \item \code{binary:hinge}: hinge loss for binary classification. This makes predictions of 0 or 1, rather than producing probabilities.
93    \item \code{count:poisson}: Poisson regression for count data, output mean of Poisson distribution. \code{max_delta_step} is set to 0.7 by default in poisson regression (used to safeguard optimization).
94    \item \code{survival:cox}: Cox regression for right censored survival time data (negative values are considered right censored). Note that predictions are returned on the hazard ratio scale (i.e., as HR = exp(marginal_prediction) in the proportional hazard function \code{h(t) = h0(t) * HR}).
95    \item \code{survival:aft}: Accelerated failure time model for censored survival time data. See \href{https://xgboost.readthedocs.io/en/latest/tutorials/aft_survival_analysis.html}{Survival Analysis with Accelerated Failure Time} for details.
96    \item \code{aft_loss_distribution}: Probability Density Function used by \code{survival:aft} and \code{aft-nloglik} metric.
97    \item \code{multi:softmax} set xgboost to do multiclass classification using the softmax objective. Class is represented by a number and should be from 0 to \code{num_class - 1}.
98    \item \code{multi:softprob} same as softmax, but prediction outputs a vector of ndata * nclass elements, which can be further reshaped to an ndata by nclass matrix. The result contains predicted probabilities of each data point belonging to each class.
99    \item \code{rank:pairwise} set xgboost to do ranking task by minimizing the pairwise loss.
100    \item \code{rank:ndcg}: Use LambdaMART to perform list-wise ranking where \href{https://en.wikipedia.org/wiki/Discounted_cumulative_gain}{Normalized Discounted Cumulative Gain (NDCG)} is maximized.
101    \item \code{rank:map}: Use LambdaMART to perform list-wise ranking where \href{https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Mean_average_precision}{Mean Average Precision (MAP)} is maximized.
102    \item \code{reg:gamma}: gamma regression with log-link. Output is a mean of gamma distribution. It might be useful, e.g., for modeling insurance claims severity, or for any outcome that might be \href{https://en.wikipedia.org/wiki/Gamma_distribution#Applications}{gamma-distributed}.
103    \item \code{reg:tweedie}: Tweedie regression with log-link. It might be useful, e.g., for modeling total loss in insurance, or for any outcome that might be \href{https://en.wikipedia.org/wiki/Tweedie_distribution#Applications}{Tweedie-distributed}.
104  }
105  \item \code{base_score} the initial prediction score of all instances, global bias. Default: 0.5
106  \item \code{eval_metric} evaluation metrics for validation data. Users can pass a self-defined function to it. Default: metric will be assigned according to the objective (rmse for regression, error for classification, and mean average precision for ranking). The list of built-in metrics is provided in the Details section.
107}}
108
109\item{data}{training dataset. \code{xgb.train} accepts only an \code{xgb.DMatrix} as the input.
110\code{xgboost}, in addition, also accepts \code{matrix}, \code{dgCMatrix}, or name of a local data file.}
111
112\item{nrounds}{max number of boosting iterations.}
113
114\item{watchlist}{named list of xgb.DMatrix datasets to use for evaluating model performance.
115Metrics specified in either \code{eval_metric} or \code{feval} will be computed for each
116of these datasets during each boosting iteration, and stored in the end as a field named
117\code{evaluation_log} in the resulting object. When either \code{verbose>=1} or
118\code{\link{cb.print.evaluation}} callback is engaged, the performance results are continuously
119printed out during the training.
120E.g., specifying \code{watchlist=list(validation1=mat1, validation2=mat2)} allows tracking
121the performance of each round's model on mat1 and mat2.}
122
123\item{obj}{customized objective function. Returns gradient and second order
124gradient with given prediction and dtrain.}
125
126\item{feval}{customized evaluation function. Returns
127\code{list(metric='metric-name', value='metric-value')} with given
128prediction and dtrain.}
129
130\item{verbose}{If 0, xgboost will stay silent. If 1, it will print information about performance.
131If 2, some additional information will be printed out.
132Note that setting \code{verbose > 0} automatically engages the
133\code{cb.print.evaluation(period=1)} callback function.}
134
135\item{print_every_n}{Print each n-th iteration evaluation messages when \code{verbose>0}.
136Default is 1 which means all messages are printed. This parameter is passed to the
137\code{\link{cb.print.evaluation}} callback.}
138
139\item{early_stopping_rounds}{If \code{NULL}, the early stopping function is not triggered.
140If set to an integer \code{k}, training with a validation set will stop if the performance
141doesn't improve for \code{k} rounds.
142Setting this parameter engages the \code{\link{cb.early.stop}} callback.}
143
144\item{maximize}{If \code{feval} and \code{early_stopping_rounds} are set,
145then this parameter must be set as well.
146When it is \code{TRUE}, it means the larger the evaluation score the better.
147This parameter is passed to the \code{\link{cb.early.stop}} callback.}
148
149\item{save_period}{when it is non-NULL, the model is saved to disk after every \code{save_period} rounds;
150\code{0} means save at the end. The saving is handled by the \code{\link{cb.save.model}} callback.}
151
152\item{save_name}{the name or path for periodically saved model file.}
153
154\item{xgb_model}{a previously built model to continue the training from.
155Could be either an object of class \code{xgb.Booster}, or its raw data, or the name of a
156file with a previously saved model.}
157
158\item{callbacks}{a list of callback functions to perform various tasks during boosting.
159See \code{\link{callbacks}}. Some of the callbacks are automatically created depending on the
160parameters' values. User can provide either existing or their own callback methods in order
161to customize the training process.}
162
163\item{...}{other parameters to pass to \code{params}.}
164
165\item{label}{vector of response values. Should not be provided when data is
166a local data file name or an \code{xgb.DMatrix}.}
167
168\item{missing}{by default is set to NA, which means that NA values should be considered as 'missing'
169by the algorithm. Sometimes, 0 or other extreme value might be used to represent missing values.
170This parameter is only used when input is a dense matrix.}
171
172\item{weight}{a vector indicating the weight for each row of the input.}
173}
174\value{
175An object of class \code{xgb.Booster} with the following elements:
176\itemize{
177  \item \code{handle} a handle (pointer) to the xgboost model in memory.
178  \item \code{raw} a cached memory dump of the xgboost model saved as R's \code{raw} type.
179  \item \code{niter} number of boosting iterations.
180  \item \code{evaluation_log} evaluation history stored as a \code{data.table} with the
181        first column corresponding to iteration number and the rest corresponding to evaluation
182        metrics' values. It is created by the \code{\link{cb.evaluation.log}} callback.
183  \item \code{call} a function call.
184  \item \code{params} parameters that were passed to the xgboost library. Note that it does not
185        capture parameters changed by the \code{\link{cb.reset.parameters}} callback.
186  \item \code{callbacks} callback functions that were either automatically assigned or
187        explicitly passed.
188  \item \code{best_iteration} iteration number with the best evaluation metric value
189        (only available with early stopping).
190  \item \code{best_score} the best evaluation metric value during early stopping
191        (only available with early stopping).
192  \item \code{feature_names} names of the training dataset features
193        (only when column names were defined in training data).
194  \item \code{nfeatures} number of features in training data.
195}
196}
197\description{
198\code{xgb.train} is an advanced interface for training an xgboost model.
199The \code{xgboost} function is a simpler wrapper for \code{xgb.train}.
200}
201\details{
202These are the training functions for \code{xgboost}.
203
204The \code{xgb.train} interface supports advanced features such as \code{watchlist},
205customized objective and evaluation metric functions, therefore it is more flexible
206than the \code{xgboost} interface.
207
208Parallelization is automatically enabled if \code{OpenMP} is present.
209Number of threads can also be manually specified via \code{nthread} parameter.
210
211The evaluation metric is chosen automatically by XGBoost (according to the objective)
212when the \code{eval_metric} parameter is not provided.
213User may set one or several \code{eval_metric} parameters.
214Note that when using a customized metric, only this single metric can be used.
215The following is the list of built-in metrics for which XGBoost provides optimized implementation:
216  \itemize{
217     \item \code{rmse} root mean square error. \url{https://en.wikipedia.org/wiki/Root_mean_square_error}
218     \item \code{logloss} negative log-likelihood. \url{https://en.wikipedia.org/wiki/Log-likelihood}
219     \item \code{mlogloss} multiclass logloss. \url{https://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html}
220     \item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
221           By default, it uses the 0.5 threshold for predicted values to define negative and positive instances.
222           A different threshold \code{t} can be specified as \code{"error@t"} (e.g., \code{"error@0."} for a threshold of 0).
223     \item \code{merror} Multiclass classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
224     \item \code{mae} Mean absolute error
225     \item \code{mape} Mean absolute percentage error
226     \item \code{auc} Area under the curve, for ranking evaluation. \url{https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve}
227     \item \code{aucpr} Area under the PR curve, for ranking evaluation. \url{https://en.wikipedia.org/wiki/Precision_and_recall}
228     \item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{https://en.wikipedia.org/wiki/NDCG}
229  }
230
231The following callbacks are automatically created when certain parameters are set:
232\itemize{
233  \item \code{cb.print.evaluation} is turned on when \code{verbose > 0};
234        and the \code{print_every_n} parameter is passed to it.
235  \item \code{cb.evaluation.log} is on when \code{watchlist} is present.
236  \item \code{cb.early.stop}: when \code{early_stopping_rounds} is set.
237  \item \code{cb.save.model}: when \code{save_period > 0} is set.
238}
239}
240\examples{
241data(agaricus.train, package='xgboost')  # example datasets shipped in the 'xgboost' package
242data(agaricus.test, package='xgboost')
243
244dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))  # xgb.train accepts only an xgb.DMatrix as input
245dtest <- with(agaricus.test, xgb.DMatrix(data, label = label))
246watchlist <- list(train = dtrain, eval = dtest)  # named datasets evaluated at each boosting iteration
247
248## A simple xgb.train example (binary classification, AUC computed for every watchlist entry):
249param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
250              objective = "binary:logistic", eval_metric = "auc")
251bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)  # watchlist is matched positionally (4th formal argument)
252
253
254## An xgb.train example where custom objective and evaluation metric are used:
255logregobj <- function(preds, dtrain) {  # custom objective: returns gradient and second-order gradient of the logistic loss
256   labels <- getinfo(dtrain, "label")  # labels stored in the training xgb.DMatrix
257   preds <- 1/(1 + exp(-preds))  # logistic (sigmoid) transform of the incoming scores
258   grad <- preds - labels  # first derivative of the log loss with respect to the score
259   hess <- preds * (1 - preds)  # second derivative of the log loss with respect to the score
260   return(list(grad = grad, hess = hess))  # list with named elements grad and hess
261}
262evalerror <- function(preds, dtrain) {  # custom evaluation metric: classification error rate
263  labels <- getinfo(dtrain, "label")  # labels stored in the evaluated xgb.DMatrix
264  err <- as.numeric(sum(labels != (preds > 0)))/length(labels)  # share of rows whose label differs from (preds > 0)
265  return(list(metric = "error", value = err))  # the list(metric, value) shape documented for 'feval'
266}
267
268# The two functions above can be supplied in any of three ways, either:
269#  as 'objective' and 'eval_metric' parameters in the params list:
270param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
271              objective = logregobj, eval_metric = evalerror)
272bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
273
274#  or through the ... arguments (other parameters to pass to params):
275param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2)
276bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
277                 objective = logregobj, eval_metric = evalerror)
278
279#  or as dedicated 'obj' and 'feval' parameters of xgb.train:
280bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
281                 obj = logregobj, feval = evalerror)
282
283
284## An xgb.train example of using variable learning rates at each iteration:
285param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
286              objective = "binary:logistic", eval_metric = "auc")
287my_etas <- list(eta = c(0.5, 0.1))  # presumably one eta per boosting round (nrounds = 2 below) - confirm in cb.reset.parameters
288bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
289                 callbacks = list(cb.reset.parameters(my_etas)))
290
291## Early stopping (training stops if performance does not improve for 3 rounds):
292bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
293                 early_stopping_rounds = 3)
294
295## An 'xgboost' interface example (data and label are passed directly; extra arguments reach params via ...):
296bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
297               max_depth = 2, eta = 1, nthread = 2, nrounds = 2,
298               objective = "binary:logistic")
299pred <- predict(bst, agaricus.test$data)
300
301}
302\references{
303Tianqi Chen and Carlos Guestrin, "XGBoost: A Scalable Tree Boosting System",
30422nd SIGKDD Conference on Knowledge Discovery and Data Mining, 2016, \url{https://arxiv.org/abs/1603.02754}
305}
306\seealso{
307\code{\link{callbacks}},
308\code{\link{predict.xgb.Booster}},
309\code{\link{xgb.cv}}
310}
311