require(xgboost)
# load the agaricus dataset
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)

# NOTE: with a customized objective function we do not set a built-in objective,
# so the predictions passed around are raw margin values, not probabilities
# (see the prediction sketch after xgb.train below); be sure you know what
# you are doing
watchlist <- list(eval = dtest, train = dtrain)
num_round <- 2

# user-defined objective function: given the predictions, return the gradient
# and the second-order gradient (hessian); this implements the logistic log-loss
logregobj <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  # preds are margin values; apply the sigmoid to turn them into probabilities
  preds <- 1 / (1 + exp(-preds))
  grad <- preds - labels
  hess <- preds * (1 - preds)
  return(list(grad = grad, hess = hess))
}
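
# Why these formulas: for the logistic loss on a single example,
#   l(y, x) = -(y * log(p) + (1 - y) * log(1 - p)),  with p = 1 / (1 + exp(-x)),
# differentiating with respect to the margin x gives
#   dl/dx   = p - y          (grad above)
#   d2l/dx2 = p * (1 - p)    (hess above)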

# user-defined evaluation function: return a list(metric = "name", value = result)
# NOTE: with a customized objective the predictions are margin values, which can
# make built-in evaluation metrics misbehave: here the predictions are scores
# before the logistic transformation, while the built-in error metric assumes
# values after it; keep this in mind and write a customized evaluation function
# where needed
evalerror <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  # preds are margins, so threshold at 0, which corresponds to probability 0.5
  err <- as.numeric(sum(labels != (preds > 0))) / length(labels)
  return(list(metric = "error", value = err))
}
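
# a quick sanity check of evalerror on a toy DMatrix (a minimal sketch; the
# feature and label values below are made up purely for illustration)
toy <- xgb.DMatrix(matrix(c(1, 0), ncol = 1), label = c(1, 0))
evalerror(c(2.5, -1.3), toy)  # both margins on the correct side -> error of 0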

param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0,
              objective = logregobj, eval_metric = evalerror)
print('start training with user customized objective')
# training with the customized objective; step-by-step training is also
# possible, see the implementation of xgb.train
bst <- xgb.train(param, dtrain, num_round, watchlist)
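
# a minimal sketch of using the trained model: with a customized objective,
# predict() returns raw margins, so apply the sigmoid manually to recover
# probabilities
pred_margin <- predict(bst, dtest)
pred_prob <- 1 / (1 + exp(-pred_margin))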

#
# There can be cases where you want additional information considered besides
# the properties of the DMatrix that getinfo() can retrieve; you can attach
# such information as attributes of the DMatrix.

# set the 'label' attribute of dtrain; label is just an example, it can be
# any data you need in the customized functions
attr(dtrain, 'label') <- getinfo(dtrain, 'label')
# this new customized objective accesses the information set above; the same
# approach applies to customized evaluation functions
logregobjattr <- function(preds, dtrain) {
  # the attribute is now accessible inside the customized function
  labels <- attr(dtrain, 'label')
  preds <- 1 / (1 + exp(-preds))
  grad <- preds - labels
  hess <- preds * (1 - preds)
  return(list(grad = grad, hess = hess))
}
param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0,
              objective = logregobjattr, eval_metric = evalerror)
print('start training with user customized objective, with additional attributes in DMatrix')
# train as before, now with the attribute-aware objective
bst <- xgb.train(param, dtrain, num_round, watchlist)
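
# a minimal sketch, for comparison: training with the built-in objective and
# metric should report matching error values, since the gradients computed by
# 'binary:logistic' are the same as in logregobj
param_builtin <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0,
                      objective = 'binary:logistic', eval_metric = 'error')
bst_builtin <- xgb.train(param_builtin, dtrain, num_round, watchlist)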