require(xgboost)
# load in the agaricus dataset
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
# note: with a customized objective function, we leave `objective` unset,
# so what we get in prediction is the raw margin value
# you must know what you are doing
param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0)
watchlist <- list(eval = dtest)
num_round <- 20
# user-defined objective function: given predictions (margins) and the training
# DMatrix, return the first- and second-order gradients of the loss
# this is the logistic log-likelihood loss
logregobj <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  preds <- 1 / (1 + exp(-preds))  # sigmoid: margin -> probability
  grad <- preds - labels          # d(loss)/d(margin)
  hess <- preds * (1 - preds)     # d2(loss)/d(margin)^2
  return(list(grad = grad, hess = hess))
}
# user-defined evaluation function: return a pair (metric_name, result)
# NOTE: with a customized loss function, the prediction value passed in is the
# margin. This can make built-in evaluation metrics misbehave: for logistic loss
# the predictions are scores *before* the logistic transformation, while the
# built-in error metric assumes scores *after* it. Keep this in mind when you use
# the customization; you may need to write a matching customized evaluation
# function, as done here (a margin > 0 corresponds to a probability > 0.5)
evalerror <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  err <- as.numeric(sum(labels != (preds > 0))) / length(labels)
  return(list(metric = "error", value = err))
}
print('start training with early stopping setting')

bst <- xgb.train(param, dtrain, num_round, watchlist,
                 objective = logregobj, eval_metric = evalerror, maximize = FALSE,
                 early_stopping_rounds = 3)
cv <- xgb.cv(param, dtrain, num_round, nfold = 5,
             objective = logregobj, eval_metric = evalerror,
             maximize = FALSE, early_stopping_rounds = 3)
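
# With a customized objective, predict() on the fitted booster also returns raw
# margin scores. A minimal sketch (assuming the `bst` booster trained above) of
# recovering probabilities via the same sigmoid used in logregobj:
margin <- predict(bst, dtest)
prob <- 1 / (1 + exp(-margin))        # margin -> probability
pred_label <- as.numeric(prob > 0.5)  # same decision rule as evalerror (margin > 0)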
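
# A hedged sketch of inspecting the early-stopping outcome; it assumes the
# classic xgboost R interface, where the booster carries best_iteration /
# best_score fields and xgb.cv returns an evaluation_log data.table:
print(bst$best_iteration)  # round with the best eval error
print(bst$best_score)      # eval error at that round
print(cv$evaluation_log)   # per-round error from cross-validation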