# Otto Group product classification: fit an xgboost multiclass model on
# data/train.csv, predict class probabilities for data/test.csv, and write
# a Kaggle-style submission file (submission.csv).
library(xgboost)
library(methods)

train <- read.csv("data/train.csv", header = TRUE, stringsAsFactors = FALSE)
test <- read.csv("data/test.csv", header = TRUE, stringsAsFactors = FALSE)
# Drop the id column from both sets; ids are regenerated for the submission.
train <- train[, -1]
test <- test[, -1]

# The last column of train holds the target ("Class_1" .. "Class_9").
# xgboost expects integer labels in [0, num_class), so strip the prefix
# and shift to zero-based.
y <- as.integer(gsub("Class_", "", train[, ncol(train)])) - 1L
num_class <- length(unique(y))  # 9 for this dataset; derived, not hard-coded

# Stack train and test features into one all-numeric matrix so both get
# identical column handling, then remember which rows belong to which set.
x <- rbind(train[, -ncol(train)], test)
x <- as.matrix(x)
x <- matrix(as.numeric(x), nrow(x), ncol(x))
trind <- seq_along(y)               # training rows
teind <- (nrow(train) + 1):nrow(x)  # test rows

# multi:softprob emits one probability per class per row; mlogloss is the
# competition metric.
param <- list(
  objective = "multi:softprob",
  eval_metric = "mlogloss",
  num_class = num_class,
  nthread = 8
)

# Cross-validation to gauge mlogloss before the final fit.
cv.nrounds <- 50
bst.cv <- xgb.cv(
  param = param, data = x[trind, ], label = y,
  nfold = 3, nrounds = cv.nrounds
)

# Train the final model on the full training set.
nrounds <- 50
bst <- xgboost(param = param, data = x[trind, ], label = y, nrounds = nrounds)

# predict() returns a flat vector of length nrow(test) * num_class with the
# classes for each row stored contiguously, so reshape class-major and
# transpose to get rows = observations, columns = classes.
pred <- predict(bst, x[teind, ])
pred <- t(matrix(pred, num_class, length(pred) / num_class))

# Shrink the submission by printing probabilities with 2 significant digits
# and no scientific notation, then attach 1-based ids and class headers.
pred <- format(pred, digits = 2, scientific = FALSE)
pred <- data.frame(seq_len(nrow(pred)), pred)
names(pred) <- c("id", paste0("Class_", seq_len(num_class)))
write.csv(pred, file = "submission.csv", quote = FALSE, row.names = FALSE)