# Load dependencies: xgboost for gradient boosting; methods for the S4
# machinery xgboost's model objects rely on when run via Rscript.
# (library() errors loudly on a missing package; require() only warns.)
library(xgboost)
library(methods)

# Read the train/test CSVs; keep label strings as characters so the
# "Class_k" labels can be parsed into integers below.
train <- read.csv("data/train.csv", header = TRUE, stringsAsFactors = FALSE)
test <- read.csv("data/test.csv", header = TRUE, stringsAsFactors = FALSE)

# Drop the first column (row id) from both sets — it is not a feature.
train <- train[, -1]
test <- test[, -1]
8
# Extract the target from the last training column. Labels look like
# "Class_1".."Class_9"; strip the prefix and shift to 0-based integers
# because xgboost expects multiclass labels in [0, num_class).
y <- train[, ncol(train)]
y <- gsub("Class_", "", y)
y <- as.integer(y) - 1

# Stack train and test features into a single numeric matrix so both
# parts receive identical column coercion.
x <- rbind(train[, -ncol(train)], test)
x <- as.matrix(x)
x <- matrix(as.numeric(x), nrow(x), ncol(x))

# Row indices of the training and test portions within x.
trind <- seq_along(y)
teind <- (nrow(train) + 1):nrow(x)
18
# Booster parameters: multi:softprob emits one probability per class per
# row; mlogloss matches the competition metric; 9 target classes;
# 8 threads for training.
param <- list(
  objective = "multi:softprob",
  eval_metric = "mlogloss",
  num_class = 9,
  nthread = 8
)
24
# Cross-validate to sanity-check the round count before the final fit.
# `params` is spelled out in full — the original `param=` only worked via
# partial argument matching.
cv.nrounds <- 50
bst.cv <- xgb.cv(params = param, data = x[trind, ], label = y,
                 nfold = 3, nrounds = cv.nrounds)

# Fit the final model on the whole training portion.
nrounds <- 50
bst <- xgboost(params = param, data = x[trind, ], label = y,
               nrounds = nrounds)
33
# Predict class probabilities for the test rows. predict() returns a flat
# vector of length 9 * n_test (the 9 class probabilities of each row are
# contiguous), so reshape to 9 x n_test and transpose to n_test x 9.
pred <- predict(bst, x[teind, ])
pred <- matrix(pred, 9, length(pred) / 9)
pred <- t(pred)

# Round to 2 significant digits (no scientific notation) to shrink the
# submission file, then prepend a 1-based id column and write the CSV.
pred <- format(pred, digits = 2, scientific = FALSE)
pred <- data.frame(seq_len(nrow(pred)), pred)
names(pred) <- c("id", paste0("Class_", 1:9))
write.csv(pred, file = "submission.csv", quote = FALSE, row.names = FALSE)
44