# tuning random forest over mtry parameter in parallel
#
# NOTE: this script does not register a parallel backend. Without one,
# foreach evaluates %dopar% sequentially and emits a warning; register a
# backend before calling tune() to actually run the fits in parallel.

library(foreach)
library(randomForest)

# A simple iterator over different values for the mtry argument.
#
# Yields `from`, then repeatedly ceiling(previous * stepFactor), for as long
# as the value does not exceed `to`. Exhaustion is signalled by an error whose
# message is 'StopIteration'.
#
# Arguments:
#   from        first value to yield; must be a single number > 0
#   to          upper bound (inclusive); must be a single number
#   stepFactor  multiplicative step; must be a single number > 1
#
# Returns a list with a `nextElem` function, classed
# c("abstractiter", "iter").
mtryiter <- function(from, to, stepFactor = 2) {
  # With from <= 0 or stepFactor <= 1 the sequence never moves past `to`,
  # so the iterator would never be exhausted. Reject such input up front.
  stopifnot(
    is.numeric(from), length(from) == 1L, from > 0,
    is.numeric(to), length(to) == 1L, !is.na(to),
    is.numeric(stepFactor), length(stepFactor) == 1L, stepFactor > 1
  )

  nextEl <- function() {
    if (from > to) stop("StopIteration")
    i <- from
    # `<<-` advances the `from` held in the enclosing mtryiter() call frame,
    # which is the iterator's only state.
    from <<- ceiling(from * stepFactor)
    i
  }

  obj <- list(nextElem = nextEl)
  class(obj) <- c("abstractiter", "iter")
  obj
}

# vector of ntree values that we're interested in
vntree <- c(25, 50, 100, 200, 500, 1000)

# Gets random forest error information for different values of mtry.
#
# Fits one forest of max(ntree) trees per mtry value produced by
# mtryiter(1, ncol(x)) and row-binds, for every fit, the error recorded at
# each tree count in `ntree`.
#
# Arguments:
#   x, y         passed straight to randomForest()
#   ntree        tree counts at which the error is read off
#   mtry         not used in the body; the value comes from the iterator
#   keep.forest  not used in the body; the fit always uses keep.forest=FALSE
#   ...          forwarded to randomForest()
# (presumably `mtry` and `keep.forest` are named here so that they cannot
# reach randomForest() a second time through `...` -- confirm with author)
#
# Returns a data frame with columns ntree, mtry and error.
tune <- function(x, y, ntree = vntree, mtry = NULL, keep.forest = FALSE, ...) {
  # The combiner appends one fit's rows to the accumulated result `a`.
  # For a factor response the error is the first column of `err.rate`;
  # otherwise it is `mse`.
  comb <- if (is.factor(y)) {
    function(a, b) {
      rbind(a, data.frame(ntree = ntree, mtry = b$mtry,
                          error = b$err.rate[ntree, 1]))
    }
  } else {
    function(a, b) {
      rbind(a, data.frame(ntree = ntree, mtry = b$mtry,
                          error = b$mse[ntree]))
    }
  }

  # .init=NULL makes the first combine call rbind(NULL, <data frame>).
  foreach(mtry = mtryiter(1, ncol(x)), .combine = comb, .init = NULL,
          .packages = "randomForest") %dopar% {
    randomForest(x, y, ntree = max(ntree), mtry = mtry,
                 keep.forest = FALSE, ...)
  }
}

# generate the inputs
x <- matrix(runif(2000), 100)
y <- gl(2, 50)

# execute randomForest
results <- tune(x, y)

# print the result
print(results)