1# tuning random forest over mtry parameter in parallel
2
3library(foreach)
4library(randomForest)
5
# A simple iterator over geometrically growing candidate values for the mtry
# argument.
#
# Successive calls to nextElem() return from, ceiling(from * stepFactor), and
# so on, for as long as the value does not exceed 'to'. Once it does,
# nextElem() raises an error whose message is 'StopIteration'.
#
# Arguments:
#   from        first value to return; must be a single number > 0
#   to          upper bound (inclusive) on the values returned
#   stepFactor  multiplier applied after each value; must be a single number > 1
#
# Returns a list holding the nextElem() function, with class
# c('abstractiter', 'iter').
mtryiter <- function(from, to, stepFactor=2) {
  # ceiling(from * stepFactor) only moves past 'from' when from > 0 and
  # stepFactor > 1; with anything else the sequence stalls (or shrinks), the
  # 'from > to' test below never fires, and the iterator never finishes
  stopifnot(is.numeric(from), length(from) == 1, from > 0,
            is.numeric(stepFactor), length(stepFactor) == 1, stepFactor > 1)

  nextEl <- function() {
    if (from > to) stop('StopIteration')
    i <- from
    # '<<-' updates the enclosing function's 'from', which is the iterator state
    from <<- ceiling(from * stepFactor)
    i
  }
  obj <- list(nextElem=nextEl)
  class(obj) <- c('abstractiter', 'iter')
  obj
}
18
# forest sizes (numbers of trees) at which the error is reported;
# used as the default for the ntree argument of tune()
vntree <- c(
  25, 50, 100,
  200, 500, 1000
)
21
# Collects random forest error information for different values of mtry,
# fitting the forests as foreach tasks via %dopar%.
#
# One forest of max(ntree) trees is fit for each mtry value produced by
# mtryiter(1, ncol(x)); the error after each requested number of trees is then
# read from that single fit.
#
# Arguments:
#   x, y         predictors and response, passed to randomForest() as given.
#                When y is a factor the error is taken from the first column
#                of the fit's err.rate matrix, otherwise from its mse vector.
#   ntree        numbers of trees at which to report the error
#                (defaults to vntree)
#   mtry         not used: each fit gets its mtry from the iterator
#   keep.forest  not used: every fit is made with keep.forest=FALSE
#                NOTE(review): these two look like they are declared only so
#                that a caller cannot pass them on through '...' - confirm
#   ...          further arguments passed on to randomForest()
#
# Returns a data frame with columns ntree, mtry and error, holding
# length(ntree) rows for each mtry value tried.
tune <- function(x, y, ntree=vntree, mtry=NULL, keep.forest=FALSE, ...) {
  classify <- is.factor(y)
  maxtrees <- max(ntree)

  # the error is extracted inside the task, so each task hands back a small
  # data frame rather than the whole fitted randomForest object
  foreach(mtry=mtryiter(1, ncol(x)), .combine=rbind, .init=NULL,
          .packages='randomForest') %dopar% {
    fit <- randomForest(x, y, ntree=maxtrees, mtry=mtry, keep.forest=FALSE, ...)
    err <- if (classify) fit$err.rate[ntree, 1] else fit$mse[ntree]
    # fit$mtry (not the loop variable) is reported, as it is the value the
    # fitted object itself records
    data.frame(ntree=ntree, mtry=fit$mtry, error=err)
  }
}
34
# build the example inputs: a 100-row matrix of uniform random predictors
# and a two-level factor response with 50 observations per level
x <- matrix(runif(n=2000), nrow=100)
y <- gl(n=2, k=50)

# run the tuning over mtry
results <- tune(x=x, y=y)

# show the resulting error table
print(results)
44