1workhorse.inbagg <- function(object, y, X, W,
2  cFUN, w.training.set, y.training.set, bcontrol, control, ...)
3{
4  formula.list <- object
5  data <- data.frame(y, X, W)
6  mtrees <- vector(mode="list", length=bcontrol$nbagg)
7  if(w.training.set[1] == "all") fit.vals <- 1:length(y)
8
9  for (i in 1:bcontrol$nbagg) {
10    bindx <- sample(1:length(y), bcontrol$ns, replace=bcontrol$replace)
11    if(w.training.set[1] == "oob") fit.vals <- (-bindx)
12    if(w.training.set[1] == "bag") fit.vals <- bindx
13
14    objs <- vector(mode="list", length=length(formula.list))	#prediction models for intermediate variables
15    names(objs) <- names(formula.list)
16
17    addclass <- function() {					##START addclass <- function()
18      for (j in 1:length(formula.list)) {			##Fitting prediction models for intermediates
19        oX <- data[fit.vals, c(paste(formula.list[[j]]$formula[[2]]),
20                   attr(terms(formula.list[[j]]$formula, dataa = data), "term.labels"))]
21        foo <- try(formula.list[[j]]$model(formula.list[[j]]$formula, data = oX))
22        objs[[j]] <- foo
23      }
24
25      fct <- function(newdata) {				##START fct <- function(newdata)
26        if (!is.data.frame(newdata))
27          newdata <- as.data.frame(newdata)
28        add.predictors <- rep(0, nrow(newdata))
29
30        for (j in 1:length(formula.list)){			## predict additional intermediates using fitted models
31          oXnewdata <- newdata[,attr(terms(formula.list[[j]]$formula, data = data), "term.labels")]
32          if(is.null(formula.list[[j]]$predict)) {
33            res <- try(predict(objs[[j]], newdata  = oXnewdata))
34          } else {
35            res <- try(formula.list[[j]]$predict(objs[[j]], newdata  = oXnewdata))
36            }
37###FIX: action for class(res) == "try-error"
38          add.predictors <- data.frame(add.predictors, res)
39        }
40        add.predictors <- add.predictors[,-1]
41        if(is.null(dim(add.predictors))) add.predictors <- matrix(add.predictors, ncol = 1)
42        colnames(add.predictors) <- names(formula.list)
43        add.predictors
44      }
45        					##END fct <- function(newdata)
46      return(fct)
47    }						##END addclass <- function()
48
49
50    bfct <- addclass()				###bfct is a function (addclass)
51
52    if (!is.null(bfct)) {
53      expl.cFUN <- attr(terms(as.formula(cFUN$formula), data = data), "term.labels")
54
55      if(!is.null(cFUN$fixed.function)) {
56         btree <- cFUN
57      } else {
58        W.new <- bfct(X)
59        W.new.names <- sub(".[0-9]$", "", colnames(W.new))
60
61        if(y.training.set[1] == "fitted.bag") {	###contstruct on bag
62          oX <- data.frame(y, X, W.new)[bindx,]
63          right.side <- paste(c(expl.cFUN[!(expl.cFUN %in% W.new.names)], colnames(W.new)[W.new.names %in% expl.cFUN]), collapse = "+")
64          cFUN$formula <- as.formula(paste(cFUN$formula[[2]], "~", right.side))
65        }
66
67        if(y.training.set[1] == "original") {	###construct on original variables
68          if(length(W.new.names)> length(colnames(W))) stop("If classifying function is trained on original intermediate, only one predictive model per intermediate can be constructed.")
69          oX <- data.frame(y, X, W[,W.new.names])
70          names(oX)[(ncol(oX)-ncol(W)+1):ncol(oX)] <- colnames(W.new)
71        }
72
73        if(y.training.set[1] == "fitted.subset") {		###construct on subset
74          oX <- data.frame(y, X, W.new)[!subset,]
75          right.side <- paste(c(expl.cFUN[!(expl.cFUN %in% W.new.names)], colnames(W.new)[W.new.names %in% expl.cFUN]), collapse = "+")
76          cFUN$formula <- as.formula(paste(cFUN$formula[[2]], "~", right.side))
77        }
78        names(oX)[names(oX) == "y"] <- paste(cFUN$formula[[2]])
79        btree <- cFUN$model(cFUN$formula, data = oX, ...)
80        btree <- list(model = btree, predict = cFUN$predict)
81      }
82
83      this <- list(bindx = bindx, btree = btree, bfct=bfct)
84    } else {
85      stop("Predictive function for intermediates not executable: Classifying function can not be applied.")
86    }
87    class(this) <- "thisclass"
88    mtrees[[i]] <- this
89  }
90  mtrees
91}
92
93
94
95
96inbagg <- function(formula, data, ...) UseMethod("inbagg", data)
97
98inbagg.default <- function(formula, data,...)
99{
100  stop(paste("Do not know how to handle objects of class", class(data)))
101}
102
103
104inbagg.data.frame <- function(formula, data, pFUN=NULL,
105 cFUN=list(model = NULL, predict = NULL, training.set = NULL),
106 nbagg = 25, ns = 0.5, replace = FALSE, ...)
107{
108  if(!is.function(cFUN)) {
109    if(is.null(cFUN$model)) {
110    cFUN$model <-  function(formula, data)
111            rpart(formula, data, control = rpart.control(minsplit=2, cp=0, xval=0))
112    if(is.null(cFUN$predict)) cFUN$predict <- function(object, newdata) predict(object, newdata, type = "class")
113    if(is.null(cFUN$training.set))  cFUN$trainig.set <- "fitted.bag"
114    }
115 }
116
117##check formula
118  if(missing(formula)
119    || (length(formula) != 3)
120    || (length(attr(terms(formula[-2], data = data), "term.labels")) < 1))
121    stop("formula missing or incorrect")
122
123  m <- match.call(expand.dots = FALSE)
124  if(is.matrix(eval(m$data, parent.frame())))
125        m$data <- as.data.frame(data)
126
127##editing formula
128  if(length(formula[[2]])==3) {
129    if(!is.function(cFUN)) {
130        if (is.null(cFUN$formula)) y.formula <- as.formula(formula[[2]]) else y.formula <- cFUN$formula
131    }
132
133    w.formula <- XX~YY
134    w.formula[[2]] <- formula[[2]][[3]]
135    w.formula[[3]] <- formula[[3]]
136
137    response <-  paste(formula[[2]][[2]])
138    w.names <- attr(terms(as.formula(formula[[2]]), data = data), "term.labels")
139    x.names <- attr(terms(as.formula(formula), data = data), "term.labels")
140
141    if((length(x.names) == 1) && x.names == ".") x.names <- colnames(data)[!(colnames(data) %in% c(response, w.names))]
142    y <- data[, response]
143    X <- data[, x.names]
144    W <- data[, w.names]
145    if(is.null(dim(X))) X <- matrix(X, ncol = 1, dimnames = list(rownames(W), x.names))
146    if(is.null(dim(W))) W <- matrix(W, ncol = 1, dimnames = list(rownames(X), w.names))
147
148    if(is.function(cFUN)) {
149      y.formula <- as.formula(paste(formula[[2]][[2]], "~", paste(c(x.names, w.names), collapse = "+")))
150      fixed.function <- cFUN
151      cFUN <- list()
152      cFUN$fixed.function <- fixed.function
153    }
154   cFUN$formula <- y.formula
155
156  } else {
157    stop(paste("Specified formula has to be of type y~x~w"))
158  }
159##remove settings of training.set
160  if(is.null(pFUN$training.set)) w.training.set <- "oob" else w.training.set <- pFUN$training.set[1]
161  pFUN$training.set <- NULL
162
163  if(is.null(cFUN$training.set)) y.training.set <- "fitted.bag" else y.training.set <- cFUN$training.set[1]
164  cFUN$training.set <- NULL
165
166  bcontrol <- list(nbagg = nbagg, ns = length(y)*ns, replace = replace)
167
168  if(is.null(w.formula)) stop("no formula for prediction model specified")
169
170  ##formula.list : list of lists which specify an abitrary number of models for intermediate variables:
171  ##w1.1, w2.1, w3.1, ...., w2.1, w2.2, w3.1, .... where 'w*' is the variable and '.*' describes the model
172
173  P <- length(pFUN)
174  number.models <- c()
175  for(i in 1:P) {
176    if(is.null(pFUN[[i]]$formula)) pFUN[[i]]$formula <- w.formula
177    number.models <- c(number.models,
178                       paste(attr(terms(pFUN[[i]]$formula[-3], data = data), "term.labels"),
179                             ".", i, sep = ""))
180  }
181
182  formula.list <- vector(mode = "list", length= length(number.models))
183  names(formula.list) <- paste(number.models)
184
185  for(i in 1:P) {
186    res <- list()
187    Qi <- length(attr(terms(pFUN[[i]]$formula[-3], data = data), "term.labels"))
188    for(j in 1:Qi) {
189      res$formula <- w.formula
190      res$formula[[2]] <- as.name(attr(terms(res$formula[-3], data = data), "term.labels")[j])
191      res$formula[[3]] <- pFUN[[i]]$formula[[3]]
192
193      if(res$formula[[3]] == ".") res$formula <- as.formula(paste(res$formula[[2]], "~", paste(x.names, collapse= "+")))
194      res$model <- pFUN[[i]]$model
195      res$predict <- pFUN[[i]]$predict
196      formula.list[[paste(res$formula[[2]], ".", i, sep = "")]] <- res
197    }
198  }
199
200##apply
201  res <- workhorse.inbagg(object = formula.list, y = y, X = X, W = W,
202    cFUN = cFUN, w.training.set = w.training.set, y.training.set = y.training.set,
203    bcontrol = bcontrol, ...)
204  RET <- list(mtrees = res, y = y, W = W, X = X)
205  class(RET) <- "inbagg"
206  RET
207}
208
209
210print.inbagg <- function(x, ...)
211{
212  q <- length(x$mtrees)
213  intermediates <- attr(x$W, "names")
214  text.intermediates <- paste("Indirect bagging, with", q,
215    "bootstrap samples and intermediate variables: \n",
216    paste(intermediates, collapse = " "))
217  cat("\n", text.intermediates, "\n")
218}
219
220
221summary.inbagg <- function(object, ...)
222{
223  class(object) <- "summary.inbagg"
224  object
225}
226
227
228print.summary.inbagg <- function(x, ...)
229{
230  q <- length(x$mtrees)
231  intermediates <- attr(x$W, "names")
232
233  text.intermediates <- paste("Indirect bagging, with", q,
234"bootstrap samples and intermediate variables:", paste(intermediates, collapse = " "))
235
236  cat("\n", text.intermediates, "\n")
237  for(i in 1:length(x)) {
238    print(x$mtrees[[i]])
239  }
240}
241
242