1workhorse.inbagg <- function(object, y, X, W, 2 cFUN, w.training.set, y.training.set, bcontrol, control, ...) 3{ 4 formula.list <- object 5 data <- data.frame(y, X, W) 6 mtrees <- vector(mode="list", length=bcontrol$nbagg) 7 if(w.training.set[1] == "all") fit.vals <- 1:length(y) 8 9 for (i in 1:bcontrol$nbagg) { 10 bindx <- sample(1:length(y), bcontrol$ns, replace=bcontrol$replace) 11 if(w.training.set[1] == "oob") fit.vals <- (-bindx) 12 if(w.training.set[1] == "bag") fit.vals <- bindx 13 14 objs <- vector(mode="list", length=length(formula.list)) #prediction models for intermediate variables 15 names(objs) <- names(formula.list) 16 17 addclass <- function() { ##START addclass <- function() 18 for (j in 1:length(formula.list)) { ##Fitting prediction models for intermediates 19 oX <- data[fit.vals, c(paste(formula.list[[j]]$formula[[2]]), 20 attr(terms(formula.list[[j]]$formula, dataa = data), "term.labels"))] 21 foo <- try(formula.list[[j]]$model(formula.list[[j]]$formula, data = oX)) 22 objs[[j]] <- foo 23 } 24 25 fct <- function(newdata) { ##START fct <- function(newdata) 26 if (!is.data.frame(newdata)) 27 newdata <- as.data.frame(newdata) 28 add.predictors <- rep(0, nrow(newdata)) 29 30 for (j in 1:length(formula.list)){ ## predict additional intermediates using fitted models 31 oXnewdata <- newdata[,attr(terms(formula.list[[j]]$formula, data = data), "term.labels")] 32 if(is.null(formula.list[[j]]$predict)) { 33 res <- try(predict(objs[[j]], newdata = oXnewdata)) 34 } else { 35 res <- try(formula.list[[j]]$predict(objs[[j]], newdata = oXnewdata)) 36 } 37###FIX: action for class(res) == "try-error" 38 add.predictors <- data.frame(add.predictors, res) 39 } 40 add.predictors <- add.predictors[,-1] 41 if(is.null(dim(add.predictors))) add.predictors <- matrix(add.predictors, ncol = 1) 42 colnames(add.predictors) <- names(formula.list) 43 add.predictors 44 } 45 ##END fct <- function(newdata) 46 return(fct) 47 } ##END addclass <- function() 48 49 50 bfct <- addclass() ###bfct is a function (addclass) 51 52 if (!is.null(bfct)) { 53 expl.cFUN <- attr(terms(as.formula(cFUN$formula), data = data), "term.labels") 54 55 if(!is.null(cFUN$fixed.function)) { 56 btree <- cFUN 57 } else { 58 W.new <- bfct(X) 59 W.new.names <- sub(".[0-9]$", "", colnames(W.new)) 60 61 if(y.training.set[1] == "fitted.bag") { ###contstruct on bag 62 oX <- data.frame(y, X, W.new)[bindx,] 63 right.side <- paste(c(expl.cFUN[!(expl.cFUN %in% W.new.names)], colnames(W.new)[W.new.names %in% expl.cFUN]), collapse = "+") 64 cFUN$formula <- as.formula(paste(cFUN$formula[[2]], "~", right.side)) 65 } 66 67 if(y.training.set[1] == "original") { ###construct on original variables 68 if(length(W.new.names)> length(colnames(W))) stop("If classifying function is trained on original intermediate, only one predictive model per intermediate can be constructed.") 69 oX <- data.frame(y, X, W[,W.new.names]) 70 names(oX)[(ncol(oX)-ncol(W)+1):ncol(oX)] <- colnames(W.new) 71 } 72 73 if(y.training.set[1] == "fitted.subset") { ###construct on subset 74 oX <- data.frame(y, X, W.new)[!subset,] 75 right.side <- paste(c(expl.cFUN[!(expl.cFUN %in% W.new.names)], colnames(W.new)[W.new.names %in% expl.cFUN]), collapse = "+") 76 cFUN$formula <- as.formula(paste(cFUN$formula[[2]], "~", right.side)) 77 } 78 names(oX)[names(oX) == "y"] <- paste(cFUN$formula[[2]]) 79 btree <- cFUN$model(cFUN$formula, data = oX, ...) 80 btree <- list(model = btree, predict = cFUN$predict) 81 } 82 83 this <- list(bindx = bindx, btree = btree, bfct=bfct) 84 } else { 85 stop("Predictive function for intermediates not executable: Classifying function can not be applied.") 86 } 87 class(this) <- "thisclass" 88 mtrees[[i]] <- this 89 } 90 mtrees 91} 92 93 94 95 96inbagg <- function(formula, data, ...) UseMethod("inbagg", data) 97 98inbagg.default <- function(formula, data,...) 99{ 100 stop(paste("Do not know how to handle objects of class", class(data))) 101} 102 103 104inbagg.data.frame <- function(formula, data, pFUN=NULL, 105 cFUN=list(model = NULL, predict = NULL, training.set = NULL), 106 nbagg = 25, ns = 0.5, replace = FALSE, ...) 107{ 108 if(!is.function(cFUN)) { 109 if(is.null(cFUN$model)) { 110 cFUN$model <- function(formula, data) 111 rpart(formula, data, control = rpart.control(minsplit=2, cp=0, xval=0)) 112 if(is.null(cFUN$predict)) cFUN$predict <- function(object, newdata) predict(object, newdata, type = "class") 113 if(is.null(cFUN$training.set)) cFUN$trainig.set <- "fitted.bag" 114 } 115 } 116 117##check formula 118 if(missing(formula) 119 || (length(formula) != 3) 120 || (length(attr(terms(formula[-2], data = data), "term.labels")) < 1)) 121 stop("formula missing or incorrect") 122 123 m <- match.call(expand.dots = FALSE) 124 if(is.matrix(eval(m$data, parent.frame()))) 125 m$data <- as.data.frame(data) 126 127##editing formula 128 if(length(formula[[2]])==3) { 129 if(!is.function(cFUN)) { 130 if (is.null(cFUN$formula)) y.formula <- as.formula(formula[[2]]) else y.formula <- cFUN$formula 131 } 132 133 w.formula <- XX~YY 134 w.formula[[2]] <- formula[[2]][[3]] 135 w.formula[[3]] <- formula[[3]] 136 137 response <- paste(formula[[2]][[2]]) 138 w.names <- attr(terms(as.formula(formula[[2]]), data = data), "term.labels") 139 x.names <- attr(terms(as.formula(formula), data = data), "term.labels") 140 141 if((length(x.names) == 1) && x.names == ".") x.names <- colnames(data)[!(colnames(data) %in% c(response, w.names))] 142 y <- data[, response] 143 X <- data[, x.names] 144 W <- data[, w.names] 145 if(is.null(dim(X))) X <- matrix(X, ncol = 1, dimnames = list(rownames(W), x.names)) 146 if(is.null(dim(W))) W <- matrix(W, ncol = 1, dimnames = list(rownames(X), w.names)) 147 148 if(is.function(cFUN)) { 149 y.formula <- as.formula(paste(formula[[2]][[2]], "~", paste(c(x.names, w.names), collapse = "+"))) 150 fixed.function <- cFUN 151 cFUN <- list() 152 cFUN$fixed.function <- fixed.function 153 } 154 cFUN$formula <- y.formula 155 156 } else { 157 stop(paste("Specified formula has to be of type y~x~w")) 158 } 159##remove settings of training.set 160 if(is.null(pFUN$training.set)) w.training.set <- "oob" else w.training.set <- pFUN$training.set[1] 161 pFUN$training.set <- NULL 162 163 if(is.null(cFUN$training.set)) y.training.set <- "fitted.bag" else y.training.set <- cFUN$training.set[1] 164 cFUN$training.set <- NULL 165 166 bcontrol <- list(nbagg = nbagg, ns = length(y)*ns, replace = replace) 167 168 if(is.null(w.formula)) stop("no formula for prediction model specified") 169 170 ##formula.list : list of lists which specify an abitrary number of models for intermediate variables: 171 ##w1.1, w2.1, w3.1, ...., w2.1, w2.2, w3.1, .... where 'w*' is the variable and '.*' describes the model 172 173 P <- length(pFUN) 174 number.models <- c() 175 for(i in 1:P) { 176 if(is.null(pFUN[[i]]$formula)) pFUN[[i]]$formula <- w.formula 177 number.models <- c(number.models, 178 paste(attr(terms(pFUN[[i]]$formula[-3], data = data), "term.labels"), 179 ".", i, sep = "")) 180 } 181 182 formula.list <- vector(mode = "list", length= length(number.models)) 183 names(formula.list) <- paste(number.models) 184 185 for(i in 1:P) { 186 res <- list() 187 Qi <- length(attr(terms(pFUN[[i]]$formula[-3], data = data), "term.labels")) 188 for(j in 1:Qi) { 189 res$formula <- w.formula 190 res$formula[[2]] <- as.name(attr(terms(res$formula[-3], data = data), "term.labels")[j]) 191 res$formula[[3]] <- pFUN[[i]]$formula[[3]] 192 193 if(res$formula[[3]] == ".") res$formula <- as.formula(paste(res$formula[[2]], "~", paste(x.names, collapse= "+"))) 194 res$model <- pFUN[[i]]$model 195 res$predict <- pFUN[[i]]$predict 196 formula.list[[paste(res$formula[[2]], ".", i, sep = "")]] <- res 197 } 198 } 199 200##apply 201 res <- workhorse.inbagg(object = formula.list, y = y, X = X, W = W, 202 cFUN = cFUN, w.training.set = w.training.set, y.training.set = y.training.set, 203 bcontrol = bcontrol, ...) 204 RET <- list(mtrees = res, y = y, W = W, X = X) 205 class(RET) <- "inbagg" 206 RET 207} 208 209 210print.inbagg <- function(x, ...) 211{ 212 q <- length(x$mtrees) 213 intermediates <- attr(x$W, "names") 214 text.intermediates <- paste("Indirect bagging, with", q, 215 "bootstrap samples and intermediate variables: \n", 216 paste(intermediates, collapse = " ")) 217 cat("\n", text.intermediates, "\n") 218} 219 220 221summary.inbagg <- function(object, ...) 222{ 223 class(object) <- "summary.inbagg" 224 object 225} 226 227 228print.summary.inbagg <- function(x, ...) 229{ 230 q <- length(x$mtrees) 231 intermediates <- attr(x$W, "names") 232 233 text.intermediates <- paste("Indirect bagging, with", q, 234"bootstrap samples and intermediate variables:", paste(intermediates, collapse = " ")) 235 236 cat("\n", text.intermediates, "\n") 237 for(i in 1:length(x)) { 238 print(x$mtrees[[i]]) 239 } 240} 241 242