1#' @rdname plot.train 2#' @importFrom stats as.formula 3#' @export 4ggplot.train <- function(data = NULL, mapping = NULL, metric = data$metric[1], plotType = "scatter", output = "layered", 5 nameInStrip = FALSE, highlight = FALSE, ..., environment = NULL) { 6 if(!(output %in% c("data", "layered", "ggplot"))) stop("'outout' should be either 'data', 'ggplot' or 'layered'") 7 params <- data$modelInfo$parameters$parameter 8 paramData <- data$modelInfo$parameters 9 10 if(grepl("adapt", data$control$method)) 11 warning("When using adaptive resampling, this plot may not accurately capture the relationship between the tuning parameters and model performance.") 12 13 14 plotIt <- "yes" 15 if(all(params == "parameter")) 16 { 17 plotIt <- "There are no tuning parameters for this model." 18 } else { 19 dat <- data$results 20 21 ## Some exceptions for pretty printing 22 if(data$method == "nb") dat$usekernel <- factor(ifelse(dat$usekernel, "Nonparametric", "Gaussian")) 23 if(data$method == "gam") dat$select <- factor(ifelse(dat$select, "Feature Selection", "No Feature Selection")) 24 if(data$method == "qrnn") dat$bag <- factor(ifelse(dat$bag, "Bagging", "No Bagging")) 25 if(data$method == "C5.0") dat$winnow <- factor(ifelse(dat$winnow, "Winnowing", "No Winnowing")) 26 if(data$method == "M5") dat$rules <- factor(ifelse(dat$rules == "Yes", "Rules", "Trees")) 27 28 ## Check to see which tuning parameters were varied 29 # params is a factor, so just using params does not work properly when model metric is not the first column in dat 30 # e.g. oob resampling 31 paramValues <- apply(dat[,as.character(params),drop = FALSE], 32 2, 33 function(x) length(unique(x))) 34 if(any(paramValues > 1)) 35 { 36 params <- names(paramValues)[paramValues > 1] 37 paramData <- subset(paramData, parameter %in% params) 38 } else plotIt <- "There are no tuning parameters with more than 1 value." 39 } 40 if(plotIt == "yes") 41 { 42 p <- length(params) 43 dat <- dat[, c(metric, params)] 44 45 resampText <- resampName(data, FALSE) 46 resampText <- paste(metric, resampText) 47 } else stop(plotIt) 48 p <- ncol(dat) - 1 49 if(p > 1) { 50 numUnique <- unlist(lapply(dat[, -1], function(x) length(unique(x)))) 51 numUnique <- sort(numUnique, decreasing = TRUE) 52 dat <- dat[, c(metric, names(numUnique))] 53 } 54 55 if(output == "data") return(dat) 56 if(data$control$search == "random") return(random_search_plot(data, metric = metric)) 57 58 if(plotType == "scatter") { 59 # To highlight bestTune parameters in the plot 60 if (highlight) { 61 bstRes <- data$results 62 for (par in as.character(params)) 63 bstRes <- bstRes[which(bstRes[, par] == data$bestTune[, par]), ] 64 if (nrow(bstRes) > 1) 65 stop("problem in extracting model$bestTune row from model$results") 66 } 67 68 dnm <- names(dat) 69 if(p > 1 && is.numeric(dat[, 3])) dat[, 3] <- factor(format(dat[, 3])) 70 if(p > 2 & nameInStrip) { 71 strip_vars <- names(dat)[-(1:3)] 72 strip_lab <- as.character(subset(data$modelInfo$parameters, parameter %in% strip_vars)$label) 73 for(i in seq_along(strip_vars)) 74 dat[, strip_vars[i]] <- factor(paste(strip_lab[i], dat[, strip_vars[i]], sep = ": ")) 75 } 76 77 # If a parameter is assigned to a facet panel, it needs to be converted to a factor 78 # otherwise, highlighting the bestTune parameters in a facet creates an extraneous panel 79 # potentially, a bug in ggplot ? 80 if (p >= 3) 81 for (col in 1:(p-2)) { 82 lvls <- as.character(unique(dat[, dnm[col+3]])) 83 dat[, dnm[col+3]] <- factor(dat[, dnm[col+3]], levels = lvls) 84 if (highlight) 85 bstRes[, dnm[col+3]] <- factor(bstRes[, dnm[col+3]], levels = lvls) 86 } 87 88 out <- ggplot(dat, aes_string(x = dnm[2], y = dnm[1])) 89 out <- out + ylab(resampText) 90 91 # names(dat)[.] changed to dnm[.] to make the code more readable & (marginally) efficient 92 out <- out + xlab(paramData$label[paramData$parameter == dnm[2]]) 93 if (highlight) 94 out <- out + geom_point(data = bstRes, 95 aes_string(x = dnm[2], y = dnm[1]), 96 size = 4, shape = 5) 97 98 if(output == "layered") { 99 if(p >= 2) { 100 leg_name <- paramData$label[paramData$parameter == dnm[3]] 101 out <- out + geom_point(aes_string(color = dnm[3], shape = dnm[3])) 102 out <- out + geom_line(aes_string(color = dnm[3])) 103 out <- out + scale_colour_discrete(name = leg_name) + 104 scale_shape_discrete(name = leg_name) 105 } else out <- out + geom_point() + geom_line() 106 107 if(p == 3) 108 out <- out + facet_wrap(as.formula(paste("~", dnm[4]))) 109 if(p == 4) 110 out <- out + facet_grid(as.formula(paste(names(dat)[4], "~", names(dat)[5]))) 111 if(p > 4) stop("The function can only handle <= 4 tuning parameters for scatter plots. Use output = 'ggplot' to create your own") 112 } 113 } 114 115 if(plotType == "level") { 116 if(p == 1) stop("Two tuning parameters are required for a level plot") 117 dnm <- names(dat) 118 if(is.numeric(dat[,2])) dat[,2] <- factor(format(dat[,2]), levels = format(sort(unique(dat[,2])))) 119 if(is.numeric(dat[,3])) dat[,3] <- factor(format(dat[,3]), levels = format(sort(unique(dat[,3])))) 120 if(p > 2 & nameInStrip) { 121 strip_vars <- names(dat)[-(1:3)] 122 strip_lab <- as.character(subset(data$modelInfo$parameters, parameter %in% strip_vars)$label) 123 for(i in seq_along(strip_vars)) 124 dat[, strip_vars[i]] <- factor( 125 paste(strip_lab[i], format(dat[, strip_vars[i]]), sep = ": "), 126 levels = paste(strip_lab[i], format(sort(unique(dat[, strip_vars[i]]))), sep = ": ") 127 ) 128 } 129 ## TODO: use factor(format(x)) to make a solid block of colors? 130 out <- ggplot(dat, aes_string(x = dnm[2], y = dnm[3], fill = metric)) 131 out <- out + ylab(paramData$label[paramData$parameter == dnm[3]]) 132 out <- out + xlab(paramData$label[paramData$parameter == dnm[2]]) 133 if(output == "layered") { 134 out <- out + geom_tile() 135 if(p == 3) 136 out <- out + facet_wrap(as.formula(paste("~", dnm[4]))) 137 138 # incorrect facet_wrap call for p == 4 ? fixed errors for p >= 4 139 if(p == 4) 140 out <- out + facet_grid(as.formula(paste(dnm[4], "~", dnm[5]))) 141 if(p > 4) stop("The function can only handle <= 4 tuning parameters for level plots. Use output = 'ggplot' to create your own") 142 143 } 144 } 145 out 146} 147 148#' @rdname plot.rfe 149#' @export 150ggplot.rfe <- function(data = NULL, mapping = NULL, metric = data$metric[1], 151 output = "layered", ..., environment = NULL) { 152 if(!(output %in% c("data", "layered", "ggplot"))) 153 stop("'outout' should be either 'data', 'ggplot' or 'layered'") 154 resampText <- resampName(data, FALSE) 155 resampText <- paste(metric, resampText) 156 if(output == "data") return(data$results) 157 158 if(any(names(data$results) == "Num_Resamples")) { 159 data$results <- 160 subset(data$results, Num_Resamples >= floor(.5 * length(data$control$index))) 161 } 162 163 notBest <- subset(data$results, Variables != data$bestSubset) 164 best <- subset(data$results, Variables == data$bestSubset) 165 166 out <- ggplot(data$results, aes_string(x = "Variables", y = metric)) 167 if(output == "ggplot") return(out) 168 out <- out + geom_line() 169 out <- out + ylab(resampText) 170 out <- out + geom_point(data = notBest, aes_string(x = "Variables", y = metric)) 171 172 out <- out + geom_point(data=best, aes_string(x = "Variables", y = metric), 173 size = 3, colour="blue") 174 out 175} 176 177#' @importFrom stats complete.cases 178random_search_plot <- function(x, metric = x$metric[1]) { 179 180 params <- x$modelInfo$parameters 181 p_names <- as.character(params$parameter) 182 183 exclude <- NULL 184 for(i in seq(along = p_names)) { 185 if(all(is.na(x$results[, p_names[i]]))) 186 exclude <- c(exclude, i) 187 } 188 if(length(exclude) > 0) p_names <- p_names[-exclude] 189 x$results <- x$results[, c(metric, p_names)] 190 res <- x$results[complete.cases(x$results),] 191 combos <- res[, p_names, drop = FALSE] 192 193 nvals <- unlist(lapply(combos, function(x) length(unique(x)))) 194 p_names <- p_names[which(nvals > 1)] 195 196 if(nrow(combos) == 1 | length(p_names) == 0) 197 stop("Can't plot results with a single tuning parameter combination") 198 combos <- combos[, p_names, drop = FALSE] 199 nvals <- sort(nvals[p_names], decreasing = TRUE) 200 201 is_num <- unlist(lapply(combos, function(x) is.numeric(x) | is.integer(x))) 202 num_cols <- names(is_num)[is_num] 203 other_cols <- names(is_num)[!is_num] 204 205 num_num <- sum(is_num) 206 num_other <- length(p_names) - num_num 207 if(num_other == 0) { 208 if(num_num == 1) { 209 out <- ggplot(res, aes_string(x = num_cols[1], y = metric)) + 210 geom_point() + xlab(as.character(params$label[params$parameter == num_cols[1]])) 211 } else { 212 if(num_num == 2) { 213 out <- ggplot(res, aes_string(x = num_cols[1], y = num_cols[2], size = metric)) + 214 geom_point() + 215 xlab(as.character(params$label[params$parameter == num_cols[1]])) + 216 ylab(as.character(params$label[params$parameter == num_cols[2]])) 217 } else { 218 ## feature plot 219 vert <- melt(res[, c(metric, num_cols)], id.vars = metric, variable.name = "parameter") 220 vert <- merge(vert, params) 221 names(vert)[names(vert) == "label"] <- "Parameter" 222 out <- ggplot(vert, aes_string(x = "value", y = metric)) + 223 geom_point() + facet_wrap(~Parameter, scales = "free_x") + xlab("") 224 } 225 } 226 } else { 227 if(num_other == length(p_names)) { 228 ## do an interaction plot 229 if(num_other == 1) { 230 out <- ggplot(res, aes_string(x = other_cols[1], y = metric)) + 231 geom_point() + 232 xlab(as.character(params$label[params$parameter == other_cols[1]])) 233 } else { 234 if(num_other == 2) { 235 out <- ggplot(res, aes_string(x = other_cols[1], shape = other_cols[2], y = metric)) + 236 geom_point() + geom_line(aes_string(group = other_cols[2])) + 237 xlab(as.character(params$label[params$parameter == other_cols[1]])) 238 } else { 239 if(num_other == 3) { 240 pname <- as.character(params$label[params$parameter == other_cols[3]]) 241 res[,other_cols[3]] <- paste0(pname, ": ", res[,other_cols[3]]) 242 out <- ggplot(res, aes_string(x = other_cols[1], shape = other_cols[2], y = metric)) + 243 geom_point() + geom_line(aes_string(group = other_cols[2])) + 244 facet_grid(paste0(".~", other_cols[3])) + 245 xlab(as.character(params$label[params$parameter == other_cols[1]])) 246 } else { 247 stop(paste("There are", 248 num_other, "non-numeric variables; I don't have code for", 249 "that Dave")) 250 } 251 } 252 } 253 } else { 254 ## feature plot with colors and or shapes 255 vert <- melt(res[, c(metric, num_cols, other_cols)], 256 id.vars = c(metric, other_cols), 257 variable.name = "parameter") 258 vert <- merge(vert, params) 259 names(vert)[names(vert) == "label"] <- "Parameter" 260 if(num_other == 1) { 261 out <- ggplot(vert, aes_string(x = "value", y = metric, color = other_cols)) + 262 geom_point() + facet_wrap(~Parameter, scales = "free_x") + xlab("") 263 } else { 264 stop(paste("There are", num_num, "numeric tuning variables and", 265 num_other, "non-numeric variables; I don't have code for", 266 "that Dave")) 267 } 268 } 269 } 270 out 271 272} 273