1#' @rdname plot.train
2#' @importFrom stats as.formula
3#' @export
4ggplot.train <- function(data = NULL, mapping = NULL, metric = data$metric[1], plotType = "scatter", output = "layered",
5               nameInStrip = FALSE, highlight = FALSE, ..., environment = NULL) {
6  if(!(output %in% c("data", "layered", "ggplot"))) stop("'outout' should be either 'data', 'ggplot' or 'layered'")
7  params <- data$modelInfo$parameters$parameter
8  paramData <- data$modelInfo$parameters
9
10  if(grepl("adapt", data$control$method))
11    warning("When using adaptive resampling, this plot may not accurately capture the relationship between the tuning parameters and model performance.")
12
13
14  plotIt <- "yes"
15  if(all(params == "parameter"))
16  {
17    plotIt <- "There are no tuning parameters for this model."
18  } else {
19    dat <- data$results
20
21    ## Some exceptions for pretty printing
22    if(data$method == "nb") dat$usekernel <- factor(ifelse(dat$usekernel, "Nonparametric", "Gaussian"))
23    if(data$method == "gam") dat$select <- factor(ifelse(dat$select, "Feature Selection", "No Feature Selection"))
24    if(data$method == "qrnn") dat$bag <- factor(ifelse(dat$bag, "Bagging", "No Bagging"))
25    if(data$method == "C5.0") dat$winnow <- factor(ifelse(dat$winnow, "Winnowing", "No Winnowing"))
26    if(data$method == "M5") dat$rules <- factor(ifelse(dat$rules == "Yes", "Rules", "Trees"))
27
28    ## Check to see which tuning parameters were varied
29    #       params is a factor, so just using params does not work properly when model metric is not the first column in dat
30    #           e.g. oob resampling
31    paramValues <- apply(dat[,as.character(params),drop = FALSE],
32                         2,
33                         function(x) length(unique(x)))
34    if(any(paramValues > 1))
35    {
36      params <- names(paramValues)[paramValues > 1]
37      paramData <- subset(paramData, parameter %in% params)
38    } else plotIt <- "There are no tuning parameters with more than 1 value."
39  }
40  if(plotIt == "yes")
41  {
42    p <- length(params)
43    dat <- dat[, c(metric, params)]
44
45    resampText <- resampName(data, FALSE)
46    resampText <- paste(metric, resampText)
47  }  else stop(plotIt)
48  p <- ncol(dat) - 1
49  if(p > 1) {
50    numUnique <- unlist(lapply(dat[, -1], function(x) length(unique(x))))
51    numUnique <- sort(numUnique,  decreasing = TRUE)
52    dat <- dat[, c(metric, names(numUnique))]
53  }
54
55  if(output == "data") return(dat)
56  if(data$control$search == "random") return(random_search_plot(data, metric = metric))
57
58  if(plotType == "scatter") {
59    # To highlight bestTune parameters in the plot
60    if (highlight) {
61      bstRes <- data$results
62      for (par in as.character(params))
63        bstRes <- bstRes[which(bstRes[, par] == data$bestTune[, par]), ]
64      if (nrow(bstRes) > 1)
65        stop("problem in extracting model$bestTune row from model$results")
66    }
67
68    dnm <- names(dat)
69    if(p > 1 && is.numeric(dat[, 3])) dat[, 3] <- factor(format(dat[, 3]))
70    if(p > 2 & nameInStrip) {
71      strip_vars <- names(dat)[-(1:3)]
72      strip_lab <- as.character(subset(data$modelInfo$parameters, parameter %in% strip_vars)$label)
73      for(i in seq_along(strip_vars))
74        dat[, strip_vars[i]] <- factor(paste(strip_lab[i], dat[, strip_vars[i]], sep = ": "))
75    }
76
77    # If a parameter is assigned to a facet panel, it needs to be converted to a factor
78    #   otherwise, highlighting the bestTune parameters in a facet creates an extraneous panel
79    #   potentially, a bug in ggplot ?
80    if (p >= 3)
81      for (col in 1:(p-2)) {
82        lvls <- as.character(unique(dat[, dnm[col+3]]))
83        dat[, dnm[col+3]] <- factor(dat[, dnm[col+3]], levels = lvls)
84        if (highlight)
85          bstRes[, dnm[col+3]] <- factor(bstRes[, dnm[col+3]], levels = lvls)
86      }
87
88    out <- ggplot(dat, aes_string(x = dnm[2], y = dnm[1]))
89    out <- out + ylab(resampText)
90
91    # names(dat)[.] changed to dnm[.] to make the code more readable & (marginally) efficient
92    out <- out + xlab(paramData$label[paramData$parameter == dnm[2]])
93    if (highlight)
94      out <- out + geom_point(data = bstRes,
95                              aes_string(x = dnm[2], y = dnm[1]),
96                              size = 4, shape = 5)
97
98    if(output == "layered") {
99      if(p >= 2) {
100        leg_name <- paramData$label[paramData$parameter == dnm[3]]
101        out <- out + geom_point(aes_string(color = dnm[3], shape = dnm[3]))
102        out <- out + geom_line(aes_string(color = dnm[3]))
103        out <- out + scale_colour_discrete(name = leg_name) +
104          scale_shape_discrete(name = leg_name)
105      } else out <- out + geom_point() + geom_line()
106
107      if(p == 3)
108        out <- out + facet_wrap(as.formula(paste("~", dnm[4])))
109      if(p == 4)
110        out <- out + facet_grid(as.formula(paste(names(dat)[4], "~", names(dat)[5])))
111      if(p > 4) stop("The function can only handle <= 4 tuning parameters for scatter plots. Use output = 'ggplot' to create your own")
112    }
113  }
114
115  if(plotType == "level") {
116    if(p == 1) stop("Two tuning parameters are required for a level plot")
117    dnm <- names(dat)
118    if(is.numeric(dat[,2])) dat[,2] <- factor(format(dat[,2]), levels = format(sort(unique(dat[,2]))))
119    if(is.numeric(dat[,3])) dat[,3] <- factor(format(dat[,3]), levels = format(sort(unique(dat[,3]))))
120    if(p > 2 & nameInStrip) {
121      strip_vars <- names(dat)[-(1:3)]
122      strip_lab <- as.character(subset(data$modelInfo$parameters, parameter %in% strip_vars)$label)
123      for(i in seq_along(strip_vars))
124        dat[, strip_vars[i]] <- factor(
125          paste(strip_lab[i], format(dat[, strip_vars[i]]), sep = ": "),
126          levels = paste(strip_lab[i], format(sort(unique(dat[, strip_vars[i]]))), sep = ": ")
127        )
128    }
129    ## TODO: use factor(format(x)) to make a solid block of colors?
130    out <- ggplot(dat, aes_string(x = dnm[2], y = dnm[3], fill = metric))
131    out <- out + ylab(paramData$label[paramData$parameter == dnm[3]])
132    out <- out + xlab(paramData$label[paramData$parameter == dnm[2]])
133    if(output == "layered") {
134      out <- out + geom_tile()
135      if(p == 3)
136        out <- out + facet_wrap(as.formula(paste("~", dnm[4])))
137
138      # incorrect facet_wrap call for p == 4 ? fixed errors for p >= 4
139      if(p == 4)
140        out <- out + facet_grid(as.formula(paste(dnm[4], "~", dnm[5])))
141      if(p > 4) stop("The function can only handle <= 4 tuning parameters for level plots. Use output = 'ggplot' to create your own")
142
143    }
144  }
145  out
146}
147
148#' @rdname plot.rfe
149#' @export
150ggplot.rfe <- function(data = NULL, mapping = NULL, metric = data$metric[1],
151                       output = "layered", ..., environment = NULL) {
152  if(!(output %in% c("data", "layered", "ggplot")))
153    stop("'outout' should be either 'data', 'ggplot' or 'layered'")
154  resampText <- resampName(data, FALSE)
155  resampText <- paste(metric, resampText)
156  if(output == "data") return(data$results)
157
158    if(any(names(data$results) == "Num_Resamples")) {
159      data$results <-
160        subset(data$results, Num_Resamples >= floor(.5 * length(data$control$index)))
161  }
162
163  notBest <- subset(data$results, Variables != data$bestSubset)
164  best <- subset(data$results, Variables == data$bestSubset)
165
166  out <- ggplot(data$results, aes_string(x = "Variables", y = metric))
167  if(output == "ggplot") return(out)
168  out <- out + geom_line()
169  out <- out + ylab(resampText)
170  out <- out + geom_point(data = notBest, aes_string(x = "Variables", y = metric))
171
172  out <- out + geom_point(data=best, aes_string(x = "Variables", y = metric),
173                          size = 3, colour="blue")
174  out
175}
176
177#' @importFrom stats complete.cases
178random_search_plot <- function(x, metric = x$metric[1]) {
179
180  params <- x$modelInfo$parameters
181  p_names <- as.character(params$parameter)
182
183  exclude <- NULL
184  for(i in seq(along = p_names)) {
185    if(all(is.na(x$results[, p_names[i]])))
186      exclude <- c(exclude, i)
187  }
188  if(length(exclude) > 0) p_names <- p_names[-exclude]
189  x$results <- x$results[, c(metric, p_names)]
190  res <- x$results[complete.cases(x$results),]
191  combos <- res[, p_names, drop = FALSE]
192
193  nvals <- unlist(lapply(combos, function(x) length(unique(x))))
194  p_names <- p_names[which(nvals > 1)]
195
196  if(nrow(combos) == 1 | length(p_names) == 0)
197    stop("Can't plot results with a single tuning parameter combination")
198  combos <- combos[, p_names, drop = FALSE]
199  nvals <- sort(nvals[p_names], decreasing = TRUE)
200
201  is_num <- unlist(lapply(combos, function(x) is.numeric(x) | is.integer(x)))
202  num_cols <- names(is_num)[is_num]
203  other_cols <- names(is_num)[!is_num]
204
205  num_num <- sum(is_num)
206  num_other <- length(p_names) - num_num
207  if(num_other == 0) {
208    if(num_num == 1) {
209      out <- ggplot(res, aes_string(x = num_cols[1], y = metric)) +
210        geom_point() + xlab(as.character(params$label[params$parameter == num_cols[1]]))
211    } else {
212      if(num_num == 2) {
213        out <- ggplot(res, aes_string(x = num_cols[1], y = num_cols[2], size = metric)) +
214          geom_point() +
215          xlab(as.character(params$label[params$parameter == num_cols[1]])) +
216          ylab(as.character(params$label[params$parameter == num_cols[2]]))
217      } else {
218        ## feature plot
219        vert <- melt(res[, c(metric, num_cols)], id.vars = metric, variable.name = "parameter")
220        vert <- merge(vert, params)
221        names(vert)[names(vert) == "label"] <- "Parameter"
222        out <- ggplot(vert, aes_string(x = "value", y = metric)) +
223          geom_point() + facet_wrap(~Parameter, scales = "free_x") + xlab("")
224      }
225    }
226  } else {
227    if(num_other == length(p_names)) {
228      ## do an interaction plot
229      if(num_other == 1) {
230        out <- ggplot(res, aes_string(x = other_cols[1], y = metric)) +
231          geom_point() +
232          xlab(as.character(params$label[params$parameter == other_cols[1]]))
233      } else {
234        if(num_other == 2) {
235          out <- ggplot(res, aes_string(x = other_cols[1], shape = other_cols[2],  y = metric)) +
236            geom_point() + geom_line(aes_string(group = other_cols[2])) +
237            xlab(as.character(params$label[params$parameter == other_cols[1]]))
238        } else {
239          if(num_other == 3) {
240            pname <- as.character(params$label[params$parameter == other_cols[3]])
241            res[,other_cols[3]] <- paste0(pname, ": ", res[,other_cols[3]])
242            out <- ggplot(res, aes_string(x = other_cols[1], shape = other_cols[2],  y = metric)) +
243              geom_point() + geom_line(aes_string(group = other_cols[2])) +
244              facet_grid(paste0(".~", other_cols[3])) +
245              xlab(as.character(params$label[params$parameter == other_cols[1]]))
246          } else {
247            stop(paste("There are",
248                       num_other, "non-numeric variables; I don't have code for",
249                       "that Dave"))
250          }
251        }
252      }
253    } else {
254      ## feature plot with colors and or shapes
255      vert <- melt(res[, c(metric, num_cols, other_cols)],
256                   id.vars = c(metric, other_cols),
257                   variable.name = "parameter")
258      vert <- merge(vert, params)
259      names(vert)[names(vert) == "label"] <- "Parameter"
260      if(num_other == 1) {
261        out <- ggplot(vert, aes_string(x = "value", y = metric, color = other_cols)) +
262          geom_point() + facet_wrap(~Parameter, scales = "free_x") + xlab("")
263      } else {
264        stop(paste("There are", num_num, "numeric tuning variables and",
265                   num_other, "non-numeric variables; I don't have code for",
266                   "that Dave"))
267      }
268    }
269  }
270  out
271
272}
273