1#' A box and whiskers plot (in the style of Tukey) 2#' 3#' The boxplot compactly displays the distribution of a continuous variable. 4#' It visualises five summary statistics (the median, two hinges 5#' and two whiskers), and all "outlying" points individually. 6#' 7#' @eval rd_orientation() 8#' 9#' @section Summary statistics: 10#' The lower and upper hinges correspond to the first and third quartiles 11#' (the 25th and 75th percentiles). This differs slightly from the method used 12#' by the [boxplot()] function, and may be apparent with small samples. 13#' See [boxplot.stats()] for for more information on how hinge 14#' positions are calculated for [boxplot()]. 15#' 16#' The upper whisker extends from the hinge to the largest value no further than 17#' 1.5 * IQR from the hinge (where IQR is the inter-quartile range, or distance 18#' between the first and third quartiles). The lower whisker extends from the 19#' hinge to the smallest value at most 1.5 * IQR of the hinge. Data beyond the 20#' end of the whiskers are called "outlying" points and are plotted 21#' individually. 22#' 23#' In a notched box plot, the notches extend `1.58 * IQR / sqrt(n)`. 24#' This gives a roughly 95% confidence interval for comparing medians. 25#' See McGill et al. (1978) for more details. 26#' 27#' @eval rd_aesthetics("geom", "boxplot") 28#' 29#' @seealso [geom_quantile()] for continuous `x`, 30#' [geom_violin()] for a richer display of the distribution, and 31#' [geom_jitter()] for a useful technique for small data. 32#' @inheritParams layer 33#' @inheritParams geom_bar 34#' @param geom,stat Use to override the default connection between 35#' `geom_boxplot()` and `stat_boxplot()`. 36#' @param outlier.colour,outlier.color,outlier.fill,outlier.shape,outlier.size,outlier.stroke,outlier.alpha 37#' Default aesthetics for outliers. Set to `NULL` to inherit from the 38#' aesthetics used for the box. 39#' 40#' In the unlikely event you specify both US and UK spellings of colour, the 41#' US spelling will take precedence. 42#' 43#' Sometimes it can be useful to hide the outliers, for example when overlaying 44#' the raw data points on top of the boxplot. Hiding the outliers can be achieved 45#' by setting `outlier.shape = NA`. Importantly, this does not remove the outliers, 46#' it only hides them, so the range calculated for the y-axis will be the 47#' same with outliers shown and outliers hidden. 48#' 49#' @param notch If `FALSE` (default) make a standard box plot. If 50#' `TRUE`, make a notched box plot. Notches are used to compare groups; 51#' if the notches of two boxes do not overlap, this suggests that the medians 52#' are significantly different. 53#' @param notchwidth For a notched box plot, width of the notch relative to 54#' the body (defaults to `notchwidth = 0.5`). 55#' @param varwidth If `FALSE` (default) make a standard box plot. If 56#' `TRUE`, boxes are drawn with widths proportional to the 57#' square-roots of the number of observations in the groups (possibly 58#' weighted, using the `weight` aesthetic). 59#' @export 60#' @references McGill, R., Tukey, J. W. and Larsen, W. A. (1978) Variations of 61#' box plots. The American Statistician 32, 12-16. 62#' @examples 63#' p <- ggplot(mpg, aes(class, hwy)) 64#' p + geom_boxplot() 65#' # Orientation follows the discrete axis 66#' ggplot(mpg, aes(hwy, class)) + geom_boxplot() 67#' 68#' p + geom_boxplot(notch = TRUE) 69#' p + geom_boxplot(varwidth = TRUE) 70#' p + geom_boxplot(fill = "white", colour = "#3366FF") 71#' # By default, outlier points match the colour of the box. Use 72#' # outlier.colour to override 73#' p + geom_boxplot(outlier.colour = "red", outlier.shape = 1) 74#' # Remove outliers when overlaying boxplot with original data points 75#' p + geom_boxplot(outlier.shape = NA) + geom_jitter(width = 0.2) 76#' 77#' # Boxplots are automatically dodged when any aesthetic is a factor 78#' p + geom_boxplot(aes(colour = drv)) 79#' 80#' # You can also use boxplots with continuous x, as long as you supply 81#' # a grouping variable. cut_width is particularly useful 82#' ggplot(diamonds, aes(carat, price)) + 83#' geom_boxplot() 84#' ggplot(diamonds, aes(carat, price)) + 85#' geom_boxplot(aes(group = cut_width(carat, 0.25))) 86#' # Adjust the transparency of outliers using outlier.alpha 87#' ggplot(diamonds, aes(carat, price)) + 88#' geom_boxplot(aes(group = cut_width(carat, 0.25)), outlier.alpha = 0.1) 89#' 90#' \donttest{ 91#' # It's possible to draw a boxplot with your own computations if you 92#' # use stat = "identity": 93#' y <- rnorm(100) 94#' df <- data.frame( 95#' x = 1, 96#' y0 = min(y), 97#' y25 = quantile(y, 0.25), 98#' y50 = median(y), 99#' y75 = quantile(y, 0.75), 100#' y100 = max(y) 101#' ) 102#' ggplot(df, aes(x)) + 103#' geom_boxplot( 104#' aes(ymin = y0, lower = y25, middle = y50, upper = y75, ymax = y100), 105#' stat = "identity" 106#' ) 107#' } 108geom_boxplot <- function(mapping = NULL, data = NULL, 109 stat = "boxplot", position = "dodge2", 110 ..., 111 outlier.colour = NULL, 112 outlier.color = NULL, 113 outlier.fill = NULL, 114 outlier.shape = 19, 115 outlier.size = 1.5, 116 outlier.stroke = 0.5, 117 outlier.alpha = NULL, 118 notch = FALSE, 119 notchwidth = 0.5, 120 varwidth = FALSE, 121 na.rm = FALSE, 122 orientation = NA, 123 show.legend = NA, 124 inherit.aes = TRUE) { 125 126 # varwidth = TRUE is not compatible with preserve = "total" 127 if (is.character(position)) { 128 if (varwidth == TRUE) position <- position_dodge2(preserve = "single") 129 } else { 130 if (identical(position$preserve, "total") & varwidth == TRUE) { 131 warn("Can't preserve total widths when varwidth = TRUE.") 132 position$preserve <- "single" 133 } 134 } 135 136 layer( 137 data = data, 138 mapping = mapping, 139 stat = stat, 140 geom = GeomBoxplot, 141 position = position, 142 show.legend = show.legend, 143 inherit.aes = inherit.aes, 144 params = list( 145 outlier.colour = outlier.color %||% outlier.colour, 146 outlier.fill = outlier.fill, 147 outlier.shape = outlier.shape, 148 outlier.size = outlier.size, 149 outlier.stroke = outlier.stroke, 150 outlier.alpha = outlier.alpha, 151 notch = notch, 152 notchwidth = notchwidth, 153 varwidth = varwidth, 154 na.rm = na.rm, 155 orientation = orientation, 156 ... 157 ) 158 ) 159} 160 161#' @rdname ggplot2-ggproto 162#' @format NULL 163#' @usage NULL 164#' @export 165GeomBoxplot <- ggproto("GeomBoxplot", Geom, 166 167 # need to declare `width` here in case this geom is used with a stat that 168 # doesn't have a `width` parameter (e.g., `stat_identity`). 169 extra_params = c("na.rm", "width", "orientation"), 170 171 setup_params = function(data, params) { 172 params$flipped_aes <- has_flipped_aes(data, params) 173 params 174 }, 175 176 setup_data = function(data, params) { 177 data$flipped_aes <- params$flipped_aes 178 data <- flip_data(data, params$flipped_aes) 179 data$width <- data$width %||% 180 params$width %||% (resolution(data$x, FALSE) * 0.9) 181 182 if (!is.null(data$outliers)) { 183 suppressWarnings({ 184 out_min <- vapply(data$outliers, min, numeric(1)) 185 out_max <- vapply(data$outliers, max, numeric(1)) 186 }) 187 188 data$ymin_final <- pmin(out_min, data$ymin) 189 data$ymax_final <- pmax(out_max, data$ymax) 190 } 191 192 # if `varwidth` not requested or not available, don't use it 193 if (is.null(params) || is.null(params$varwidth) || !params$varwidth || is.null(data$relvarwidth)) { 194 data$xmin <- data$x - data$width / 2 195 data$xmax <- data$x + data$width / 2 196 } else { 197 # make `relvarwidth` relative to the size of the largest group 198 data$relvarwidth <- data$relvarwidth / max(data$relvarwidth) 199 data$xmin <- data$x - data$relvarwidth * data$width / 2 200 data$xmax <- data$x + data$relvarwidth * data$width / 2 201 } 202 data$width <- NULL 203 if (!is.null(data$relvarwidth)) data$relvarwidth <- NULL 204 205 flip_data(data, params$flipped_aes) 206 }, 207 208 draw_group = function(data, panel_params, coord, fatten = 2, 209 outlier.colour = NULL, outlier.fill = NULL, 210 outlier.shape = 19, 211 outlier.size = 1.5, outlier.stroke = 0.5, 212 outlier.alpha = NULL, 213 notch = FALSE, notchwidth = 0.5, varwidth = FALSE, flipped_aes = FALSE) { 214 data <- flip_data(data, flipped_aes) 215 # this may occur when using geom_boxplot(stat = "identity") 216 if (nrow(data) != 1) { 217 abort("Can't draw more than one boxplot per group. Did you forget aes(group = ...)?") 218 } 219 220 common <- list( 221 colour = data$colour, 222 size = data$size, 223 linetype = data$linetype, 224 fill = alpha(data$fill, data$alpha), 225 group = data$group 226 ) 227 228 whiskers <- new_data_frame(c( 229 list( 230 x = c(data$x, data$x), 231 xend = c(data$x, data$x), 232 y = c(data$upper, data$lower), 233 yend = c(data$ymax, data$ymin), 234 alpha = c(NA_real_, NA_real_) 235 ), 236 common 237 ), n = 2) 238 whiskers <- flip_data(whiskers, flipped_aes) 239 240 box <- new_data_frame(c( 241 list( 242 xmin = data$xmin, 243 xmax = data$xmax, 244 ymin = data$lower, 245 y = data$middle, 246 ymax = data$upper, 247 ynotchlower = ifelse(notch, data$notchlower, NA), 248 ynotchupper = ifelse(notch, data$notchupper, NA), 249 notchwidth = notchwidth, 250 alpha = data$alpha 251 ), 252 common 253 )) 254 box <- flip_data(box, flipped_aes) 255 256 if (!is.null(data$outliers) && length(data$outliers[[1]] >= 1)) { 257 outliers <- new_data_frame(list( 258 y = data$outliers[[1]], 259 x = data$x[1], 260 colour = outlier.colour %||% data$colour[1], 261 fill = outlier.fill %||% data$fill[1], 262 shape = outlier.shape %||% data$shape[1], 263 size = outlier.size %||% data$size[1], 264 stroke = outlier.stroke %||% data$stroke[1], 265 fill = NA, 266 alpha = outlier.alpha %||% data$alpha[1] 267 ), n = length(data$outliers[[1]])) 268 outliers <- flip_data(outliers, flipped_aes) 269 270 outliers_grob <- GeomPoint$draw_panel(outliers, panel_params, coord) 271 } else { 272 outliers_grob <- NULL 273 } 274 275 ggname("geom_boxplot", grobTree( 276 outliers_grob, 277 GeomSegment$draw_panel(whiskers, panel_params, coord), 278 GeomCrossbar$draw_panel(box, fatten = fatten, panel_params, coord, flipped_aes = flipped_aes) 279 )) 280 }, 281 282 draw_key = draw_key_boxplot, 283 284 default_aes = aes(weight = 1, colour = "grey20", fill = "white", size = 0.5, 285 alpha = NA, shape = 19, linetype = "solid"), 286 287 required_aes = c("x|y", "lower|xlower", "upper|xupper", "middle|xmiddle", "ymin|xmin", "ymax|xmax") 288) 289