1#' Violin plot
2#'
3#' A violin plot is a compact display of a continuous distribution. It is a
4#' blend of [geom_boxplot()] and [geom_density()]: a
5#' violin plot is a mirrored density plot displayed in the same way as a
6#' boxplot.
7#'
8#' @eval rd_orientation()
9#'
10#' @eval rd_aesthetics("geom", "violin")
11#' @inheritParams layer
12#' @inheritParams geom_bar
#' @param draw_quantiles If not `NULL`, draw horizontal lines at the given
#'   quantiles of the density estimate. The default, `NULL`, draws no lines.
15#' @param trim If `TRUE` (default), trim the tails of the violins
16#'   to the range of the data. If `FALSE`, don't trim the tails.
17#' @param geom,stat Use to override the default connection between
18#'   `geom_violin()` and `stat_ydensity()`.
19#' @export
20#' @references Hintze, J. L., Nelson, R. D. (1998) Violin Plots: A Box
21#' Plot-Density Trace Synergism. The American Statistician 52, 181-184.
22#' @examples
23#' p <- ggplot(mtcars, aes(factor(cyl), mpg))
24#' p + geom_violin()
25#'
26#' # Orientation follows the discrete axis
27#' ggplot(mtcars, aes(mpg, factor(cyl))) +
28#'   geom_violin()
29#'
30#' \donttest{
31#' p + geom_violin() + geom_jitter(height = 0, width = 0.1)
32#'
33#' # Scale maximum width proportional to sample size:
34#' p + geom_violin(scale = "count")
35#'
36#' # Scale maximum width to 1 for all violins:
37#' p + geom_violin(scale = "width")
38#'
39#' # Default is to trim violins to the range of the data. To disable:
40#' p + geom_violin(trim = FALSE)
41#'
42#' # Use a smaller bandwidth for closer density fit (default is 1).
43#' p + geom_violin(adjust = .5)
44#'
45#' # Add aesthetic mappings
46#' # Note that violins are automatically dodged when any aesthetic is
47#' # a factor
48#' p + geom_violin(aes(fill = cyl))
49#' p + geom_violin(aes(fill = factor(cyl)))
50#' p + geom_violin(aes(fill = factor(vs)))
51#' p + geom_violin(aes(fill = factor(am)))
52#'
53#' # Set aesthetics to fixed value
54#' p + geom_violin(fill = "grey80", colour = "#3366FF")
55#'
56#' # Show quartiles
57#' p + geom_violin(draw_quantiles = c(0.25, 0.5, 0.75))
58#'
59#' # Scales vs. coordinate transforms -------
60#' if (require("ggplot2movies")) {
61#' # Scale transformations occur before the density statistics are computed.
62#' # Coordinate transformations occur afterwards.  Observe the effect on the
63#' # number of outliers.
64#' m <- ggplot(movies, aes(y = votes, x = rating, group = cut_width(rating, 0.5)))
65#' m + geom_violin()
66#' m +
67#'   geom_violin() +
68#'   scale_y_log10()
69#' m +
70#'   geom_violin() +
71#'   coord_trans(y = "log10")
72#' m +
73#'   geom_violin() +
74#'   scale_y_log10() + coord_trans(y = "log10")
75#'
76#' # Violin plots with continuous x:
77#' # Use the group aesthetic to group observations in violins
78#' ggplot(movies, aes(year, budget)) +
79#'   geom_violin()
80#' ggplot(movies, aes(year, budget)) +
81#'   geom_violin(aes(group = cut_width(year, 10)), scale = "width")
82#' }
83#' }
geom_violin <- function(mapping = NULL, data = NULL,
                        stat = "ydensity", position = "dodge",
                        ...,
                        draw_quantiles = NULL,
                        trim = TRUE,
                        scale = "area",
                        na.rm = FALSE,
                        orientation = NA,
                        show.legend = NA,
                        inherit.aes = TRUE) {
  # Everything that is not a structural layer argument is bundled into a
  # single parameter list; layer() receives it as `params`. Additional
  # arguments supplied through `...` are appended at the end.
  violin_params <- list(
    trim = trim,
    scale = scale,
    draw_quantiles = draw_quantiles,
    na.rm = na.rm,
    orientation = orientation,
    ...
  )

  # The geom is always GeomViolin; stat and position can be overridden by
  # the caller.
  layer(
    geom = GeomViolin,
    stat = stat,
    position = position,
    mapping = mapping,
    data = data,
    inherit.aes = inherit.aes,
    show.legend = show.legend,
    params = violin_params
  )
}
112
113#' @rdname ggplot2-ggproto
114#' @format NULL
115#' @usage NULL
116#' @export
GeomViolin <- ggproto("GeomViolin", Geom,
  # Store the result of has_flipped_aes() in the params so that setup_data()
  # and draw_group() can work in a single, unflipped orientation.
  setup_params = function(data, params) {
    params$flipped_aes <- has_flipped_aes(data, params, ambiguous = TRUE)
    params
  },

  extra_params = c("na.rm", "orientation"),

  # Attach `flipped_aes` and a `width` column to the data, then derive the
  # extent of each violin (xmin/xmax) from x and width. The computation is
  # done on flipped data, which is flipped back before returning.
  setup_data = function(data, params) {
    data$flipped_aes <- params$flipped_aes
    data <- flip_data(data, params$flipped_aes)
    # Width precedence: existing data column, then the `width` param, then
    # 90% of the resolution of x.
    data$width <- data$width %||%
      params$width %||% (resolution(data$x, FALSE) * 0.9)
    # xmin and xmax bound each group's violin around its x position
    data <- dapply(data, "group", transform,
      xmin = x - width / 2,
      xmax = x + width / 2
    )
    flip_data(data, params$flipped_aes)
  },

  # Draw a single violin as a closed polygon, optionally overlaid with
  # quantile lines.
  #
  # `data` must contain `violinwidth` as well as the `xmin`/`xmax` columns
  # added by setup_data(); drawing quantiles additionally requires `density`.
  # `draw_quantiles` is a numeric vector of probabilities in [0, 1] or NULL.
  # `...` is forwarded to the draw_panel() methods of GeomPolygon and
  # GeomPath.
  draw_group = function(self, data, ...,
                        draw_quantiles = NULL, flipped_aes = FALSE) {
    data <- flip_data(data, flipped_aes)
    # Find the points for the line to go all the way around: violinwidth
    # scales the half-width on either side of x.
    data <- transform(data,
      xminv = x - violinwidth * (x - xmin),
      xmaxv = x + violinwidth * (xmax - x)
    )

    # Make sure it's sorted properly to draw the outline: up one side in
    # increasing y, back down the other side in decreasing y.
    newdata <- rbind(
      transform(data, x = xminv)[order(data$y), ],
      transform(data, x = xmaxv)[order(data$y, decreasing = TRUE), ]
    )

    # Close the polygon: set first and last point the same
    # Needed for coord_polar and such
    newdata <- rbind(newdata, newdata[1, ])
    newdata <- flip_data(newdata, flipped_aes)

    # Draw quantiles if requested, so long as there is non-zero y range.
    # `&&` keeps the condition scalar and skips the range check entirely
    # when no quantiles were requested.
    if (length(draw_quantiles) > 0 && !scales::zero_range(range(data$y))) {
      # isTRUE() makes NA quantiles fail with the message below rather than
      # with an opaque "missing value where TRUE/FALSE needed" error.
      in_unit_range <- draw_quantiles >= 0 & draw_quantiles <= 1
      if (!isTRUE(all(in_unit_range))) {
        abort("`draw_quantiles` must be between 0 and 1")
      }

      # Compute the quantile segments and combine with existing aesthetics
      # (taken from the first row of the group, repeated for every segment
      # row).
      quantiles <- create_quantile_segment_frame(data, draw_quantiles)
      aesthetics <- data[
        rep(1, nrow(quantiles)),
        setdiff(names(data), c("x", "y", "group")),
        drop = FALSE
      ]
      # Quantile lines are always drawn fully opaque
      aesthetics$alpha <- rep(1, nrow(quantiles))
      both <- cbind(quantiles, aesthetics)
      # Segments whose position could not be interpolated have an NA group
      both <- both[!is.na(both$group), , drop = FALSE]
      both <- flip_data(both, flipped_aes)
      quantile_grob <- if (nrow(both) == 0) {
        zeroGrob()
      } else {
        GeomPath$draw_panel(both, ...)
      }

      ggname("geom_violin", grobTree(
        GeomPolygon$draw_panel(newdata, ...),
        quantile_grob)
      )
    } else {
      ggname("geom_violin", GeomPolygon$draw_panel(newdata, ...))
    }
  },

  draw_key = draw_key_polygon,

  default_aes = aes(weight = 1, colour = "grey20", fill = "white", size = 0.5,
    alpha = NA, linetype = "solid"),

  required_aes = c("x", "y")
)
196
# Returns a data.frame with info needed to draw quantile segments.
#
# `data` is the per-group violin data; it must contain the columns `y`,
# `density`, `xminv` and `xmaxv`. `draw_quantiles` is a numeric vector of
# probabilities.
#
# The result has two rows per requested quantile (left and right end of the
# segment) with columns `x`, `y` and `group`; `group` is the segment's y
# value, so each segment is its own group. A quantile that cannot be
# interpolated gets NA in `y` and `group`.
create_quantile_segment_frame <- function(data, draw_quantiles) {
  # Cumulative density, normalised to end at 1
  cum_dens <- cumsum(data$density) / sum(data$density)

  # Inverse CDF by linear interpolation. Stretches of zero density produce
  # tied values in `cum_dens`; ties = "ordered" keeps them as they are rather
  # than averaging y over the tie and warning about collapsing to unique
  # 'x' values.
  # NOTE(review): assumes `density` is non-negative so that `cum_dens` is
  # already sorted -- confirm against the stat that produces it.
  quantile_fun <- stats::approxfun(cum_dens, data$y, ties = "ordered")
  ys <- quantile_fun(draw_quantiles) # these are all the y-values for quantiles

  # Get the violin bounds for the requested quantiles.
  violin_xminvs <- (stats::approxfun(data$y, data$xminv))(ys)
  violin_xmaxvs <- (stats::approxfun(data$y, data$xmaxv))(ys)

  # We have two rows per segment drawn. Each segment gets its own group.
  new_data_frame(list(
    x = interleave(violin_xminvs, violin_xmaxvs),
    y = rep(ys, each = 2),
    group = rep(ys, each = 2)
  ))
}
214
215