1#' Create, modify, and delete columns
2#'
3#' `mutate()` adds new variables and preserves existing ones;
4#' `transmute()` adds new variables and drops existing ones.
5#' New variables overwrite existing variables of the same name.
6#' Variables can be removed by setting their value to `NULL`.
7#'
8#' @section Useful mutate functions:
9#'
10#' * [`+`], [`-`], [log()], etc., for their usual mathematical meanings
11#'
12#' * [lead()], [lag()]
13#'
14#' * [dense_rank()], [min_rank()], [percent_rank()], [row_number()],
15#'   [cume_dist()], [ntile()]
16#'
17#' * [cumsum()], [cummean()], [cummin()], [cummax()], [cumany()], [cumall()]
18#'
19#' * [na_if()], [coalesce()]
20#'
21#' * [if_else()], [recode()], [case_when()]
22#'
23#' @section Grouped tibbles:
24#'
25#' Because mutating expressions are computed within groups, they may
26#' yield different results on grouped tibbles. This will be the case
27#' as soon as an aggregating, lagging, or ranking function is
28#' involved. Compare this ungrouped mutate:
29#'
30#' ```
31#' starwars %>%
32#'   select(name, mass, species) %>%
33#'   mutate(mass_norm = mass / mean(mass, na.rm = TRUE))
34#' ```
35#'
36#' With the grouped equivalent:
37#'
38#' ```
39#' starwars %>%
40#'   select(name, mass, species) %>%
41#'   group_by(species) %>%
42#'   mutate(mass_norm = mass / mean(mass, na.rm = TRUE))
43#' ```
44#'
45#' The former normalises `mass` by the global average whereas the
46#' latter normalises by the averages within species levels.
47#'
48#' @export
49#' @inheritParams arrange
50#' @param ... <[`data-masking`][dplyr_data_masking]> Name-value pairs.
51#'   The name gives the name of the column in the output.
52#'
53#'   The value can be:
54#'
55#'   * A vector of length 1, which will be recycled to the correct length.
56#'   * A vector the same length as the current group (or the whole data frame
57#'     if ungrouped).
58#'   * `NULL`, to remove the column.
59#'   * A data frame or tibble, to create multiple columns in the output.
60#' @family single table verbs
61#' @return
62#' An object of the same type as `.data`. The output has the following
63#' properties:
64#'
65#' * Rows are not affected.
66#' * Existing columns will be preserved according to the `.keep` argument.
67#'   New columns will be placed according to the `.before` and `.after`
68#'   arguments. If `.keep = "none"` (as in `transmute()`), the output order
69#'   is determined only by `...`, not the order of existing columns.
70#' * Columns given value `NULL` will be removed
71#' * Groups will be recomputed if a grouping variable is mutated.
72#' * Data frame attributes are preserved.
73#' @section Methods:
74#' These function are **generic**s, which means that packages can provide
75#' implementations (methods) for other classes. See the documentation of
76#' individual methods for extra arguments and differences in behaviour.
77#'
78#' Methods available in currently loaded packages:
79#'
80#' * `mutate()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("mutate")}.
81#' * `transmute()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("transmute")}.
82#' @examples
83#' # Newly created variables are available immediately
84#' starwars %>%
85#'  select(name, mass) %>%
86#'  mutate(
87#'   mass2 = mass * 2,
88#'   mass2_squared = mass2 * mass2
89#' )
90#'
91#' # As well as adding new variables, you can use mutate() to
92#' # remove variables and modify existing variables.
93#' starwars %>%
94#'  select(name, height, mass, homeworld) %>%
95#'  mutate(
96#'   mass = NULL,
97#'   height = height * 0.0328084 # convert to feet
98#' )
99#'
100#' # Use across() with mutate() to apply a transformation
101#' # to multiple columns in a tibble.
102#' starwars %>%
103#'  select(name, homeworld, species) %>%
104#'  mutate(across(!name, as.factor))
105#' # see more in ?across
106#'
107#' # Window functions are useful for grouped mutates:
108#' starwars %>%
109#'  select(name, mass, homeworld) %>%
110#'  group_by(homeworld) %>%
111#'  mutate(rank = min_rank(desc(mass)))
112#' # see `vignette("window-functions")` for more details
113#'
114#' # By default, new columns are placed on the far right.
115#' # Experimental: you can override with `.before` or `.after`
116#' df <- tibble(x = 1, y = 2)
117#' df %>% mutate(z = x + y)
118#' df %>% mutate(z = x + y, .before = 1)
119#' df %>% mutate(z = x + y, .after = x)
120#'
121#' # By default, mutate() keeps all columns from the input data.
122#' # Experimental: You can override with `.keep`
123#' df <- tibble(x = 1, y = 2, a = "a", b = "b")
124#' df %>% mutate(z = x + y, .keep = "all") # the default
125#' df %>% mutate(z = x + y, .keep = "used")
126#' df %>% mutate(z = x + y, .keep = "unused")
127#' df %>% mutate(z = x + y, .keep = "none") # same as transmute()
128#'
129#' # Grouping ----------------------------------------
130#' # The mutate operation may yield different results on grouped
131#' # tibbles because the expressions are computed within groups.
132#' # The following normalises `mass` by the global average:
133#' starwars %>%
134#'   select(name, mass, species) %>%
135#'   mutate(mass_norm = mass / mean(mass, na.rm = TRUE))
136#'
137#' # Whereas this normalises `mass` by the averages within species
138#' # levels:
139#' starwars %>%
140#'   select(name, mass, species) %>%
141#'   group_by(species) %>%
142#'   mutate(mass_norm = mass / mean(mass, na.rm = TRUE))
143#'
144#' # Indirection ----------------------------------------
145#' # Refer to column names stored as strings with the `.data` pronoun:
146#' vars <- c("mass", "height")
147#' mutate(starwars, prod = .data[[vars[[1]]]] * .data[[vars[[2]]]])
148#' # Learn more in ?dplyr_data_masking
149mutate <- function(.data, ...) {
150  UseMethod("mutate")
151}
152
153#' @rdname mutate
154#' @param .keep \Sexpr[results=rd]{lifecycle::badge("experimental")}
155#'   This is an experimental argument that allows you to control which columns
156#'   from `.data` are retained in the output:
157#'
158#'   * `"all"`, the default, retains all variables.
159#'   * `"used"` keeps any variables used to make new variables; it's useful
160#'     for checking your work as it displays inputs and outputs side-by-side.
161#'   * `"unused"` keeps only existing variables **not** used to make new
162#'     variables.
163#'   * `"none"`, only keeps grouping keys (like [transmute()]).
164#'
165#'   Grouping variables are always kept, unconditional to `.keep`.
166#' @param .before,.after \Sexpr[results=rd]{lifecycle::badge("experimental")}
167#'   <[`tidy-select`][dplyr_tidy_select]> Optionally, control where new columns
168#'   should appear (the default is to add to the right hand side). See
169#'   [relocate()] for more details.
170#' @export
171mutate.data.frame <- function(.data, ...,
172                              .keep = c("all", "used", "unused", "none"),
173                              .before = NULL, .after = NULL) {
174  keep <- arg_match(.keep)
175
176  cols <- mutate_cols(.data, ..., caller_env = caller_env())
177  out <- dplyr_col_modify(.data, cols)
178
179  .before <- enquo(.before)
180  .after <- enquo(.after)
181  if (!quo_is_null(.before) || !quo_is_null(.after)) {
182    # Only change the order of new columns
183    new <- setdiff(names(cols), names(.data))
184    out <- relocate(out, !!new, .before = !!.before, .after = !!.after)
185  }
186
187  if (keep == "all") {
188    out
189  } else if (keep == "unused") {
190    used <- attr(cols, "used")
191    unused <- names(used)[!used]
192    keep <- intersect(names(out), c(group_vars(.data), unused, names(cols)))
193    dplyr_col_select(out, keep)
194  } else if (keep == "used") {
195    used <- attr(cols, "used")
196    used <- names(used)[used]
197    keep <- intersect(names(out), c(group_vars(.data), used, names(cols)))
198    dplyr_col_select(out, keep)
199  } else if (keep == "none") {
200    keep <- c(
201      # ensure group vars present
202      setdiff(group_vars(.data), names(cols)),
203      # cols might contain NULLs
204      intersect(names(cols), names(out))
205    )
206    dplyr_col_select(out, keep)
207  }
208}
209
210#' @rdname mutate
211#' @export
212transmute <- function(.data, ...) {
213  UseMethod("transmute")
214}
215
216#' @export
217transmute.data.frame <- function(.data, ...) {
218  dots <- check_transmute_args(...)
219  mutate(.data, !!!dots, .keep = "none")
220}
221
222# Helpers -----------------------------------------------------------------
223
224check_transmute_args <- function(..., .keep, .before, .after) {
225  if (!missing(.keep)) {
226    abort("`transmute()` does not support the `.keep` argument")
227  }
228  if (!missing(.before)) {
229    abort("`transmute()` does not support the `.before` argument")
230  }
231  if (!missing(.after)) {
232    abort("`transmute()` does not support the `.after` argument")
233  }
234  enquos(...)
235}
236
237mutate_cols <- function(.data, ..., caller_env) {
238  mask <- DataMask$new(.data, caller_env)
239  old_current_column <- context_peek_bare("column")
240
241  on.exit(context_poke("column", old_current_column), add = TRUE)
242  on.exit(mask$forget("mutate"), add = TRUE)
243
244  rows <- mask$get_rows()
245  dots <- dplyr_quosures(...)
246  if (length(dots) == 0L) {
247    return(NULL)
248  }
249
250  new_columns <- set_names(list(), character())
251
252  withCallingHandlers({
253    for (i in seq_along(dots)) {
254      mask$across_cache_reset()
255      context_poke("column", old_current_column)
256
257      # get results from all the quosures that are expanded from ..i
258      # then ingest them after
259      quosures <- expand_across(dots[[i]])
260      quosures_results <- vector(mode = "list", length = length(quosures))
261
262      for (k in seq_along(quosures)) {
263        quo <- quosures[[k]]
264        quo_data <- attr(quo, "dplyr:::data")
265        if (!is.null(quo_data$column)) {
266          context_poke("column", quo_data$column)
267        }
268        # a list in which each element is the result of
269        # evaluating the quosure in the "sliced data mask"
270        # recycling it appropriately to match the group size
271        #
272        # TODO: reinject hybrid evaluation at the R level
273        chunks <- NULL
274
275        # result after unchopping the chunks
276        result <- NULL
277
278        if (quo_is_symbol(quo)){
279          name <- as_string(quo_get_expr(quo))
280
281          if (name %in% names(new_columns)) {
282            # already have result and chunks
283            result <- new_columns[[name]]
284            chunks <- mask$resolve(name)
285          } else if (name %in% names(.data)) {
286            # column from the original data
287            result <- .data[[name]]
288            chunks <- mask$resolve(name)
289          }
290
291          if (inherits(.data, "rowwise_df") && vec_is_list(result)) {
292            sizes <- list_sizes(result)
293            wrong <- which(sizes != 1)
294            if (length(wrong)) {
295              # same error as would have been generated by mask$eval_all_mutate()
296              group <- wrong[1L]
297              mask$set_current_group(group)
298              abort(x_size = sizes[group], class = "dplyr:::mutate_incompatible_size")
299            }
300          }
301        }
302
303        if (is.null(chunks)) {
304          chunks <- mask$eval_all_mutate(quo)
305        }
306
307        if (is.null(chunks)) {
308          next
309        }
310
311        # only unchop if needed
312        if (is.null(result)) {
313          if (length(rows) == 1) {
314            result <- chunks[[1]]
315          } else {
316            result <- withCallingHandlers(
317              vec_unchop(chunks <- vec_cast_common(!!!chunks), rows),
318              vctrs_error_incompatible_type = function(cnd) {
319                abort(class = "dplyr:::error_mutate_incompatible_combine", parent = cnd)
320              }
321            )
322          }
323        }
324
325        quosures_results[[k]] <- list(result = result, chunks = chunks)
326      }
327
328
329      for (k in seq_along(quosures)) {
330        quo <- quosures[[k]]
331        quo_data <- attr(quo, "dplyr:::data")
332
333        quo_result <- quosures_results[[k]]
334        if (is.null(quo_result)) {
335          if (quo_data$is_named) {
336            name <- quo_data$name_given
337            new_columns[[name]] <- zap()
338            mask$remove(name)
339          }
340          next
341        }
342
343        result <- quo_result$result
344        chunks <- quo_result$chunks
345
346        if (!quo_data$is_named && is.data.frame(result)) {
347          new_columns[names(result)] <- result
348          mask$add_many(result, chunks)
349        } else {
350          # treat as a single output otherwise
351          name <- quo_data$name_auto
352          new_columns[[name]] <- result
353          mask$add_one(name, chunks)
354        }
355
356      }
357
358    }
359
360  },
361  error = function(e) {
362    local_call_step(dots = dots, .index = i, .fn = "mutate", .dot_data = inherits(e, "rlang_error_data_pronoun_not_found"))
363    call_step_envir <- peek_call_step()
364    error_name <- call_step_envir$error_name
365    error_expression <- call_step_envir$error_expression
366
367    show_group_details <- TRUE
368    if (inherits(e, "dplyr:::mutate_incompatible_size")) {
369      size <- vec_size(rows[[mask$get_current_group()]])
370      x_size <- e$x_size
371      bullets <- c(
372        i = cnd_bullet_column_info(),
373        i = glue("`{error_name}` must be size {or_1(size)}, not {x_size}."),
374        i = cnd_bullet_rowwise_unlist()
375      )
376    } else if (inherits(e, "dplyr:::mutate_mixed_null")) {
377      show_group_details <- FALSE
378      bullets <- c(
379        i = cnd_bullet_column_info(),
380        x = glue("`{error_name}` must return compatible vectors across groups."),
381        i = "Cannot combine NULL and non NULL results.",
382        i = cnd_bullet_rowwise_unlist()
383      )
384    } else if (inherits(e, "dplyr:::mutate_not_vector")) {
385      bullets <- c(
386        i = cnd_bullet_column_info(),
387        x = glue("`{error_name}` must be a vector, not {friendly_type_of(e$result)}."),
388        i = cnd_bullet_rowwise_unlist()
389      )
390    } else if(inherits(e, "dplyr:::error_mutate_incompatible_combine")) {
391      show_group_details <- FALSE
392      bullets <- c(
393        i = cnd_bullet_column_info(),
394        x = glue("`{error_name}` must return compatible vectors across groups"),
395        i = cnd_bullet_combine_details(e$parent$x, e$parent$x_arg),
396        i = cnd_bullet_combine_details(e$parent$y, e$parent$y_arg)
397      )
398    } else {
399      bullets <- c(
400        i = cnd_bullet_column_info(),
401        x = conditionMessage(e)
402      )
403    }
404
405    bullets <- c(
406      cnd_bullet_header(),
407      bullets,
408      i = if(show_group_details) cnd_bullet_cur_group_label()
409    )
410
411    abort(
412      bullets,
413      class = c("dplyr:::mutate_error", "dplyr_error"),
414      error_name = error_name, error_expression = error_expression,
415      parent = e,
416      bullets = bullets
417    )
418
419  },
420  warning = function(w) {
421    # Check if there is an upstack calling handler that would muffle
422    # the warning. This avoids doing the expensive work below for a
423    # silenced warning (#5675).
424    if (check_muffled_warning(w)) {
425      maybe_restart("muffleWarning")
426    }
427
428    local_call_step(dots = dots, .index = i, .fn = "mutate")
429
430    warn(c(
431      cnd_bullet_header(),
432      i = cnd_bullet_column_info(),
433      i = conditionMessage(w),
434      i = cnd_bullet_cur_group_label(what = "warning")
435    ))
436
437    # Cancel `w`
438    maybe_restart("muffleWarning")
439  })
440
441  is_zap <- map_lgl(new_columns, inherits, "rlang_zap")
442  new_columns[is_zap] <- rep(list(NULL), sum(is_zap))
443  used <- mask$get_used()
444  names(used) <- mask$current_vars()
445  attr(new_columns, "used") <- used
446  new_columns
447}
448
449check_muffled_warning <- function(cnd) {
450  early_exit <- TRUE
451
452  # Cancel early exits, e.g. from an exiting handler. This way we can
453  # still instrument caught warnings to avoid confusing
454  # inconsistencies. This doesn't work on versions of R older than
455  # 3.5.0 because they don't include this change:
456  # https://github.com/wch/r-source/commit/688eaebf. So with
457  # `tryCatch(warning = )`, the original warning `cnd` will be caught
458  # instead of the instrumented warning.
459  on.exit(
460    if (can_return_from_exit && early_exit) {
461      return(FALSE)
462    }
463  )
464
465  muffled <- withRestarts(
466    muffleWarning = function(...) TRUE,
467    {
468      signalCondition(cnd)
469      FALSE
470    }
471  )
472
473  early_exit <- FALSE
474  muffled
475}
476
477on_load(
478  can_return_from_exit <- getRversion() >= "3.5.0"
479)
480