1#' Create, modify, and delete columns 2#' 3#' `mutate()` adds new variables and preserves existing ones; 4#' `transmute()` adds new variables and drops existing ones. 5#' New variables overwrite existing variables of the same name. 6#' Variables can be removed by setting their value to `NULL`. 7#' 8#' @section Useful mutate functions: 9#' 10#' * [`+`], [`-`], [log()], etc., for their usual mathematical meanings 11#' 12#' * [lead()], [lag()] 13#' 14#' * [dense_rank()], [min_rank()], [percent_rank()], [row_number()], 15#' [cume_dist()], [ntile()] 16#' 17#' * [cumsum()], [cummean()], [cummin()], [cummax()], [cumany()], [cumall()] 18#' 19#' * [na_if()], [coalesce()] 20#' 21#' * [if_else()], [recode()], [case_when()] 22#' 23#' @section Grouped tibbles: 24#' 25#' Because mutating expressions are computed within groups, they may 26#' yield different results on grouped tibbles. This will be the case 27#' as soon as an aggregating, lagging, or ranking function is 28#' involved. Compare this ungrouped mutate: 29#' 30#' ``` 31#' starwars %>% 32#' select(name, mass, species) %>% 33#' mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) 34#' ``` 35#' 36#' With the grouped equivalent: 37#' 38#' ``` 39#' starwars %>% 40#' select(name, mass, species) %>% 41#' group_by(species) %>% 42#' mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) 43#' ``` 44#' 45#' The former normalises `mass` by the global average whereas the 46#' latter normalises by the averages within species levels. 47#' 48#' @export 49#' @inheritParams arrange 50#' @param ... <[`data-masking`][dplyr_data_masking]> Name-value pairs. 51#' The name gives the name of the column in the output. 52#' 53#' The value can be: 54#' 55#' * A vector of length 1, which will be recycled to the correct length. 56#' * A vector the same length as the current group (or the whole data frame 57#' if ungrouped). 58#' * `NULL`, to remove the column. 59#' * A data frame or tibble, to create multiple columns in the output. 60#' @family single table verbs 61#' @return 62#' An object of the same type as `.data`. The output has the following 63#' properties: 64#' 65#' * Rows are not affected. 66#' * Existing columns will be preserved according to the `.keep` argument. 67#' New columns will be placed according to the `.before` and `.after` 68#' arguments. If `.keep = "none"` (as in `transmute()`), the output order 69#' is determined only by `...`, not the order of existing columns. 70#' * Columns given value `NULL` will be removed 71#' * Groups will be recomputed if a grouping variable is mutated. 72#' * Data frame attributes are preserved. 73#' @section Methods: 74#' These function are **generic**s, which means that packages can provide 75#' implementations (methods) for other classes. See the documentation of 76#' individual methods for extra arguments and differences in behaviour. 77#' 78#' Methods available in currently loaded packages: 79#' 80#' * `mutate()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("mutate")}. 81#' * `transmute()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("transmute")}. 82#' @examples 83#' # Newly created variables are available immediately 84#' starwars %>% 85#' select(name, mass) %>% 86#' mutate( 87#' mass2 = mass * 2, 88#' mass2_squared = mass2 * mass2 89#' ) 90#' 91#' # As well as adding new variables, you can use mutate() to 92#' # remove variables and modify existing variables. 93#' starwars %>% 94#' select(name, height, mass, homeworld) %>% 95#' mutate( 96#' mass = NULL, 97#' height = height * 0.0328084 # convert to feet 98#' ) 99#' 100#' # Use across() with mutate() to apply a transformation 101#' # to multiple columns in a tibble. 102#' starwars %>% 103#' select(name, homeworld, species) %>% 104#' mutate(across(!name, as.factor)) 105#' # see more in ?across 106#' 107#' # Window functions are useful for grouped mutates: 108#' starwars %>% 109#' select(name, mass, homeworld) %>% 110#' group_by(homeworld) %>% 111#' mutate(rank = min_rank(desc(mass))) 112#' # see `vignette("window-functions")` for more details 113#' 114#' # By default, new columns are placed on the far right. 115#' # Experimental: you can override with `.before` or `.after` 116#' df <- tibble(x = 1, y = 2) 117#' df %>% mutate(z = x + y) 118#' df %>% mutate(z = x + y, .before = 1) 119#' df %>% mutate(z = x + y, .after = x) 120#' 121#' # By default, mutate() keeps all columns from the input data. 122#' # Experimental: You can override with `.keep` 123#' df <- tibble(x = 1, y = 2, a = "a", b = "b") 124#' df %>% mutate(z = x + y, .keep = "all") # the default 125#' df %>% mutate(z = x + y, .keep = "used") 126#' df %>% mutate(z = x + y, .keep = "unused") 127#' df %>% mutate(z = x + y, .keep = "none") # same as transmute() 128#' 129#' # Grouping ---------------------------------------- 130#' # The mutate operation may yield different results on grouped 131#' # tibbles because the expressions are computed within groups. 132#' # The following normalises `mass` by the global average: 133#' starwars %>% 134#' select(name, mass, species) %>% 135#' mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) 136#' 137#' # Whereas this normalises `mass` by the averages within species 138#' # levels: 139#' starwars %>% 140#' select(name, mass, species) %>% 141#' group_by(species) %>% 142#' mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) 143#' 144#' # Indirection ---------------------------------------- 145#' # Refer to column names stored as strings with the `.data` pronoun: 146#' vars <- c("mass", "height") 147#' mutate(starwars, prod = .data[[vars[[1]]]] * .data[[vars[[2]]]]) 148#' # Learn more in ?dplyr_data_masking 149mutate <- function(.data, ...) { 150 UseMethod("mutate") 151} 152 153#' @rdname mutate 154#' @param .keep \Sexpr[results=rd]{lifecycle::badge("experimental")} 155#' This is an experimental argument that allows you to control which columns 156#' from `.data` are retained in the output: 157#' 158#' * `"all"`, the default, retains all variables. 159#' * `"used"` keeps any variables used to make new variables; it's useful 160#' for checking your work as it displays inputs and outputs side-by-side. 161#' * `"unused"` keeps only existing variables **not** used to make new 162#' variables. 163#' * `"none"`, only keeps grouping keys (like [transmute()]). 164#' 165#' Grouping variables are always kept, unconditional to `.keep`. 166#' @param .before,.after \Sexpr[results=rd]{lifecycle::badge("experimental")} 167#' <[`tidy-select`][dplyr_tidy_select]> Optionally, control where new columns 168#' should appear (the default is to add to the right hand side). See 169#' [relocate()] for more details. 170#' @export 171mutate.data.frame <- function(.data, ..., 172 .keep = c("all", "used", "unused", "none"), 173 .before = NULL, .after = NULL) { 174 keep <- arg_match(.keep) 175 176 cols <- mutate_cols(.data, ..., caller_env = caller_env()) 177 out <- dplyr_col_modify(.data, cols) 178 179 .before <- enquo(.before) 180 .after <- enquo(.after) 181 if (!quo_is_null(.before) || !quo_is_null(.after)) { 182 # Only change the order of new columns 183 new <- setdiff(names(cols), names(.data)) 184 out <- relocate(out, !!new, .before = !!.before, .after = !!.after) 185 } 186 187 if (keep == "all") { 188 out 189 } else if (keep == "unused") { 190 used <- attr(cols, "used") 191 unused <- names(used)[!used] 192 keep <- intersect(names(out), c(group_vars(.data), unused, names(cols))) 193 dplyr_col_select(out, keep) 194 } else if (keep == "used") { 195 used <- attr(cols, "used") 196 used <- names(used)[used] 197 keep <- intersect(names(out), c(group_vars(.data), used, names(cols))) 198 dplyr_col_select(out, keep) 199 } else if (keep == "none") { 200 keep <- c( 201 # ensure group vars present 202 setdiff(group_vars(.data), names(cols)), 203 # cols might contain NULLs 204 intersect(names(cols), names(out)) 205 ) 206 dplyr_col_select(out, keep) 207 } 208} 209 210#' @rdname mutate 211#' @export 212transmute <- function(.data, ...) { 213 UseMethod("transmute") 214} 215 216#' @export 217transmute.data.frame <- function(.data, ...) { 218 dots <- check_transmute_args(...) 219 mutate(.data, !!!dots, .keep = "none") 220} 221 222# Helpers ----------------------------------------------------------------- 223 224check_transmute_args <- function(..., .keep, .before, .after) { 225 if (!missing(.keep)) { 226 abort("`transmute()` does not support the `.keep` argument") 227 } 228 if (!missing(.before)) { 229 abort("`transmute()` does not support the `.before` argument") 230 } 231 if (!missing(.after)) { 232 abort("`transmute()` does not support the `.after` argument") 233 } 234 enquos(...) 235} 236 237mutate_cols <- function(.data, ..., caller_env) { 238 mask <- DataMask$new(.data, caller_env) 239 old_current_column <- context_peek_bare("column") 240 241 on.exit(context_poke("column", old_current_column), add = TRUE) 242 on.exit(mask$forget("mutate"), add = TRUE) 243 244 rows <- mask$get_rows() 245 dots <- dplyr_quosures(...) 246 if (length(dots) == 0L) { 247 return(NULL) 248 } 249 250 new_columns <- set_names(list(), character()) 251 252 withCallingHandlers({ 253 for (i in seq_along(dots)) { 254 mask$across_cache_reset() 255 context_poke("column", old_current_column) 256 257 # get results from all the quosures that are expanded from ..i 258 # then ingest them after 259 quosures <- expand_across(dots[[i]]) 260 quosures_results <- vector(mode = "list", length = length(quosures)) 261 262 for (k in seq_along(quosures)) { 263 quo <- quosures[[k]] 264 quo_data <- attr(quo, "dplyr:::data") 265 if (!is.null(quo_data$column)) { 266 context_poke("column", quo_data$column) 267 } 268 # a list in which each element is the result of 269 # evaluating the quosure in the "sliced data mask" 270 # recycling it appropriately to match the group size 271 # 272 # TODO: reinject hybrid evaluation at the R level 273 chunks <- NULL 274 275 # result after unchopping the chunks 276 result <- NULL 277 278 if (quo_is_symbol(quo)){ 279 name <- as_string(quo_get_expr(quo)) 280 281 if (name %in% names(new_columns)) { 282 # already have result and chunks 283 result <- new_columns[[name]] 284 chunks <- mask$resolve(name) 285 } else if (name %in% names(.data)) { 286 # column from the original data 287 result <- .data[[name]] 288 chunks <- mask$resolve(name) 289 } 290 291 if (inherits(.data, "rowwise_df") && vec_is_list(result)) { 292 sizes <- list_sizes(result) 293 wrong <- which(sizes != 1) 294 if (length(wrong)) { 295 # same error as would have been generated by mask$eval_all_mutate() 296 group <- wrong[1L] 297 mask$set_current_group(group) 298 abort(x_size = sizes[group], class = "dplyr:::mutate_incompatible_size") 299 } 300 } 301 } 302 303 if (is.null(chunks)) { 304 chunks <- mask$eval_all_mutate(quo) 305 } 306 307 if (is.null(chunks)) { 308 next 309 } 310 311 # only unchop if needed 312 if (is.null(result)) { 313 if (length(rows) == 1) { 314 result <- chunks[[1]] 315 } else { 316 result <- withCallingHandlers( 317 vec_unchop(chunks <- vec_cast_common(!!!chunks), rows), 318 vctrs_error_incompatible_type = function(cnd) { 319 abort(class = "dplyr:::error_mutate_incompatible_combine", parent = cnd) 320 } 321 ) 322 } 323 } 324 325 quosures_results[[k]] <- list(result = result, chunks = chunks) 326 } 327 328 329 for (k in seq_along(quosures)) { 330 quo <- quosures[[k]] 331 quo_data <- attr(quo, "dplyr:::data") 332 333 quo_result <- quosures_results[[k]] 334 if (is.null(quo_result)) { 335 if (quo_data$is_named) { 336 name <- quo_data$name_given 337 new_columns[[name]] <- zap() 338 mask$remove(name) 339 } 340 next 341 } 342 343 result <- quo_result$result 344 chunks <- quo_result$chunks 345 346 if (!quo_data$is_named && is.data.frame(result)) { 347 new_columns[names(result)] <- result 348 mask$add_many(result, chunks) 349 } else { 350 # treat as a single output otherwise 351 name <- quo_data$name_auto 352 new_columns[[name]] <- result 353 mask$add_one(name, chunks) 354 } 355 356 } 357 358 } 359 360 }, 361 error = function(e) { 362 local_call_step(dots = dots, .index = i, .fn = "mutate", .dot_data = inherits(e, "rlang_error_data_pronoun_not_found")) 363 call_step_envir <- peek_call_step() 364 error_name <- call_step_envir$error_name 365 error_expression <- call_step_envir$error_expression 366 367 show_group_details <- TRUE 368 if (inherits(e, "dplyr:::mutate_incompatible_size")) { 369 size <- vec_size(rows[[mask$get_current_group()]]) 370 x_size <- e$x_size 371 bullets <- c( 372 i = cnd_bullet_column_info(), 373 i = glue("`{error_name}` must be size {or_1(size)}, not {x_size}."), 374 i = cnd_bullet_rowwise_unlist() 375 ) 376 } else if (inherits(e, "dplyr:::mutate_mixed_null")) { 377 show_group_details <- FALSE 378 bullets <- c( 379 i = cnd_bullet_column_info(), 380 x = glue("`{error_name}` must return compatible vectors across groups."), 381 i = "Cannot combine NULL and non NULL results.", 382 i = cnd_bullet_rowwise_unlist() 383 ) 384 } else if (inherits(e, "dplyr:::mutate_not_vector")) { 385 bullets <- c( 386 i = cnd_bullet_column_info(), 387 x = glue("`{error_name}` must be a vector, not {friendly_type_of(e$result)}."), 388 i = cnd_bullet_rowwise_unlist() 389 ) 390 } else if(inherits(e, "dplyr:::error_mutate_incompatible_combine")) { 391 show_group_details <- FALSE 392 bullets <- c( 393 i = cnd_bullet_column_info(), 394 x = glue("`{error_name}` must return compatible vectors across groups"), 395 i = cnd_bullet_combine_details(e$parent$x, e$parent$x_arg), 396 i = cnd_bullet_combine_details(e$parent$y, e$parent$y_arg) 397 ) 398 } else { 399 bullets <- c( 400 i = cnd_bullet_column_info(), 401 x = conditionMessage(e) 402 ) 403 } 404 405 bullets <- c( 406 cnd_bullet_header(), 407 bullets, 408 i = if(show_group_details) cnd_bullet_cur_group_label() 409 ) 410 411 abort( 412 bullets, 413 class = c("dplyr:::mutate_error", "dplyr_error"), 414 error_name = error_name, error_expression = error_expression, 415 parent = e, 416 bullets = bullets 417 ) 418 419 }, 420 warning = function(w) { 421 # Check if there is an upstack calling handler that would muffle 422 # the warning. This avoids doing the expensive work below for a 423 # silenced warning (#5675). 424 if (check_muffled_warning(w)) { 425 maybe_restart("muffleWarning") 426 } 427 428 local_call_step(dots = dots, .index = i, .fn = "mutate") 429 430 warn(c( 431 cnd_bullet_header(), 432 i = cnd_bullet_column_info(), 433 i = conditionMessage(w), 434 i = cnd_bullet_cur_group_label(what = "warning") 435 )) 436 437 # Cancel `w` 438 maybe_restart("muffleWarning") 439 }) 440 441 is_zap <- map_lgl(new_columns, inherits, "rlang_zap") 442 new_columns[is_zap] <- rep(list(NULL), sum(is_zap)) 443 used <- mask$get_used() 444 names(used) <- mask$current_vars() 445 attr(new_columns, "used") <- used 446 new_columns 447} 448 449check_muffled_warning <- function(cnd) { 450 early_exit <- TRUE 451 452 # Cancel early exits, e.g. from an exiting handler. This way we can 453 # still instrument caught warnings to avoid confusing 454 # inconsistencies. This doesn't work on versions of R older than 455 # 3.5.0 because they don't include this change: 456 # https://github.com/wch/r-source/commit/688eaebf. So with 457 # `tryCatch(warning = )`, the original warning `cnd` will be caught 458 # instead of the instrumented warning. 459 on.exit( 460 if (can_return_from_exit && early_exit) { 461 return(FALSE) 462 } 463 ) 464 465 muffled <- withRestarts( 466 muffleWarning = function(...) TRUE, 467 { 468 signalCondition(cnd) 469 FALSE 470 } 471 ) 472 473 early_exit <- FALSE 474 muffled 475} 476 477on_load( 478 can_return_from_exit <- getRversion() >= "3.5.0" 479) 480