1#' Create a lagged predictor 2#' 3#' `step_lag` creates a *specification* of a recipe step that 4#' will add new columns of lagged data. Lagged data will 5#' by default include NA values where the lag was induced. 6#' These can be removed with [step_naomit()], or you may 7#' specify an alternative filler value with the `default` 8#' argument. 9#' 10#' @inheritParams step_pca 11#' @inheritParams step_center 12#' @param lag A vector of positive integers. Each specified column will be 13#' lagged for each value in the vector. 14#' @param prefix A prefix for generated column names, default to "lag_". 15#' @param columns A character string of variable names that will 16#' be populated (eventually) by the `terms` argument. 17#' @param default Passed to `dplyr::lag`, determines what fills empty rows 18#' left by lagging (defaults to NA). 19#' @template step-return 20#' @details The step assumes that the data are already _in the proper sequential 21#' order_ for lagging. 22#' @family row operation steps 23#' @export 24#' @rdname step_lag 25#' 26#' @examples 27#' n <- 10 28#' start <- as.Date('1999/01/01') 29#' end <- as.Date('1999/01/10') 30#' 31#' df <- data.frame(x = runif(n), 32#' index = 1:n, 33#' day = seq(start, end, by = "day")) 34#' 35#' recipe(~ ., data = df) %>% 36#' step_lag(index, day, lag = 2:3) %>% 37#' prep(df) %>% 38#' bake(df) 39step_lag <- 40 function(recipe, 41 ..., 42 role = "predictor", 43 trained = FALSE, 44 lag = 1, 45 prefix = "lag_", 46 default = NA, 47 columns = NULL, 48 skip = FALSE, 49 id = rand_id("lag")) { 50 add_step( 51 recipe, 52 step_lag_new( 53 terms = ellipse_check(...), 54 role = role, 55 trained = trained, 56 lag = lag, 57 default = default, 58 prefix = prefix, 59 columns = columns, 60 skip = skip, 61 id = id 62 ) 63 ) 64 } 65 66step_lag_new <- 67 function(terms, role, trained, lag, default, prefix, columns, skip, id) { 68 step( 69 subclass = "lag", 70 terms = terms, 71 role = role, 72 trained = trained, 73 lag = lag, 74 default = default, 75 prefix = prefix, 76 columns = columns, 77 skip = skip, 78 id = id 79 ) 80 } 81 82#' @export 83prep.step_lag <- function(x, training, info = NULL, ...) { 84 step_lag_new( 85 terms = x$terms, 86 role = x$role, 87 trained = TRUE, 88 lag = x$lag, 89 default = x$default, 90 prefix = x$prefix, 91 columns = recipes_eval_select(x$terms, training, info), 92 skip = x$skip, 93 id = x$id 94 ) 95} 96 97#' @export 98bake.step_lag <- function(object, new_data, ...) { 99 100 if (!all(object$lag == as.integer(object$lag))) 101 rlang::abort("step_lag requires 'lag' argument to be integer valued.") 102 103 make_call <- function(col, lag_val) { 104 call2( 105 "lag", 106 x = sym(col), 107 n = lag_val, 108 default = object$default, 109 .ns = "dplyr" 110 ) 111 } 112 113 grid <- expand.grid(col = object$columns, lag_val = object$lag, 114 stringsAsFactors = FALSE) 115 calls <- purrr::map2(grid$col, grid$lag_val, make_call) 116 newname <- paste0(object$prefix, grid$lag_val, "_", grid$col) 117 calls <- check_name(calls, new_data, object, newname, TRUE) 118 119 as_tibble(mutate(new_data, !!!calls)) 120} 121 122print.step_lag <- 123 function(x, width = max(20, options()$width - 30), ...) { 124 cat("Lagging ", sep = "") 125 printer(x$columns, x$terms, x$trained, width = width) 126 invisible(x) 127 } 128