1#' Create a lagged predictor
2#'
3#' `step_lag` creates a *specification* of a recipe step that
4#'   will add new columns of lagged data. Lagged data will
5#'   by default include NA values where the lag was induced.
6#'   These can be removed with [step_naomit()], or you may
7#'   specify an alternative filler value with the `default`
8#'   argument.
9#'
10#' @inheritParams step_pca
11#' @inheritParams step_center
12#' @param lag A vector of positive integers. Each specified column will be
13#'  lagged for each value in the vector.
14#' @param prefix A prefix for generated column names, default to "lag_".
15#' @param columns A character string of variable names that will
16#'  be populated (eventually) by the `terms` argument.
17#' @param default Passed to `dplyr::lag`, determines what fills empty rows
18#'   left by lagging (defaults to NA).
19#' @template step-return
20#' @details The step assumes that the data are already _in the proper sequential
21#'  order_ for lagging.
22#' @family row operation steps
23#' @export
24#' @rdname step_lag
25#'
26#' @examples
27#' n <- 10
28#' start <- as.Date('1999/01/01')
29#' end <- as.Date('1999/01/10')
30#'
31#' df <- data.frame(x = runif(n),
32#'                  index = 1:n,
33#'                  day = seq(start, end, by = "day"))
34#'
35#' recipe(~ ., data = df) %>%
36#'   step_lag(index, day, lag = 2:3) %>%
37#'   prep(df) %>%
38#'   bake(df)
39step_lag <-
40  function(recipe,
41           ...,
42           role = "predictor",
43           trained = FALSE,
44           lag = 1,
45           prefix = "lag_",
46           default = NA,
47           columns = NULL,
48           skip = FALSE,
49           id = rand_id("lag")) {
50    add_step(
51      recipe,
52      step_lag_new(
53        terms = ellipse_check(...),
54        role = role,
55        trained = trained,
56        lag = lag,
57        default = default,
58        prefix = prefix,
59        columns = columns,
60        skip = skip,
61        id = id
62      )
63    )
64  }
65
66step_lag_new <-
67  function(terms, role, trained, lag, default, prefix, columns, skip, id) {
68    step(
69      subclass = "lag",
70      terms = terms,
71      role = role,
72      trained = trained,
73      lag = lag,
74      default = default,
75      prefix = prefix,
76      columns = columns,
77      skip = skip,
78      id = id
79    )
80  }
81
82#' @export
83prep.step_lag <- function(x, training, info = NULL, ...) {
84  step_lag_new(
85    terms = x$terms,
86    role = x$role,
87    trained = TRUE,
88    lag = x$lag,
89    default = x$default,
90    prefix = x$prefix,
91    columns = recipes_eval_select(x$terms, training, info),
92    skip = x$skip,
93    id = x$id
94  )
95}
96
97#' @export
98bake.step_lag <- function(object, new_data, ...) {
99
100  if (!all(object$lag == as.integer(object$lag)))
101    rlang::abort("step_lag requires 'lag' argument to be integer valued.")
102
103  make_call <- function(col, lag_val) {
104    call2(
105      "lag",
106      x = sym(col),
107      n = lag_val,
108      default = object$default,
109      .ns = "dplyr"
110    )
111  }
112
113  grid <- expand.grid(col = object$columns, lag_val = object$lag,
114                      stringsAsFactors = FALSE)
115  calls <- purrr::map2(grid$col, grid$lag_val, make_call)
116  newname <- paste0(object$prefix, grid$lag_val, "_", grid$col)
117  calls <- check_name(calls, new_data, object, newname, TRUE)
118
119  as_tibble(mutate(new_data, !!!calls))
120}
121
122print.step_lag <-
123  function(x, width = max(20, options()$width - 30), ...) {
124    cat("Lagging ",  sep = "")
125    printer(x$columns, x$terms, x$trained, width = width)
126    invisible(x)
127  }
128