1#' Evaluate an expression with quosures and pronoun support
2#'
3#' @description
4#'
5#' \Sexpr[results=rd, stage=render]{rlang:::lifecycle("stable")}
6#'
7#' `eval_tidy()` is a variant of [base::eval()] that powers the tidy
8#' evaluation framework. Like `eval()` it accepts user data as
9#' argument. Whereas `eval()` simply transforms the data to an
10#' environment, `eval_tidy()` transforms it to a **data mask** with
11#' [as_data_mask()]. Evaluating in a data mask enables the following
12#' features:
13#'
14#' - [Quosures][nse-defuse]. Quosures are expressions bundled with an
15#'   environment. If `data` is supplied, objects in the data mask
16#'   always have precedence over the quosure environment, i.e. the
17#'   data masks the environment.
18#'
19#' - [Pronouns][.data]. If `data` is supplied, the `.env` and `.data`
20#'   pronouns are installed in the data mask. `.env` is a reference to
21#'   the calling environment and `.data` refers to the `data` argument.
#'   These pronouns let you be explicit about where to find
23#'   values and throw errors if you try to access non-existent values.
24#'
25#'
26#' @param expr An expression or quosure to evaluate.
27#' @param data A data frame, or named list or vector. Alternatively, a
28#'   data mask created with [as_data_mask()] or
29#'   [new_data_mask()]. Objects in `data` have priority over those in
30#'   `env`. See the section about data masking.
31#'
32#' @param env The environment in which to evaluate `expr`. This
33#'   environment is not applicable for quosures because they have
34#'   their own environments.
35#' @seealso [nse-force] for the second leg of the tidy evaluation
36#'   framework.
37#'
38#'
39#' @section Data masking:
40#'
41#' Data masking refers to how columns or objects inside `data` have
42#' priority over objects defined in `env` (or in the quosure
43#' environment, if applicable). If there is a column `var` in `data`
44#' and an object `var` in `env`, and `expr` refers to `var`, the
45#' column has priority:
46#'
47#' ```
48#' var <- "this one?"
49#' data <- data.frame(var = rep("Or that one?", 3))
50#'
51#' within <- function(data, expr) {
52#'   eval_tidy(enquo(expr), data)
53#' }
54#'
55#' within(data, toupper(var))
56#' #> [1] "OR THAT ONE?" "OR THAT ONE?" "OR THAT ONE?"
57#' ```
58#'
59#' Because the columns or objects in `data` are always found first,
60#' before objects from `env`, we say that the data "masks" the
61#' environment.
62#'
63#'
64#' @section When should eval_tidy() be used instead of eval()?:
65#'
66#' `base::eval()` is sufficient for simple evaluation. Use
67#' `eval_tidy()` when you'd like to support expressions referring to
68#' the `.data` pronoun, or when you need to support quosures.
69#'
70#' If you're evaluating an expression captured with quasiquotation
71#' support, it is recommended to use `eval_tidy()` because users will
72#' likely unquote quosures.
73#'
74#' Note that unwrapping a quosure with [quo_get_expr()] does not
#' guarantee that there are no quosures inside the expression. Quosures
76#' might be unquoted anywhere. For instance, the following does not
77#' work reliably in the presence of nested quosures:
78#'
79#' ```
80#' my_quoting_fn <- function(x) {
81#'   x <- enquo(x)
82#'   expr <- quo_get_expr(x)
83#'   env <- quo_get_env(x)
84#'   eval(expr, env)
85#' }
86#'
87#' # Works:
88#' my_quoting_fn(toupper(letters))
89#'
90#' # Fails because of a nested quosure:
91#' my_quoting_fn(toupper(!!quo(letters)))
92#' ```
93#'
94#'
95#' @section Stack semantics of `eval_tidy()`:
96#'
97#' `eval_tidy()` always evaluates in a data mask, even when `data` is
98#' `NULL`. Because of this, it has different stack semantics than
99#' [base::eval()]:
100#'
101#' - Lexical side effects, such as assignment with `<-`, occur in the
102#'   mask rather than `env`.
103#'
104#' - Functions that require the evaluation environment to correspond
105#'   to a frame on the call stack do not work. This is why `return()`
106#'   called from a quosure does not work.
107#'
108#' - The mask environment creates a new branch in the tree
109#'   representation of backtraces (which you can visualise in a
110#'   [browser()] session with `lobstr::cst()`).
111#'
112#' See also [eval_bare()] for more information about these differences.
113#'
114#'
115#' @section Life cycle:
116#'
117#' **rlang 0.3.0**
118#'
119#' Passing an environment to `data` is deprecated. Please construct an
120#' rlang data mask with [new_data_mask()].
121#'
122#'
123#' @examples
124#'
125#' # With simple quoted expressions eval_tidy() works the same way as
126#' # eval():
127#' apple <- "apple"
128#' kiwi <- "kiwi"
129#' expr <- quote(paste(apple, kiwi))
130#' expr
131#'
132#' eval(expr)
133#' eval_tidy(expr)
134#'
135#' # Both accept a data mask as argument:
136#' data <- list(apple = "CARROT", kiwi = "TOMATO")
137#' eval(expr, data)
138#' eval_tidy(expr, data)
139#'
140#'
141#' # In addition eval_tidy() has support for quosures:
142#' with_data <- function(data, expr) {
143#'   quo <- enquo(expr)
144#'   eval_tidy(quo, data)
145#' }
146#' with_data(NULL, apple)
147#' with_data(data, apple)
148#' with_data(data, list(apple, kiwi))
149#'
150#' # Secondly eval_tidy() installs handy pronouns that allow users to
151#' # be explicit about where to find symbols:
152#' with_data(data, .data$apple)
153#' with_data(data, .env$apple)
154#'
155#'
156#' # Note that instead of using `.env` it is often equivalent and may
157#' # be preferred to unquote a value. There are two differences. First
158#' # unquoting happens earlier, when the quosure is created. Secondly,
159#' # subsetting `.env` with the `$` operator may be brittle because
160#' # `$` does not look through the parents of the environment.
161#' #
162#' # For instance using `.env$name` in a magrittr pipeline is an
#' # instance where this poses a problem, because the magrittr pipe
164#' # currently (as of v1.5.0) evaluates its operands in a *child* of
165#' # the current environment (this child environment is where it
166#' # defines the pronoun `.`).
167#' \dontrun{
168#'   data %>% with_data(!!kiwi)     # "kiwi"
169#'   data %>% with_data(.env$kiwi)  # NULL
170#' }
171#' @export
eval_tidy <- function(expr, data = NULL, env = caller_env()) {
  # Evaluation is delegated to compiled code. `.External2()` hands the
  # routine `expr`, `data` and `env`, in that order.
  .External2(
    rlang_ext2_eval_tidy,
    expr,
    data,
    env
  )
}
175
176# Helps work around roxygen loading issues
177#' @export
length.rlang_fake_data_pronoun <- function(...) {
  # Placeholder method: all arguments are ignored and `NULL` is returned.
  NULL
}
179#' @export
names.rlang_fake_data_pronoun <- function(...) {
  # Placeholder method: all arguments are ignored and `NULL` is returned.
  NULL
}
181#' @export
`$.rlang_fake_data_pronoun` <- function(...) {
  # Placeholder method: all arguments are ignored and `NULL` is returned.
  NULL
}
183#' @export
`[[.rlang_fake_data_pronoun` <- function(...) {
  # Placeholder method: all arguments are ignored and `NULL` is returned.
  NULL
}
185#' @export
print.rlang_fake_data_pronoun <- function(x, ...) {
  # Prints the same one-line label as `print.rlang_data_pronoun()`.
  cat_line("<pronoun>")

  # Calls without any argument used to be accepted (the signature was
  # `...` only), so keep tolerating them.
  if (missing(x)) {
    return(invisible(NULL))
  }

  # Print methods conventionally return their input invisibly so that
  # `print()` can be used inside pipelines and assignments.
  invisible(x)
}
187
188#' Data pronouns for tidy evaluation
189#'
190#' @description
191#'
192#' These pronouns allow you to be explicit about where to find objects
193#' when programming with data masked functions.
194#'
195#' ```
196#' m <- 10
197#' mtcars %>% mutate(disp = .data$disp * .env$m)
198#' ```
199#'
200#' * `.data` retrieves data-variables from the data frame.
201#' * `.env` retrieves env-variables from the environment.
202#'
203#' Because the lookup is explicit, there is no ambiguity between both
204#' kinds of variables. Compare:
205#'
206#' ```
207#' disp <- 10
208#' mtcars %>% mutate(disp = .data$disp * .env$disp)
209#' mtcars %>% mutate(disp = disp * disp)
210#' ```
211#'
212#' The `.data` object exported from rlang is also useful to import in
213#' your package namespace to avoid a `R CMD check` note when referring
214#' to objects from the data mask.
215#'
216#' Note that `.data` is only a pronoun, it is not a real data
217#' frame. This means that you can't take its names or map a function
218#' over the contents of `.data`. Similarly, `.env` is not an actual R
219#' environment. For instance, it doesn't have a parent and the
220#' subsetting operators behave differently.
221#'
222#' @name tidyeval-data
223#' @format NULL
224#' @export
.data <- structure(
  list(),
  class = "rlang_fake_data_pronoun"
)
#' @rdname tidyeval-data
#' @format NULL
#' @export
.env <- .data  # same placeholder object as `.data`
230
231
232#' Create a data mask
233#'
234#' @description
235#'
236#' \Sexpr[results=rd, stage=render]{rlang:::lifecycle("stable")}
237#'
238#' A data mask is an environment (or possibly multiple environments
239#' forming an ancestry) containing user-supplied objects. Objects in
240#' the mask have precedence over objects in the environment (i.e. they
241#' mask those objects). Many R functions evaluate quoted expressions
242#' in a data mask so these expressions can refer to objects within the
243#' user data.
244#'
245#' These functions let you construct a tidy eval data mask manually.
246#' They are meant for developers of tidy eval interfaces rather than
247#' for end users.
248#'
249#'
250#' @section Why build a data mask?:
251#'
252#' Most of the time you can just call [eval_tidy()] with a list or a
253#' data frame and the data mask will be constructed automatically.
254#' There are three main use cases for manual creation of data masks:
255#'
256#' * When [eval_tidy()] is called with the same data in a tight loop.
257#'   Because there is some overhead to creating tidy eval data masks,
258#'   constructing the mask once and reusing it for subsequent
259#'   evaluations may improve performance.
260#'
261#' * When several expressions should be evaluated in the exact same
262#'   environment because a quoted expression might create new objects
#'   that can be referred to in other quoted expressions evaluated at a
264#'   later time. One example of this is `tibble::lst()` where new
265#'   columns can refer to previous ones.
266#'
267#' * When your data mask requires special features. For instance the
268#'   data frame columns in dplyr data masks are implemented with
#'   [active bindings][base::makeActiveBinding].
270#'
271#'
272#' @section Building your own data mask:
273#'
274#' Unlike [base::eval()] which takes any kind of environments as data
275#' mask, [eval_tidy()] has specific requirements in order to support
276#' [quosures][nse-defuse]. For this reason you can't supply bare
277#' environments.
278#'
279#' There are two ways of constructing an rlang data mask manually:
280#'
281#' * `as_data_mask()` transforms a list or data frame to a data mask.
282#'   It automatically installs the data pronoun [`.data`][.data].
283#'
284#' * `new_data_mask()` is a bare bones data mask constructor for
285#'   environments. You can supply a bottom and a top environment in
286#'   case your data mask comprises multiple environments (see section
287#'   below).
288#'
289#'   Unlike `as_data_mask()` it does not install the `.data` pronoun
290#'   so you need to provide one yourself. You can provide a pronoun
291#'   constructed with `as_data_pronoun()` or your own pronoun class.
292#'
293#'   `as_data_pronoun()` will create a pronoun from a list, an
294#'   environment, or an rlang data mask. In the latter case, the whole
295#'   ancestry is looked up from the bottom to the top of the mask.
296#'   Functions stored in the mask are bypassed by the pronoun.
297#'
298#' Once you have built a data mask, simply pass it to [eval_tidy()] as
299#' the `data` argument. You can repeat this as many times as
300#' needed. Note that any objects created there (perhaps because of a
301#' call to `<-`) will persist in subsequent evaluations.
302#'
303#'
304#' @section Top and bottom of data mask:
305#'
306#' In some cases you'll need several levels in your data mask. One
307#' good reason is when you include functions in the mask. It's a good
308#' idea to keep data objects one level lower than function objects, so
309#' that the former cannot override the definitions of the latter (see
310#' examples).
311#'
312#' In that case, set up all your environments and keep track of the
313#' bottom child and the top parent. You'll need to pass both to
314#' `new_data_mask()`.
315#'
316#' Note that the parent of the top environment is completely
317#' undetermined, you shouldn't expect it to remain the same at all
318#' times. This parent is replaced during evaluation by [eval_tidy()]
319#' to one of the following environments:
320#'
321#' * The default environment passed as the `env` argument of `eval_tidy()`.
322#' * The environment of the current quosure being evaluated, if applicable.
323#'
324#' Consequently, all masking data should be contained between the
325#' bottom and top environment of the data mask.
326#'
327#' @param data A data frame or named vector of masking data.
328#' @return A data mask that you can supply to [eval_tidy()].
329#'
330#' @export
331#' @examples
332#' # Evaluating in a tidy evaluation environment enables all tidy
333#' # features:
334#' mask <- as_data_mask(mtcars)
335#' eval_tidy(quo(letters), mask)
336#'
337#' # You can install new pronouns in the mask:
338#' mask$.pronoun <- as_data_pronoun(list(foo = "bar", baz = "bam"))
339#' eval_tidy(quo(.pronoun$foo), mask)
340#'
341#' # In some cases the data mask can leak to the user, for example if
342#' # a function or formula is created in the data mask environment:
343#' cyl <- "user variable from the context"
344#' fn <- eval_tidy(quote(function() cyl), mask)
345#' fn()
346#'
347#' # If new objects are created in the mask, they persist in the
348#' # subsequent calls:
349#' eval_tidy(quote(new <- cyl + am), mask)
350#' eval_tidy(quote(new * 2), mask)
351#'
352#'
353#' # In some cases your data mask is a whole chain of environments
354#' # rather than a single environment. You'll have to use
355#' # `new_data_mask()` and let it know about the bottom of the mask
356#' # (the last child of the environment chain) and the topmost parent.
357#'
358#' # A common situation where you'll want a multiple-environment mask
359#' # is when you include functions in your mask. In that case you'll
360#' # put functions in the top environment and data in the bottom. This
361#' # will prevent the data from overwriting the functions.
362#' top <- new_environment(list(`+` = base::paste, c = base::paste))
363#'
364#' # Let's add a middle environment just for sport:
365#' middle <- env(top)
366#'
367#' # And finally the bottom environment containing data:
368#' bottom <- env(middle, a = "a", b = "b", c = "c")
369#'
370#' # We can now create a mask by supplying the top and bottom
371#' # environments:
372#' mask <- new_data_mask(bottom, top = top)
373#'
374#' # This data mask can be passed to eval_tidy() instead of a list or
375#' # data frame:
376#' eval_tidy(quote(a + b + c), data = mask)
377#'
378#' # Note how the function `c()` and the object `c` are looked up
379#' # properly because of the multi-level structure:
380#' eval_tidy(quote(c(a, b, c)), data = mask)
381#'
382#' # new_data_mask() does not create data pronouns, but
383#' # data pronouns can be added manually:
384#' mask$.fns <- as_data_pronoun(top)
385#'
386#' # The `.data` pronoun should generally be created from the
387#' # mask. This will ensure data is looked up throughout the whole
388#' # ancestry. Only non-function objects are looked up from this
389#' # pronoun:
390#' mask$.data <- as_data_pronoun(mask)
391#' mask$.data$c
392#'
393#' # Now we can reference the values with the pronouns:
394#' eval_tidy(quote(c(.data$a, .data$b, .data$c)), data = mask)
as_data_mask <- function(data) {
  # The work is done by the native routine `rlang_as_data_mask`, which
  # receives `data` as is.
  .Call(
    rlang_as_data_mask,
    data
  )
}
398#' @rdname as_data_mask
399#' @export
as_data_pronoun <- function(data) {
  # The work is done by the native routine `rlang_as_data_pronoun`,
  # which receives `data` as is.
  .Call(
    rlang_as_data_pronoun,
    data
  )
}
403
404#' @rdname as_data_mask
405#' @param bottom The environment containing masking objects if the
406#'   data mask is one environment deep. The bottom environment if the
#'   data mask comprises multiple environments.
408#'
409#'   If you haven't supplied `top`, this __must__ be an environment
410#'   that you own, i.e. that you have created yourself.
411#' @param top The last environment of the data mask. If the data mask
412#'   is only one environment deep, `top` should be the same as
413#'   `bottom`.
414#'
415#'   This __must__ be an environment that you own, i.e. that you have
416#'   created yourself. The parent of `top` will be changed by the tidy
417#'   eval engine and should be considered undetermined. Never make
#'   assumptions about the parent of `top`.
419#' @export
new_data_mask <- function(bottom, top = bottom) {
  # Both ends of the mask are passed to the native routine
  # `rlang_new_data_mask`; `top` falls back to `bottom` when omitted.
  .Call(
    rlang_new_data_mask,
    bottom,
    top
  )
}
423
424#' @export
`$.rlang_data_pronoun` <- function(x, nm) {
  # `$` and `[[` share a single lookup helper.
  data_pronoun_get(x = x, nm = nm)
}
428#' @export
`[[.rlang_data_pronoun` <- function(x, i, ...) {
  # `$` and `[[` share a single lookup helper; `...` is ignored.
  data_pronoun_get(x = x, nm = i)
}
data_pronoun_get <- function(x, nm) {
  if (is_string(nm)) {
    # The first element of the pronoun is handed to the native lookup
    # routine as the mask, together with `nm` converted to a symbol.
    .Call(rlang_data_pronoun_get, .subset2(x, 1), sym(nm))
  } else {
    abort("Must subset the data pronoun with a string.")
  }
}
abort_data_pronoun <- function(nm) {
  # Signals a classed error naming the missing column; `nm` is turned
  # into a string with `as_string()` for the message.
  abort(
    sprintf("Column `%s` not found in `.data`", as_string(nm)),
    "rlang_error_data_pronoun_not_found"
  )
}
443
444#' @export
`$.rlang_ctxt_pronoun` <- function(x, nm) {
  # `$` and `[[` share a single lookup helper.
  ctxt_pronoun_get(x = x, nm = nm)
}
448#' @export
`[[.rlang_ctxt_pronoun` <- function(x, i, ...) {
  # `$` and `[[` share a single lookup helper; `...` is ignored.
  ctxt_pronoun_get(x = x, nm = i)
}
ctxt_pronoun_get <- function(x, nm) {
  if (is_string(nm)) {
    # Look the name up by evaluating it as a symbol, with the pronoun
    # itself passed as the evaluation environment.
    eval_bare(sym(nm), x)
  } else {
    abort("Must subset the context pronoun with a string.")
  }
}
458
459#' @export
`$<-.rlang_data_pronoun` <- function(x, i, value) {
  # The data pronoun is read-only: every `$<-` attempt is rejected.
  msg <- "Can't modify the data pronoun"
  abort(msg)
}
463#' @export
`[[<-.rlang_data_pronoun` <- function(x, i, value) {
  # The data pronoun is read-only: every `[[<-` attempt is rejected.
  msg <- "Can't modify the data pronoun"
  abort(msg)
}
467#' @export
`$<-.rlang_ctxt_pronoun` <- function(x, i, value) {
  # The context pronoun is read-only: every `$<-` attempt is rejected.
  msg <- "Can't modify the context pronoun"
  abort(msg)
}
471#' @export
`[[<-.rlang_ctxt_pronoun` <- function(x, i, value) {
  # The context pronoun is read-only: every `[[<-` attempt is rejected.
  msg <- "Can't modify the context pronoun"
  abort(msg)
}
475
476#' @export
`[.rlang_data_pronoun` <- function(x, i, ...) {
  # Single-bracket subsetting always fails and points at `[[` and `$`.
  msg <- "`[` is not supported by .data pronoun, use `[[` or $ instead."
  abort(msg)
}
480#' @export
names.rlang_data_pronoun <- function(x) {
  # `names()` on the data pronoun always fails.
  msg <- "Can't take the `names()` of the `.data` pronoun"
  abort(msg)
}
484#' @export
length.rlang_data_pronoun <- function(x) {
  # `length()` on the data pronoun always fails.
  msg <- "Can't take the `length()` of the `.data` pronoun"
  abort(msg)
}
488
489#' @export
names.rlang_ctxt_pronoun <- function(x) {
  # `names()` on the context pronoun always fails.
  msg <- "Can't take the `names()` of the context pronoun"
  abort(msg)
}
493#' @export
length.rlang_ctxt_pronoun <- function(x) {
  # `length()` on the context pronoun always fails.
  msg <- "Can't take the `length()` of the context pronoun"
  abort(msg)
}
497
498#' @export
print.rlang_data_pronoun <- function(x, ...) {
  # Only a fixed label is shown; `...` is ignored.
  label <- "<pronoun>"
  cat_line(label)

  # Return the pronoun invisibly, as print methods conventionally do.
  invisible(x)
}
503#' @importFrom utils str
504#' @export
str.rlang_data_pronoun <- function(object, ...) {
  # Only a fixed label is shown; `object` and `...` are not inspected.
  label <- "<pronoun>"
  cat_line(label)
}
508
509# Used for deparsing
is_data_pronoun <- function(x) {
  # Must be a two-argument call to `[[` or `$` ...
  if (!is_call(x, c("[[", "$"), n = 2L)) {
    return(FALSE)
  }

  # ... whose first argument is exactly `dot_data_sym`.
  identical(node_cadr(x), dot_data_sym)
}
data_pronoun_name <- function(x) {
  # `x$name`: the name is the second argument and must be a symbol.
  if (is_call(x, "$")) {
    field <- node_cadr(node_cdr(x))
    if (!is_symbol(field)) {
      return(NULL)
    }
    return(as_string(field))
  }

  # `x[["name"]]`: the name is the second argument and must be a string.
  if (is_call(x, "[[")) {
    index <- node_cadr(node_cdr(x))
    if (!is_string(index)) {
      return(NULL)
    }
    return(index)
  }

  # Any other input falls through and yields `NULL` implicitly.
}
532
is_data_mask <- function(x) {
  # A data mask is recognised as an environment that has this flag
  # binding.
  flag <- ".__rlang_data_mask__."
  is_environment(x) && env_has(x, flag)
}
536