1 #include <rlang.h>
2 #include "internal.h"
3 
4 
5 static sexp* quo_mask_flag_sym = NULL;
6 static sexp* data_mask_flag_sym = NULL;
7 
8 enum rlang_mask_type {
9   RLANG_MASK_DATA,     // Full data mask
10   RLANG_MASK_QUOSURE,  // Quosure mask with only `~` binding
11   RLANG_MASK_NONE
12 };
13 
14 struct rlang_mask_info {
15   sexp* mask;
16   enum rlang_mask_type type;
17 };
18 
mask_info(sexp * mask)19 static struct rlang_mask_info mask_info(sexp* mask) {
20   if (r_typeof(mask) != r_type_environment) {
21     return (struct rlang_mask_info) { r_null, RLANG_MASK_NONE };
22   }
23 
24   sexp* flag;
25 
26   flag = r_env_find_anywhere(mask, data_mask_flag_sym);
27   if (flag != r_syms_unbound) {
28     return (struct rlang_mask_info) { flag, RLANG_MASK_DATA };
29   }
30 
31   flag = r_env_find_anywhere(mask, quo_mask_flag_sym);
32   if (flag != r_syms_unbound) {
33     return (struct rlang_mask_info) { flag, RLANG_MASK_QUOSURE };
34   }
35 
36   return (struct rlang_mask_info) { r_null, RLANG_MASK_NONE };
37 }
38 
39 
40 static sexp* data_pronoun_class = NULL;
41 static sexp* ctxt_pronoun_class = NULL;
42 static sexp* data_mask_env_sym = NULL;
43 
rlang_new_data_pronoun(sexp * mask)44 static sexp* rlang_new_data_pronoun(sexp* mask) {
45   sexp* pronoun = KEEP(r_new_vector(r_type_list, 1));
46 
47   r_list_poke(pronoun, 0, mask);
48   r_poke_attribute(pronoun, r_syms_class, data_pronoun_class);
49 
50   FREE(1);
51   return pronoun;
52 }
rlang_new_ctxt_pronoun(sexp * top)53 static sexp* rlang_new_ctxt_pronoun(sexp* top) {
54   sexp* pronoun = KEEP(r_new_environment(r_env_parent(top), 0));
55 
56   r_poke_attribute(pronoun, r_syms_class, ctxt_pronoun_class);
57 
58   FREE(1);
59   return pronoun;
60 }
61 
poke_ctxt_env(sexp * mask,sexp * env)62 void poke_ctxt_env(sexp* mask, sexp* env) {
63   sexp* ctxt_pronoun = r_env_find(mask, data_mask_env_sym);
64 
65   if (ctxt_pronoun == r_syms_unbound) {
66     r_abort("Internal error: Can't find context pronoun in data mask");
67   }
68 
69   r_env_poke_parent(ctxt_pronoun, env);
70 }
71 
72 
73 static sexp* empty_names_chr;
74 
check_unique_names(sexp * x)75 static void check_unique_names(sexp* x) {
76   // Allow empty lists
77   if (!r_length(x)) {
78     return ;
79   }
80 
81   sexp* names = r_names(x);
82   if (names == r_null) {
83     r_abort("`data` must be uniquely named but does not have names");
84   }
85   if (r_vec_find_first_duplicate(names, empty_names_chr, NULL)) {
86     r_abort("`data` must be uniquely named but has duplicate columns");
87   }
88 }
rlang_as_data_pronoun(sexp * x)89 sexp* rlang_as_data_pronoun(sexp* x) {
90   int n_kept = 0;
91 
92   switch (r_typeof(x)) {
93   case r_type_logical:
94   case r_type_integer:
95   case r_type_double:
96   case r_type_complex:
97   case r_type_character:
98   case r_type_raw:
99     x = KEEP_N(r_vec_coerce(x, r_type_list), n_kept);
100     // fallthrough
101   case r_type_list:
102     check_unique_names(x);
103     x = KEEP_N(r_list_as_environment(x, r_empty_env), n_kept);
104     break;
105   case r_type_environment:
106     break;
107   default:
108     r_abort("`data` must be an uniquely named vector, list, data frame or environment");
109   }
110 
111   sexp* pronoun = rlang_new_data_pronoun(x);
112 
113   FREE(n_kept);
114   return pronoun;
115 }
116 
117 
118 static sexp* data_mask_top_env_sym = NULL;
119 
check_data_mask_input(sexp * env,const char * arg)120 static void check_data_mask_input(sexp* env, const char* arg) {
121   if (r_typeof(env) != r_type_environment) {
122     r_abort("Can't create data mask because `%s` must be an environment", arg);
123   }
124 }
check_data_mask_top(sexp * bottom,sexp * top)125 static void check_data_mask_top(sexp* bottom, sexp* top) {
126   sexp* cur = bottom;
127 
128   while (cur != r_empty_env) {
129     if (cur == top) {
130       return ;
131     }
132     cur = r_env_parent(cur);
133   }
134 
135   r_abort("Can't create data mask because `top` is not a parent of `bottom`");
136 }
137 
138 static sexp* env_sym = NULL;
139 static sexp* old_sym = NULL;
140 static sexp* mask_sym = NULL;
141 
142 static sexp* tilde_fn = NULL;
143 static sexp* restore_mask_fn = NULL;
144 
on_exit_restore_lexical_env(sexp * mask,sexp * old,sexp * frame)145 static void on_exit_restore_lexical_env(sexp* mask, sexp* old, sexp* frame) {
146   sexp* fn = KEEP(r_clone(restore_mask_fn));
147 
148   sexp* env = KEEP(r_new_environment(r_base_env, 2));
149   r_env_poke(env, mask_sym, mask);
150   r_env_poke(env, old_sym, old);
151   r_fn_poke_env(fn, env);
152 
153   sexp* call = KEEP(r_new_call(fn, r_null));
154   r_on_exit(call, frame);
155 
156   FREE(3);
157 }
158 
rlang_new_data_mask(sexp * bottom,sexp * top)159 sexp* rlang_new_data_mask(sexp* bottom, sexp* top) {
160   sexp* data_mask;
161 
162   if (bottom == r_null) {
163     bottom = KEEP(r_new_environment(r_empty_env, 100));
164     data_mask = bottom;
165   } else {
166     check_data_mask_input(bottom, "bottom");
167     // Create a child because we don't know what might be in `bottom`
168     // and we need to clear its contents without deleting any object
169     // created in the data mask environment
170     data_mask = KEEP(r_new_environment(bottom, 100));
171   }
172 
173   if (top == r_null) {
174     top = bottom;
175   } else {
176     check_data_mask_input(top, "top");
177   }
178   if (top != bottom) {
179     check_data_mask_top(bottom, top);
180   }
181 
182   sexp* ctxt_pronoun = KEEP(rlang_new_ctxt_pronoun(top));
183 
184   r_env_poke(data_mask, r_syms_tilde, tilde_fn);
185   r_env_poke(data_mask, data_mask_flag_sym, data_mask);
186   r_env_poke(data_mask, data_mask_env_sym, ctxt_pronoun);
187   r_env_poke(data_mask, data_mask_top_env_sym, top);
188 
189   FREE(2);
190   return data_mask;
191 }
192 
193 
rlang_is_data_mask(sexp * env)194 sexp* rlang_is_data_mask(sexp* env) {
195   return r_lgl(mask_info(env).type == RLANG_MASK_DATA);
196 }
197 
mask_find(sexp * env,sexp * sym)198 static sexp* mask_find(sexp* env, sexp* sym) {
199   if (r_typeof(sym) != r_type_symbol) {
200     r_abort("Internal error: Data pronoun must be subset with a symbol");
201   }
202 
203   sexp* top_env = r_env_find(env, data_mask_top_env_sym);
204   if (r_typeof(top_env) == r_type_environment) {
205     // Start lookup in the parent if the pronoun wraps a data mask
206     env = r_env_parent(env);
207   } else {
208     // Data pronouns created from lists or data frames are converted
209     // to a simple environment whose ancestry shouldn't be looked up.
210     top_env = env;
211   }
212   int n_kept = 0;
213   KEEP_N(top_env, n_kept);
214 
215   sexp* cur = env;
216   do {
217     sexp* obj = r_env_find(cur, sym);
218     if (TYPEOF(obj) == PROMSXP) {
219       KEEP(obj);
220       obj = r_eval(obj, r_empty_env);
221       FREE(1);
222     }
223 
224     if (obj != r_syms_unbound) {
225       FREE(n_kept);
226       return obj;
227     }
228 
229     if (cur == top_env) {
230       break;
231     } else {
232       cur = r_env_parent(cur);
233     }
234   } while (cur != r_empty_env);
235 
236   FREE(n_kept);
237   return r_syms_unbound;
238 }
rlang_data_pronoun_get(sexp * pronoun,sexp * sym)239 sexp* rlang_data_pronoun_get(sexp* pronoun, sexp* sym) {
240   if (r_typeof(pronoun) != r_type_environment) {
241     r_abort("Internal error: Data pronoun must wrap an environment");
242   }
243 
244   sexp* obj = mask_find(pronoun, sym);
245 
246   if (obj == r_syms_unbound) {
247     sexp* call = KEEP(r_parse("rlang:::abort_data_pronoun(x)"));
248     r_eval_with_x(call, r_base_env, sym);
249     r_abort("Internal error: .data subsetting should have failed earlier");
250   }
251 
252   r_mark_shared(obj);
253   return obj;
254 }
255 
warn_env_as_mask_once()256 static void warn_env_as_mask_once() {
257   const char* msg =
258     "Passing an environment as data mask is deprecated.\n"
259     "Please use `new_data_mask()` to transform your environment to a mask.\n"
260     "\n"
261     "  env <- env(foo = \"bar\")\n"
262     "\n"
263     "  # Bad:\n"
264     "  as_data_mask(env)\n"
265     "  eval_tidy(expr, env)\n"
266     "\n"
267     "  # Good:\n"
268     "  mask <- new_data_mask(env)\n"
269     "  eval_tidy(expr, mask)";
270   r_warn_deprecated(msg, msg);
271 }
272 
273 static sexp* data_pronoun_sym = NULL;
274 static r_ssize mask_length(r_ssize n);
275 
rlang_as_data_mask(sexp * data)276 sexp* rlang_as_data_mask(sexp* data) {
277   if (mask_info(data).type == RLANG_MASK_DATA) {
278     return data;
279   }
280   if (data == r_null) {
281     return rlang_new_data_mask(r_null, r_null);
282   }
283 
284   int n_protect = 0;
285 
286   sexp* bottom = NULL;
287 
288   switch (r_typeof(data)) {
289   case r_type_environment:
290     warn_env_as_mask_once();
291     bottom = KEEP_N(r_env_clone(data, NULL), n_protect);
292     break;
293 
294   case r_type_logical:
295   case r_type_integer:
296   case r_type_double:
297   case r_type_complex:
298   case r_type_character:
299   case r_type_raw:
300     data = r_vec_coerce(data, r_type_list);
301     KEEP_N(data, n_protect);
302     // fallthrough:
303 
304   case r_type_list: {
305     check_unique_names(data);
306 
307     sexp* names = r_names(data);
308 
309     r_ssize n_mask = mask_length(r_length(data));
310     bottom = KEEP_N(r_new_environment(r_empty_env, n_mask), n_protect);
311 
312     if (names != r_null) {
313       r_ssize n = r_length(data);
314 
315       sexp* const * p_names = r_chr_deref_const(names);
316       sexp* const * p_data = r_list_deref_const(data);
317 
318       for (r_ssize i = 0; i < n; ++i) {
319         // Ignore empty or missing names
320         sexp* nm = p_names[i];
321         if (r_str_is_name(nm)) {
322           r_env_poke(bottom, r_str_as_symbol(nm), p_data[i]);
323         }
324       }
325     }
326 
327     break;
328   }
329 
330   default:
331     r_abort("`data` must be a vector, list, data frame, or environment");
332   }
333 
334   sexp* data_mask = KEEP_N(rlang_new_data_mask(bottom, bottom), n_protect);
335 
336   sexp* data_pronoun = KEEP_N(rlang_as_data_pronoun(data_mask), n_protect);
337   r_env_poke(bottom, data_pronoun_sym, data_pronoun);
338 
339   FREE(n_protect);
340   return data_mask;
341 }
342 
343 static
mask_length(r_ssize n)344 r_ssize mask_length(r_ssize n) {
345   r_ssize n_grown = r_double_as_ssize(r_double_mult(r_ssize_as_double(n), 1.05));
346   return r_ssize_max(n_grown, r_ssize_add(n, 20));
347 }
348 
349 // For compatibility of the exported C callable
350 // TODO: warn
rlang_new_data_mask_compat(sexp * bottom,sexp * top,sexp * parent)351 sexp* rlang_new_data_mask_compat(sexp* bottom, sexp* top, sexp* parent) {
352   return rlang_new_data_mask(bottom, top);
353 }
rlang_as_data_mask_compat(sexp * data,sexp * parent)354 sexp* rlang_as_data_mask_compat(sexp* data, sexp* parent) {
355   return rlang_as_data_mask(data);
356 }
357 
358 
359 static sexp* tilde_prim = NULL;
360 
base_tilde_eval(sexp * tilde,sexp * quo_env)361 static sexp* base_tilde_eval(sexp* tilde, sexp* quo_env) {
362   if (r_f_has_env(tilde)) {
363     return tilde;
364   }
365 
366   // Inline the base primitive because overscopes override `~` to make
367   // quosures self-evaluate
368   tilde = KEEP(r_new_call(tilde_prim, r_node_cdr(tilde)));
369   tilde = KEEP(r_eval(tilde, quo_env));
370 
371   // Change it back because the result still has the primitive inlined
372   r_node_poke_car(tilde, r_syms_tilde);
373 
374   FREE(2);
375   return tilde;
376 }
377 
env_get_top_binding(sexp * mask)378 sexp* env_get_top_binding(sexp* mask) {
379   sexp* top = r_env_find(mask, data_mask_top_env_sym);
380 
381   if (top == r_syms_unbound) {
382     r_abort("Internal error: Can't find .top pronoun in data mask");
383   }
384   if (r_typeof(top) != r_type_environment) {
385     r_abort("Internal error: Unexpected .top pronoun type");
386   }
387 
388   return top;
389 }
390 
391 
392 static sexp* env_poke_parent_fn = NULL;
393 static sexp* env_poke_fn = NULL;
394 
rlang_tilde_eval(sexp * tilde,sexp * current_frame,sexp * caller_frame)395 sexp* rlang_tilde_eval(sexp* tilde, sexp* current_frame, sexp* caller_frame) {
396   // Remove srcrefs from system call
397   r_poke_attribute(tilde, r_syms_srcref, r_null);
398 
399   if (!rlang_is_quosure(tilde)) {
400     return base_tilde_eval(tilde, caller_frame);
401   }
402   if (quo_is_missing(tilde)) {
403     return(r_missing_arg());
404   }
405 
406   sexp* expr = rlang_quo_get_expr(tilde);
407   if (!r_is_symbolic(expr)) {
408     return expr;
409   }
410 
411   sexp* quo_env = rlang_quo_get_env(tilde);
412   if (r_typeof(quo_env) != r_type_environment) {
413     r_abort("Internal error: Quosure environment is corrupt");
414   }
415 
416   int n_protect = 0;
417   sexp* top;
418   struct rlang_mask_info info = mask_info(caller_frame);
419 
420   switch (info.type) {
421   case RLANG_MASK_DATA:
422     top = KEEP_N(env_get_top_binding(info.mask), n_protect);
423     // Update `.env` pronoun to current quosure env temporarily
424     poke_ctxt_env(info.mask, quo_env);
425     break;
426   case RLANG_MASK_QUOSURE:
427     top = info.mask;
428     break;
429   case RLANG_MASK_NONE:
430     r_abort("Internal error: Can't find the data mask");
431   }
432 
433   // Unless the quosure was created in the mask, swap lexical contexts
434   // temporarily by rechaining the top of the mask to the quosure
435   // environment
436   if (!r_env_inherits(info.mask, quo_env, top)) {
437     // Unwind-protect the restoration of original parents
438     on_exit_restore_lexical_env(info.mask, r_env_parent(top), current_frame);
439     r_env_poke_parent(top, quo_env);
440   }
441 
442   FREE(n_protect);
443   return r_eval(expr, info.mask);
444 }
445 
rlang_ext2_tilde_eval(sexp * call,sexp * op,sexp * args,sexp * rho)446 sexp* rlang_ext2_tilde_eval(sexp* call, sexp* op, sexp* args, sexp* rho) {
447   args = r_node_cdr(args);
448   sexp* tilde = r_node_car(args); args = r_node_cdr(args);
449   sexp* current_frame = r_node_car(args); args = r_node_cdr(args);
450   sexp* caller_frame = r_node_car(args);
451   return rlang_tilde_eval(tilde, current_frame, caller_frame);
452 }
453 
454 static const char* data_mask_objects_names[5] = {
455   ".__tidyeval_data_mask__.", "~", ".top_env", ".env", NULL
456 };
457 
458 // Soft-deprecated in rlang 0.2.0
rlang_data_mask_clean(sexp * mask)459 sexp* rlang_data_mask_clean(sexp* mask) {
460   sexp* bottom = r_env_parent(mask);
461   sexp* top = r_eval(data_mask_top_env_sym, mask);
462 
463   KEEP(top); // Help rchk
464 
465   if (top == r_null) {
466     top = bottom;
467   }
468 
469   // At this level we only want to remove our own stuff
470   r_env_unbind_strings(mask, data_mask_objects_names);
471 
472   // Remove everything in the other levels
473   sexp* env = bottom;
474   sexp* parent = r_env_parent(top);
475   while (env != parent) {
476     sexp* nms = KEEP(r_env_names(env));
477     r_env_unbind_names(env, nms);
478     FREE(1);
479     env = r_env_parent(env);
480   }
481 
482   FREE(1);
483   return mask;
484 }
485 
486 
new_quosure_mask(sexp * env)487 static sexp* new_quosure_mask(sexp* env) {
488   sexp* mask = KEEP(r_new_environment(env, 3));
489   r_env_poke(mask, r_syms_tilde, tilde_fn);
490   r_env_poke(mask, quo_mask_flag_sym, mask);
491   FREE(1);
492   return mask;
493 }
494 
rlang_eval_tidy(sexp * expr,sexp * data,sexp * env)495 sexp* rlang_eval_tidy(sexp* expr, sexp* data, sexp* env) {
496   int n_protect = 0;
497 
498   if (rlang_is_quosure(expr)) {
499     env = r_quo_get_env(expr);
500     expr = r_quo_get_expr(expr);
501   }
502 
503   // If there is no data, we only need to mask `~` with the definition
504   // for quosure thunks. Otherwise we create a heavier data mask with
505   // all the masking objects, data pronouns, etc.
506   if (data == r_null) {
507     sexp* mask = KEEP_N(new_quosure_mask(env), n_protect);
508     sexp* out = r_eval(expr, mask);
509     FREE(n_protect);
510     return out;
511   }
512 
513   sexp* mask = KEEP_N(rlang_as_data_mask(data), n_protect);
514   sexp* top = KEEP_N(env_get_top_binding(mask), n_protect);
515 
516   // Rechain the mask on the new lexical env but don't restore it on
517   // exit. This way leaked masks inherit from a somewhat sensible
518   // environment. We could do better with ALTENV and two-parent data
519   // masks:
520   //
521   // * We'd create a new two-parents evaluation env for each quosure.
522   //   The first parent would be the mask and the second the lexical
523   //   environment.
524   //
525   // * The data mask top would always inherit from the empty
526   //   environment.
527   //
528   // * Look-up in leaked environments would proceed from the data mask
529   //   to the appropriate lexical environment (from quosures or from
530   //   the `env` argument of eval_tidy()).
531   if (!r_env_inherits(mask, env, top)) {
532     poke_ctxt_env(mask, env);
533     r_env_poke_parent(top, env);
534   }
535 
536   sexp* out = r_eval(expr, mask);
537   FREE(n_protect);
538   return out;
539 }
540 
rlang_ext2_eval_tidy(sexp * call,sexp * op,sexp * args,sexp * rho)541 sexp* rlang_ext2_eval_tidy(sexp* call, sexp* op, sexp* args, sexp* rho) {
542   args = r_node_cdr(args);
543   sexp* expr = r_node_car(args); args = r_node_cdr(args);
544   sexp* data = r_node_car(args); args = r_node_cdr(args);
545   sexp* env = r_node_car(args);
546   return rlang_eval_tidy(expr, data, env);
547 }
548 
549 
rlang_init_eval_tidy()550 void rlang_init_eval_tidy() {
551   sexp* rlang_ns_env = KEEP(r_ns_env("rlang"));
552 
553   tilde_fn = r_parse_eval(
554     "function(...) {                          \n"
555     "  .External2(rlang_ext2_tilde_eval,      \n"
556     "    sys.call(),     # Quosure env        \n"
557     "    environment(),  # Unwind-protect env \n"
558     "    parent.frame()  # Lexical env        \n"
559     "  )                                      \n"
560     "}                                        \n",
561     rlang_ns_env
562   );
563   r_mark_precious(tilde_fn);
564 
565   data_pronoun_class = r_chr("rlang_data_pronoun");
566   r_mark_precious(data_pronoun_class);
567 
568   ctxt_pronoun_class = r_chr("rlang_ctxt_pronoun");
569   r_mark_precious(ctxt_pronoun_class);
570 
571   empty_names_chr = r_new_vector(r_type_character, 2);
572   r_mark_precious(empty_names_chr);
573   r_chr_poke(empty_names_chr, 0, r_string(""));
574   r_chr_poke(empty_names_chr, 1, r_missing_str);
575 
576   quo_mask_flag_sym = r_sym(".__tidyeval_quosure_mask__.");
577   data_mask_flag_sym = r_sym(".__tidyeval_data_mask__.");
578   data_mask_env_sym = r_sym(".env");
579   data_mask_top_env_sym = r_sym(".top_env");
580   data_pronoun_sym = r_sym(".data");
581 
582   tilde_prim = r_base_ns_get("~");
583   env_poke_parent_fn = rlang_ns_get("env_poke_parent");
584   env_poke_fn = rlang_ns_get("env_poke");
585 
586   env_sym = r_sym("env");
587   old_sym = r_sym("old");
588   mask_sym = r_sym("mask");
589 
590   restore_mask_fn = r_parse_eval(
591     "function() {                          \n"
592     "  ctxt_pronoun <- `mask`$.env         \n"
593     "  if (!is.null(ctxt_pronoun)) {       \n"
594     "    parent.env(ctxt_pronoun) <- `old` \n"
595     "  }                                   \n"
596     "                                      \n"
597     "  top <- `mask`$.top_env              \n"
598     "  if (is.null(top)) {                 \n"
599     "    top <- `mask`                     \n"
600     "  }                                   \n"
601     "                                      \n"
602     "  parent.env(top) <- `old`            \n"
603     "}                                     \n",
604     r_base_env
605   );
606   r_mark_precious(restore_mask_fn);
607 
608   FREE(1);
609 }
610