1 #include <rlang.h>
2 #include "internal.h"
3
4
5 static sexp* quo_mask_flag_sym = NULL;
6 static sexp* data_mask_flag_sym = NULL;
7
8 enum rlang_mask_type {
9 RLANG_MASK_DATA, // Full data mask
10 RLANG_MASK_QUOSURE, // Quosure mask with only `~` binding
11 RLANG_MASK_NONE
12 };
13
14 struct rlang_mask_info {
15 sexp* mask;
16 enum rlang_mask_type type;
17 };
18
mask_info(sexp * mask)19 static struct rlang_mask_info mask_info(sexp* mask) {
20 if (r_typeof(mask) != r_type_environment) {
21 return (struct rlang_mask_info) { r_null, RLANG_MASK_NONE };
22 }
23
24 sexp* flag;
25
26 flag = r_env_find_anywhere(mask, data_mask_flag_sym);
27 if (flag != r_syms_unbound) {
28 return (struct rlang_mask_info) { flag, RLANG_MASK_DATA };
29 }
30
31 flag = r_env_find_anywhere(mask, quo_mask_flag_sym);
32 if (flag != r_syms_unbound) {
33 return (struct rlang_mask_info) { flag, RLANG_MASK_QUOSURE };
34 }
35
36 return (struct rlang_mask_info) { r_null, RLANG_MASK_NONE };
37 }
38
39
40 static sexp* data_pronoun_class = NULL;
41 static sexp* ctxt_pronoun_class = NULL;
42 static sexp* data_mask_env_sym = NULL;
43
rlang_new_data_pronoun(sexp * mask)44 static sexp* rlang_new_data_pronoun(sexp* mask) {
45 sexp* pronoun = KEEP(r_new_vector(r_type_list, 1));
46
47 r_list_poke(pronoun, 0, mask);
48 r_poke_attribute(pronoun, r_syms_class, data_pronoun_class);
49
50 FREE(1);
51 return pronoun;
52 }
rlang_new_ctxt_pronoun(sexp * top)53 static sexp* rlang_new_ctxt_pronoun(sexp* top) {
54 sexp* pronoun = KEEP(r_new_environment(r_env_parent(top), 0));
55
56 r_poke_attribute(pronoun, r_syms_class, ctxt_pronoun_class);
57
58 FREE(1);
59 return pronoun;
60 }
61
poke_ctxt_env(sexp * mask,sexp * env)62 void poke_ctxt_env(sexp* mask, sexp* env) {
63 sexp* ctxt_pronoun = r_env_find(mask, data_mask_env_sym);
64
65 if (ctxt_pronoun == r_syms_unbound) {
66 r_abort("Internal error: Can't find context pronoun in data mask");
67 }
68
69 r_env_poke_parent(ctxt_pronoun, env);
70 }
71
72
73 static sexp* empty_names_chr;
74
check_unique_names(sexp * x)75 static void check_unique_names(sexp* x) {
76 // Allow empty lists
77 if (!r_length(x)) {
78 return ;
79 }
80
81 sexp* names = r_names(x);
82 if (names == r_null) {
83 r_abort("`data` must be uniquely named but does not have names");
84 }
85 if (r_vec_find_first_duplicate(names, empty_names_chr, NULL)) {
86 r_abort("`data` must be uniquely named but has duplicate columns");
87 }
88 }
rlang_as_data_pronoun(sexp * x)89 sexp* rlang_as_data_pronoun(sexp* x) {
90 int n_kept = 0;
91
92 switch (r_typeof(x)) {
93 case r_type_logical:
94 case r_type_integer:
95 case r_type_double:
96 case r_type_complex:
97 case r_type_character:
98 case r_type_raw:
99 x = KEEP_N(r_vec_coerce(x, r_type_list), n_kept);
100 // fallthrough
101 case r_type_list:
102 check_unique_names(x);
103 x = KEEP_N(r_list_as_environment(x, r_empty_env), n_kept);
104 break;
105 case r_type_environment:
106 break;
107 default:
108 r_abort("`data` must be an uniquely named vector, list, data frame or environment");
109 }
110
111 sexp* pronoun = rlang_new_data_pronoun(x);
112
113 FREE(n_kept);
114 return pronoun;
115 }
116
117
118 static sexp* data_mask_top_env_sym = NULL;
119
check_data_mask_input(sexp * env,const char * arg)120 static void check_data_mask_input(sexp* env, const char* arg) {
121 if (r_typeof(env) != r_type_environment) {
122 r_abort("Can't create data mask because `%s` must be an environment", arg);
123 }
124 }
check_data_mask_top(sexp * bottom,sexp * top)125 static void check_data_mask_top(sexp* bottom, sexp* top) {
126 sexp* cur = bottom;
127
128 while (cur != r_empty_env) {
129 if (cur == top) {
130 return ;
131 }
132 cur = r_env_parent(cur);
133 }
134
135 r_abort("Can't create data mask because `top` is not a parent of `bottom`");
136 }
137
138 static sexp* env_sym = NULL;
139 static sexp* old_sym = NULL;
140 static sexp* mask_sym = NULL;
141
142 static sexp* tilde_fn = NULL;
143 static sexp* restore_mask_fn = NULL;
144
on_exit_restore_lexical_env(sexp * mask,sexp * old,sexp * frame)145 static void on_exit_restore_lexical_env(sexp* mask, sexp* old, sexp* frame) {
146 sexp* fn = KEEP(r_clone(restore_mask_fn));
147
148 sexp* env = KEEP(r_new_environment(r_base_env, 2));
149 r_env_poke(env, mask_sym, mask);
150 r_env_poke(env, old_sym, old);
151 r_fn_poke_env(fn, env);
152
153 sexp* call = KEEP(r_new_call(fn, r_null));
154 r_on_exit(call, frame);
155
156 FREE(3);
157 }
158
rlang_new_data_mask(sexp * bottom,sexp * top)159 sexp* rlang_new_data_mask(sexp* bottom, sexp* top) {
160 sexp* data_mask;
161
162 if (bottom == r_null) {
163 bottom = KEEP(r_new_environment(r_empty_env, 100));
164 data_mask = bottom;
165 } else {
166 check_data_mask_input(bottom, "bottom");
167 // Create a child because we don't know what might be in `bottom`
168 // and we need to clear its contents without deleting any object
169 // created in the data mask environment
170 data_mask = KEEP(r_new_environment(bottom, 100));
171 }
172
173 if (top == r_null) {
174 top = bottom;
175 } else {
176 check_data_mask_input(top, "top");
177 }
178 if (top != bottom) {
179 check_data_mask_top(bottom, top);
180 }
181
182 sexp* ctxt_pronoun = KEEP(rlang_new_ctxt_pronoun(top));
183
184 r_env_poke(data_mask, r_syms_tilde, tilde_fn);
185 r_env_poke(data_mask, data_mask_flag_sym, data_mask);
186 r_env_poke(data_mask, data_mask_env_sym, ctxt_pronoun);
187 r_env_poke(data_mask, data_mask_top_env_sym, top);
188
189 FREE(2);
190 return data_mask;
191 }
192
193
rlang_is_data_mask(sexp * env)194 sexp* rlang_is_data_mask(sexp* env) {
195 return r_lgl(mask_info(env).type == RLANG_MASK_DATA);
196 }
197
mask_find(sexp * env,sexp * sym)198 static sexp* mask_find(sexp* env, sexp* sym) {
199 if (r_typeof(sym) != r_type_symbol) {
200 r_abort("Internal error: Data pronoun must be subset with a symbol");
201 }
202
203 sexp* top_env = r_env_find(env, data_mask_top_env_sym);
204 if (r_typeof(top_env) == r_type_environment) {
205 // Start lookup in the parent if the pronoun wraps a data mask
206 env = r_env_parent(env);
207 } else {
208 // Data pronouns created from lists or data frames are converted
209 // to a simple environment whose ancestry shouldn't be looked up.
210 top_env = env;
211 }
212 int n_kept = 0;
213 KEEP_N(top_env, n_kept);
214
215 sexp* cur = env;
216 do {
217 sexp* obj = r_env_find(cur, sym);
218 if (TYPEOF(obj) == PROMSXP) {
219 KEEP(obj);
220 obj = r_eval(obj, r_empty_env);
221 FREE(1);
222 }
223
224 if (obj != r_syms_unbound) {
225 FREE(n_kept);
226 return obj;
227 }
228
229 if (cur == top_env) {
230 break;
231 } else {
232 cur = r_env_parent(cur);
233 }
234 } while (cur != r_empty_env);
235
236 FREE(n_kept);
237 return r_syms_unbound;
238 }
rlang_data_pronoun_get(sexp * pronoun,sexp * sym)239 sexp* rlang_data_pronoun_get(sexp* pronoun, sexp* sym) {
240 if (r_typeof(pronoun) != r_type_environment) {
241 r_abort("Internal error: Data pronoun must wrap an environment");
242 }
243
244 sexp* obj = mask_find(pronoun, sym);
245
246 if (obj == r_syms_unbound) {
247 sexp* call = KEEP(r_parse("rlang:::abort_data_pronoun(x)"));
248 r_eval_with_x(call, r_base_env, sym);
249 r_abort("Internal error: .data subsetting should have failed earlier");
250 }
251
252 r_mark_shared(obj);
253 return obj;
254 }
255
warn_env_as_mask_once()256 static void warn_env_as_mask_once() {
257 const char* msg =
258 "Passing an environment as data mask is deprecated.\n"
259 "Please use `new_data_mask()` to transform your environment to a mask.\n"
260 "\n"
261 " env <- env(foo = \"bar\")\n"
262 "\n"
263 " # Bad:\n"
264 " as_data_mask(env)\n"
265 " eval_tidy(expr, env)\n"
266 "\n"
267 " # Good:\n"
268 " mask <- new_data_mask(env)\n"
269 " eval_tidy(expr, mask)";
270 r_warn_deprecated(msg, msg);
271 }
272
273 static sexp* data_pronoun_sym = NULL;
274 static r_ssize mask_length(r_ssize n);
275
rlang_as_data_mask(sexp * data)276 sexp* rlang_as_data_mask(sexp* data) {
277 if (mask_info(data).type == RLANG_MASK_DATA) {
278 return data;
279 }
280 if (data == r_null) {
281 return rlang_new_data_mask(r_null, r_null);
282 }
283
284 int n_protect = 0;
285
286 sexp* bottom = NULL;
287
288 switch (r_typeof(data)) {
289 case r_type_environment:
290 warn_env_as_mask_once();
291 bottom = KEEP_N(r_env_clone(data, NULL), n_protect);
292 break;
293
294 case r_type_logical:
295 case r_type_integer:
296 case r_type_double:
297 case r_type_complex:
298 case r_type_character:
299 case r_type_raw:
300 data = r_vec_coerce(data, r_type_list);
301 KEEP_N(data, n_protect);
302 // fallthrough:
303
304 case r_type_list: {
305 check_unique_names(data);
306
307 sexp* names = r_names(data);
308
309 r_ssize n_mask = mask_length(r_length(data));
310 bottom = KEEP_N(r_new_environment(r_empty_env, n_mask), n_protect);
311
312 if (names != r_null) {
313 r_ssize n = r_length(data);
314
315 sexp* const * p_names = r_chr_deref_const(names);
316 sexp* const * p_data = r_list_deref_const(data);
317
318 for (r_ssize i = 0; i < n; ++i) {
319 // Ignore empty or missing names
320 sexp* nm = p_names[i];
321 if (r_str_is_name(nm)) {
322 r_env_poke(bottom, r_str_as_symbol(nm), p_data[i]);
323 }
324 }
325 }
326
327 break;
328 }
329
330 default:
331 r_abort("`data` must be a vector, list, data frame, or environment");
332 }
333
334 sexp* data_mask = KEEP_N(rlang_new_data_mask(bottom, bottom), n_protect);
335
336 sexp* data_pronoun = KEEP_N(rlang_as_data_pronoun(data_mask), n_protect);
337 r_env_poke(bottom, data_pronoun_sym, data_pronoun);
338
339 FREE(n_protect);
340 return data_mask;
341 }
342
343 static
mask_length(r_ssize n)344 r_ssize mask_length(r_ssize n) {
345 r_ssize n_grown = r_double_as_ssize(r_double_mult(r_ssize_as_double(n), 1.05));
346 return r_ssize_max(n_grown, r_ssize_add(n, 20));
347 }
348
349 // For compatibility of the exported C callable
350 // TODO: warn
rlang_new_data_mask_compat(sexp * bottom,sexp * top,sexp * parent)351 sexp* rlang_new_data_mask_compat(sexp* bottom, sexp* top, sexp* parent) {
352 return rlang_new_data_mask(bottom, top);
353 }
rlang_as_data_mask_compat(sexp * data,sexp * parent)354 sexp* rlang_as_data_mask_compat(sexp* data, sexp* parent) {
355 return rlang_as_data_mask(data);
356 }
357
358
359 static sexp* tilde_prim = NULL;
360
base_tilde_eval(sexp * tilde,sexp * quo_env)361 static sexp* base_tilde_eval(sexp* tilde, sexp* quo_env) {
362 if (r_f_has_env(tilde)) {
363 return tilde;
364 }
365
366 // Inline the base primitive because overscopes override `~` to make
367 // quosures self-evaluate
368 tilde = KEEP(r_new_call(tilde_prim, r_node_cdr(tilde)));
369 tilde = KEEP(r_eval(tilde, quo_env));
370
371 // Change it back because the result still has the primitive inlined
372 r_node_poke_car(tilde, r_syms_tilde);
373
374 FREE(2);
375 return tilde;
376 }
377
env_get_top_binding(sexp * mask)378 sexp* env_get_top_binding(sexp* mask) {
379 sexp* top = r_env_find(mask, data_mask_top_env_sym);
380
381 if (top == r_syms_unbound) {
382 r_abort("Internal error: Can't find .top pronoun in data mask");
383 }
384 if (r_typeof(top) != r_type_environment) {
385 r_abort("Internal error: Unexpected .top pronoun type");
386 }
387
388 return top;
389 }
390
391
392 static sexp* env_poke_parent_fn = NULL;
393 static sexp* env_poke_fn = NULL;
394
rlang_tilde_eval(sexp * tilde,sexp * current_frame,sexp * caller_frame)395 sexp* rlang_tilde_eval(sexp* tilde, sexp* current_frame, sexp* caller_frame) {
396 // Remove srcrefs from system call
397 r_poke_attribute(tilde, r_syms_srcref, r_null);
398
399 if (!rlang_is_quosure(tilde)) {
400 return base_tilde_eval(tilde, caller_frame);
401 }
402 if (quo_is_missing(tilde)) {
403 return(r_missing_arg());
404 }
405
406 sexp* expr = rlang_quo_get_expr(tilde);
407 if (!r_is_symbolic(expr)) {
408 return expr;
409 }
410
411 sexp* quo_env = rlang_quo_get_env(tilde);
412 if (r_typeof(quo_env) != r_type_environment) {
413 r_abort("Internal error: Quosure environment is corrupt");
414 }
415
416 int n_protect = 0;
417 sexp* top;
418 struct rlang_mask_info info = mask_info(caller_frame);
419
420 switch (info.type) {
421 case RLANG_MASK_DATA:
422 top = KEEP_N(env_get_top_binding(info.mask), n_protect);
423 // Update `.env` pronoun to current quosure env temporarily
424 poke_ctxt_env(info.mask, quo_env);
425 break;
426 case RLANG_MASK_QUOSURE:
427 top = info.mask;
428 break;
429 case RLANG_MASK_NONE:
430 r_abort("Internal error: Can't find the data mask");
431 }
432
433 // Unless the quosure was created in the mask, swap lexical contexts
434 // temporarily by rechaining the top of the mask to the quosure
435 // environment
436 if (!r_env_inherits(info.mask, quo_env, top)) {
437 // Unwind-protect the restoration of original parents
438 on_exit_restore_lexical_env(info.mask, r_env_parent(top), current_frame);
439 r_env_poke_parent(top, quo_env);
440 }
441
442 FREE(n_protect);
443 return r_eval(expr, info.mask);
444 }
445
rlang_ext2_tilde_eval(sexp * call,sexp * op,sexp * args,sexp * rho)446 sexp* rlang_ext2_tilde_eval(sexp* call, sexp* op, sexp* args, sexp* rho) {
447 args = r_node_cdr(args);
448 sexp* tilde = r_node_car(args); args = r_node_cdr(args);
449 sexp* current_frame = r_node_car(args); args = r_node_cdr(args);
450 sexp* caller_frame = r_node_car(args);
451 return rlang_tilde_eval(tilde, current_frame, caller_frame);
452 }
453
454 static const char* data_mask_objects_names[5] = {
455 ".__tidyeval_data_mask__.", "~", ".top_env", ".env", NULL
456 };
457
458 // Soft-deprecated in rlang 0.2.0
rlang_data_mask_clean(sexp * mask)459 sexp* rlang_data_mask_clean(sexp* mask) {
460 sexp* bottom = r_env_parent(mask);
461 sexp* top = r_eval(data_mask_top_env_sym, mask);
462
463 KEEP(top); // Help rchk
464
465 if (top == r_null) {
466 top = bottom;
467 }
468
469 // At this level we only want to remove our own stuff
470 r_env_unbind_strings(mask, data_mask_objects_names);
471
472 // Remove everything in the other levels
473 sexp* env = bottom;
474 sexp* parent = r_env_parent(top);
475 while (env != parent) {
476 sexp* nms = KEEP(r_env_names(env));
477 r_env_unbind_names(env, nms);
478 FREE(1);
479 env = r_env_parent(env);
480 }
481
482 FREE(1);
483 return mask;
484 }
485
486
new_quosure_mask(sexp * env)487 static sexp* new_quosure_mask(sexp* env) {
488 sexp* mask = KEEP(r_new_environment(env, 3));
489 r_env_poke(mask, r_syms_tilde, tilde_fn);
490 r_env_poke(mask, quo_mask_flag_sym, mask);
491 FREE(1);
492 return mask;
493 }
494
rlang_eval_tidy(sexp * expr,sexp * data,sexp * env)495 sexp* rlang_eval_tidy(sexp* expr, sexp* data, sexp* env) {
496 int n_protect = 0;
497
498 if (rlang_is_quosure(expr)) {
499 env = r_quo_get_env(expr);
500 expr = r_quo_get_expr(expr);
501 }
502
503 // If there is no data, we only need to mask `~` with the definition
504 // for quosure thunks. Otherwise we create a heavier data mask with
505 // all the masking objects, data pronouns, etc.
506 if (data == r_null) {
507 sexp* mask = KEEP_N(new_quosure_mask(env), n_protect);
508 sexp* out = r_eval(expr, mask);
509 FREE(n_protect);
510 return out;
511 }
512
513 sexp* mask = KEEP_N(rlang_as_data_mask(data), n_protect);
514 sexp* top = KEEP_N(env_get_top_binding(mask), n_protect);
515
516 // Rechain the mask on the new lexical env but don't restore it on
517 // exit. This way leaked masks inherit from a somewhat sensible
518 // environment. We could do better with ALTENV and two-parent data
519 // masks:
520 //
521 // * We'd create a new two-parents evaluation env for each quosure.
522 // The first parent would be the mask and the second the lexical
523 // environment.
524 //
525 // * The data mask top would always inherit from the empty
526 // environment.
527 //
528 // * Look-up in leaked environments would proceed from the data mask
529 // to the appropriate lexical environment (from quosures or from
530 // the `env` argument of eval_tidy()).
531 if (!r_env_inherits(mask, env, top)) {
532 poke_ctxt_env(mask, env);
533 r_env_poke_parent(top, env);
534 }
535
536 sexp* out = r_eval(expr, mask);
537 FREE(n_protect);
538 return out;
539 }
540
rlang_ext2_eval_tidy(sexp * call,sexp * op,sexp * args,sexp * rho)541 sexp* rlang_ext2_eval_tidy(sexp* call, sexp* op, sexp* args, sexp* rho) {
542 args = r_node_cdr(args);
543 sexp* expr = r_node_car(args); args = r_node_cdr(args);
544 sexp* data = r_node_car(args); args = r_node_cdr(args);
545 sexp* env = r_node_car(args);
546 return rlang_eval_tidy(expr, data, env);
547 }
548
549
rlang_init_eval_tidy()550 void rlang_init_eval_tidy() {
551 sexp* rlang_ns_env = KEEP(r_ns_env("rlang"));
552
553 tilde_fn = r_parse_eval(
554 "function(...) { \n"
555 " .External2(rlang_ext2_tilde_eval, \n"
556 " sys.call(), # Quosure env \n"
557 " environment(), # Unwind-protect env \n"
558 " parent.frame() # Lexical env \n"
559 " ) \n"
560 "} \n",
561 rlang_ns_env
562 );
563 r_mark_precious(tilde_fn);
564
565 data_pronoun_class = r_chr("rlang_data_pronoun");
566 r_mark_precious(data_pronoun_class);
567
568 ctxt_pronoun_class = r_chr("rlang_ctxt_pronoun");
569 r_mark_precious(ctxt_pronoun_class);
570
571 empty_names_chr = r_new_vector(r_type_character, 2);
572 r_mark_precious(empty_names_chr);
573 r_chr_poke(empty_names_chr, 0, r_string(""));
574 r_chr_poke(empty_names_chr, 1, r_missing_str);
575
576 quo_mask_flag_sym = r_sym(".__tidyeval_quosure_mask__.");
577 data_mask_flag_sym = r_sym(".__tidyeval_data_mask__.");
578 data_mask_env_sym = r_sym(".env");
579 data_mask_top_env_sym = r_sym(".top_env");
580 data_pronoun_sym = r_sym(".data");
581
582 tilde_prim = r_base_ns_get("~");
583 env_poke_parent_fn = rlang_ns_get("env_poke_parent");
584 env_poke_fn = rlang_ns_get("env_poke");
585
586 env_sym = r_sym("env");
587 old_sym = r_sym("old");
588 mask_sym = r_sym("mask");
589
590 restore_mask_fn = r_parse_eval(
591 "function() { \n"
592 " ctxt_pronoun <- `mask`$.env \n"
593 " if (!is.null(ctxt_pronoun)) { \n"
594 " parent.env(ctxt_pronoun) <- `old` \n"
595 " } \n"
596 " \n"
597 " top <- `mask`$.top_env \n"
598 " if (is.null(top)) { \n"
599 " top <- `mask` \n"
600 " } \n"
601 " \n"
602 " parent.env(top) <- `old` \n"
603 "} \n",
604 r_base_env
605 );
606 r_mark_precious(restore_mask_fn);
607
608 FREE(1);
609 }
610