1 #include "vctrs.h"
2 #include "dim.h"
3 #include "names.h"
4 #include "owned.h"
5 #include "slice-assign.h"
6 #include "subscript-loc.h"
7 #include "utils.h"
8 
9 // Initialised at load time
10 SEXP syms_vec_assign_fallback = NULL;
11 SEXP fns_vec_assign_fallback = NULL;
12 
13 const struct vec_assign_opts vec_assign_default_opts = {
14   .assign_names = false
15 };
16 
17 static SEXP vec_assign_fallback(SEXP x, SEXP index, SEXP value);
18 static SEXP vec_proxy_assign_names(SEXP proxy, SEXP index, SEXP value, const enum vctrs_owned owned);
19 static SEXP lgl_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned);
20 static SEXP int_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned);
21 static SEXP dbl_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned);
22 static SEXP cpl_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned);
23 SEXP chr_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned);
24 static SEXP raw_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned);
25 SEXP list_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned);
26 
27 
28 // [[ include("slice-assign.h") ]]
vec_assign_opts(SEXP x,SEXP index,SEXP value,const struct vec_assign_opts * opts)29 SEXP vec_assign_opts(SEXP x, SEXP index, SEXP value,
30                      const struct vec_assign_opts* opts) {
31   if (x == R_NilValue) {
32     return R_NilValue;
33   }
34 
35   vec_assert(x, opts->x_arg);
36   vec_assert(value, opts->value_arg);
37 
38   index = PROTECT(vec_as_location_opts(index,
39                                        vec_size(x),
40                                        PROTECT(vec_names(x)),
41                                        location_default_assign_opts));
42 
43   // Cast and recycle `value`
44   value = PROTECT(vec_cast(value, x, opts->value_arg, opts->x_arg));
45   value = PROTECT(vec_recycle(value, vec_size(index), opts->value_arg));
46 
47   SEXP proxy = PROTECT(vec_proxy(x));
48   const enum vctrs_owned owned = vec_owned(proxy);
49   proxy = PROTECT(vec_proxy_assign_opts(proxy, index, value, owned, opts));
50 
51   SEXP out = vec_restore(proxy, x, R_NilValue, owned);
52 
53   UNPROTECT(6);
54   return out;
55 }
56 
57 // [[ register() ]]
vctrs_assign(SEXP x,SEXP index,SEXP value,SEXP x_arg_,SEXP value_arg_)58 SEXP vctrs_assign(SEXP x, SEXP index, SEXP value, SEXP x_arg_, SEXP value_arg_) {
59   struct vctrs_arg x_arg = vec_as_arg(x_arg_);
60   struct vctrs_arg value_arg = vec_as_arg(value_arg_);
61 
62   const struct vec_assign_opts opts = {
63     .assign_names = false,
64     .x_arg = &x_arg,
65     .value_arg = &value_arg
66   };
67 
68   return vec_assign_opts(x, index, value, &opts);
69 }
70 
71 // [[ register() ]]
vctrs_assign_params(SEXP x,SEXP index,SEXP value,SEXP assign_names)72 SEXP vctrs_assign_params(SEXP x, SEXP index, SEXP value,
73                          SEXP assign_names) {
74   const struct vec_assign_opts opts =  {
75     .assign_names = r_bool_as_int(assign_names)
76   };
77   return vec_assign_opts(x, index, value, &opts);
78 }
79 
vec_assign_switch(SEXP proxy,SEXP index,SEXP value,const enum vctrs_owned owned,const struct vec_assign_opts * opts)80 static SEXP vec_assign_switch(SEXP proxy, SEXP index, SEXP value,
81                               const enum vctrs_owned owned,
82                               const struct vec_assign_opts* opts) {
83   switch (vec_proxy_typeof(proxy)) {
84   case vctrs_type_logical:   return lgl_assign(proxy, index, value, owned);
85   case vctrs_type_integer:   return int_assign(proxy, index, value, owned);
86   case vctrs_type_double:    return dbl_assign(proxy, index, value, owned);
87   case vctrs_type_complex:   return cpl_assign(proxy, index, value, owned);
88   case vctrs_type_character: return chr_assign(proxy, index, value, owned);
89   case vctrs_type_raw:       return raw_assign(proxy, index, value, owned);
90   case vctrs_type_list:      return list_assign(proxy, index, value, owned);
91   case vctrs_type_dataframe: return df_assign(proxy, index, value, owned, opts);
92   case vctrs_type_scalar:    stop_scalar_type(proxy, args_empty);
93   default:                   stop_unimplemented_vctrs_type("vec_assign_switch", vec_typeof(proxy));
94   }
95   never_reached("vec_assign_switch");
96 }
97 
98 // `vec_proxy_assign_opts()` conditionally duplicates the `proxy` depending
99 // on a number of factors.
100 //
101 // - If a fallback is required, the `proxy` is duplicated at the R level.
102 // - If `owned` is `VCTRS_OWNED_true`, the `proxy` is typically not duplicated.
103 //   However, if it is an ALTREP object, it is duplicated because we need to be
104 //   able to assign into the object it represents, not the ALTREP SEXP itself.
105 // - If `owned` is `VCTRS_OWNED_false`, the `proxy` is only
106 //   duplicated if it is referenced, i.e. `MAYBE_REFERENCED()` returns `true`.
107 //
108 // In `vec_proxy_assign()`, which is part of the experimental public API,
109 // ownership is determined with a call to `NO_REFERENCES()`. If there are no
110 // references, then `VCTRS_OWNED_true` is used, else
111 // `VCTRS_OWNED_false` is used.
112 //
113 // Ownership of the `proxy` must be recursive. For data frames, the `owned`
114 // argument is passed along to each column.
115 //
116 // Practically, we only set `VCTRS_OWNED_true` when we create a fresh data
117 // structure at the C level and then assign into it to fill it. This happens
118 // in `vec_c()` and `vec_rbind()`. For data frames, this `owned` parameter
119 // is particularly important for R 4.0.0 where references are tracked more
120 // precisely. In R 4.0.0, a freshly created data frame's columns all have a
121 // refcount of 1 because of the `SET_VECTOR_ELT()` call that set them in the
122 // data frame. This makes them referenced, but not shared. If
123 // `VCTRS_OWNED_false` was set and `df_assign()` was used in a loop
124 // (as it is in `vec_rbind()`), then a copy of each column would be made at
125 // each iteration of the loop (any time a new set of rows is assigned
126 // into the output object).
127 //
128 // Even though it can directly assign, the safe
129 // way to call `vec_proxy_assign()` and `vec_proxy_assign_opts()` is to catch
130 // and protect their output rather than relying on them to assign directly.
131 
132 /*
133  * @param proxy The proxy of the output container
134  * @param index The locations to assign `value` to
135  * @param value The value to assign into the proxy. Must already be
136  *   cast to the type of the true output container, and have been
137  *   recycled to the correct size. Should not be proxied, in case
138  *   we have to fallback.
139  */
vec_proxy_assign(SEXP proxy,SEXP index,SEXP value)140 SEXP vec_proxy_assign(SEXP proxy, SEXP index, SEXP value) {
141   return vec_proxy_assign_opts(proxy, index, value,
142                                vec_owned(proxy),
143                                &vec_assign_default_opts);
144 }
vec_proxy_assign_opts(SEXP proxy,SEXP index,SEXP value,const enum vctrs_owned owned,const struct vec_assign_opts * opts)145 SEXP vec_proxy_assign_opts(SEXP proxy, SEXP index, SEXP value,
146                            const enum vctrs_owned owned,
147                            const struct vec_assign_opts* opts) {
148   int n_protect = 0;
149 
150   struct vec_assign_opts mut_opts = *opts;
151   bool ignore_outer_names = mut_opts.ignore_outer_names;
152   mut_opts.ignore_outer_names = false;
153 
154   struct vctrs_proxy_info value_info = vec_proxy_info(value);
155   PROTECT_PROXY_INFO(&value_info, &n_protect);
156 
157   if (TYPEOF(proxy) != TYPEOF(value_info.proxy)) {
158     stop_internal("vec_proxy_assign_opts",
159                   "`proxy` of type `%s` incompatible with `value` proxy of type `%s`.",
160                   Rf_type2char(TYPEOF(proxy)),
161                   Rf_type2char(TYPEOF(value_info.proxy)));
162   }
163 
164   // If a fallback is required, the `proxy` is identical to the output container
165   // because no proxy method was called
166   SEXP out = R_NilValue;
167 
168   if (vec_requires_fallback(value, value_info)) {
169     index = PROTECT(compact_materialize(index));
170     out = PROTECT(vec_assign_fallback(proxy, index, value));
171     ++n_protect;
172   } else if (has_dim(proxy)) {
173     out = PROTECT(vec_assign_shaped(proxy, index, value_info.proxy, owned, &mut_opts));
174   } else {
175     out = PROTECT(vec_assign_switch(proxy, index, value_info.proxy, owned, &mut_opts));
176   }
177   ++n_protect;
178 
179   if (!ignore_outer_names && opts->assign_names) {
180     out = vec_proxy_assign_names(out, index, value_info.proxy, owned);
181   }
182 
183   UNPROTECT(n_protect);
184   return out;
185 }
186 
187 #define ASSIGN_INDEX(CTYPE, DEREF, CONST_DEREF)                         \
188   R_len_t n = Rf_length(index);                                         \
189   int* index_data = INTEGER(index);                                     \
190                                                                         \
191   if (n != Rf_length(value)) {                                          \
192     stop_internal("vec_assign",                                         \
193                   "`value` should have been recycled to fit `x`.");     \
194   }                                                                     \
195                                                                         \
196   const CTYPE* value_data = CONST_DEREF(value);                         \
197                                                                         \
198   SEXP out = PROTECT(vec_clone_referenced(x, owned));                   \
199   CTYPE* out_data = DEREF(out);                                         \
200                                                                         \
201   for (R_len_t i = 0; i < n; ++i) {                                     \
202     int j = index_data[i];                                              \
203     if (j != NA_INTEGER) {                                              \
204       out_data[j - 1] = value_data[i];                                  \
205     }                                                                   \
206   }                                                                     \
207                                                                         \
208   UNPROTECT(1);                                                         \
209   return out
210 
211 #define ASSIGN_COMPACT(CTYPE, DEREF, CONST_DEREF)                       \
212   int* index_data = INTEGER(index);                                     \
213   R_len_t start = index_data[0];                                        \
214   R_len_t n = index_data[1];                                            \
215   R_len_t step = index_data[2];                                         \
216                                                                         \
217   if (n != Rf_length(value)) {                                          \
218     stop_internal("vec_assign",                                         \
219                   "`value` should have been recycled to fit `x`.");     \
220   }                                                                     \
221                                                                         \
222   const CTYPE* value_data = CONST_DEREF(value);                         \
223                                                                         \
224   SEXP out = PROTECT(vec_clone_referenced(x, owned));                   \
225   CTYPE* out_data = DEREF(out) + start;                                 \
226                                                                         \
227   for (int i = 0; i < n; ++i, out_data += step, ++value_data) {         \
228     *out_data = *value_data;                                            \
229   }                                                                     \
230                                                                         \
231   UNPROTECT(1);                                                         \
232   return out
233 
234 #define ASSIGN(CTYPE, DEREF, CONST_DEREF)       \
235   if (is_compact_seq(index)) {                  \
236     ASSIGN_COMPACT(CTYPE, DEREF, CONST_DEREF);  \
237   } else {                                      \
238     ASSIGN_INDEX(CTYPE, DEREF, CONST_DEREF);    \
239   }
240 
lgl_assign(SEXP x,SEXP index,SEXP value,const enum vctrs_owned owned)241 static SEXP lgl_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned) {
242   ASSIGN(int, LOGICAL, LOGICAL_RO);
243 }
int_assign(SEXP x,SEXP index,SEXP value,const enum vctrs_owned owned)244 static SEXP int_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned) {
245   ASSIGN(int, INTEGER, INTEGER_RO);
246 }
dbl_assign(SEXP x,SEXP index,SEXP value,const enum vctrs_owned owned)247 static SEXP dbl_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned) {
248   ASSIGN(double, REAL, REAL_RO);
249 }
cpl_assign(SEXP x,SEXP index,SEXP value,const enum vctrs_owned owned)250 static SEXP cpl_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned) {
251   ASSIGN(Rcomplex, COMPLEX, COMPLEX_RO);
252 }
chr_assign(SEXP x,SEXP index,SEXP value,const enum vctrs_owned owned)253 SEXP chr_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned) {
254   ASSIGN(SEXP, STRING_PTR, STRING_PTR_RO);
255 }
raw_assign(SEXP x,SEXP index,SEXP value,const enum vctrs_owned owned)256 static SEXP raw_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned) {
257   ASSIGN(Rbyte, RAW, RAW_RO);
258 }
259 
260 #undef ASSIGN
261 #undef ASSIGN_INDEX
262 #undef ASSIGN_COMPACT
263 
264 
265 #define ASSIGN_BARRIER_INDEX(GET, SET)                                  \
266   R_len_t n = Rf_length(index);                                         \
267   int* index_data = INTEGER(index);                                     \
268                                                                         \
269   if (n != Rf_length(value)) {                                          \
270     stop_internal("vec_assign",                                         \
271                   "`value` should have been recycled to fit `x`.");     \
272   }                                                                     \
273                                                                         \
274   SEXP out = PROTECT(vec_clone_referenced(x, owned));                   \
275                                                                         \
276   for (R_len_t i = 0; i < n; ++i) {                                     \
277     int j = index_data[i];                                              \
278     if (j != NA_INTEGER) {                                              \
279       SET(out, j - 1, GET(value, i));                                   \
280     }                                                                   \
281   }                                                                     \
282                                                                         \
283   UNPROTECT(1);                                                         \
284   return out
285 
286 #define ASSIGN_BARRIER_COMPACT(GET, SET)                                \
287   int* index_data = INTEGER(index);                                     \
288   R_len_t start = index_data[0];                                        \
289   R_len_t n = index_data[1];                                            \
290   R_len_t step = index_data[2];                                         \
291                                                                         \
292   if (n != Rf_length(value)) {                                          \
293     stop_internal("vec_assign",                                         \
294                   "`value` should have been recycled to fit `x`.");     \
295   }                                                                     \
296                                                                         \
297   SEXP out = PROTECT(vec_clone_referenced(x, owned));                   \
298                                                                         \
299   for (R_len_t i = 0; i < n; ++i, start += step) {                      \
300     SET(out, start, GET(value, i));                                     \
301   }                                                                     \
302                                                                         \
303   UNPROTECT(1);                                                         \
304   return out
305 
306 #define ASSIGN_BARRIER(GET, SET)                \
307   if (is_compact_seq(index)) {                  \
308     ASSIGN_BARRIER_COMPACT(GET, SET);           \
309   } else {                                      \
310     ASSIGN_BARRIER_INDEX(GET, SET);             \
311   }
312 
list_assign(SEXP x,SEXP index,SEXP value,const enum vctrs_owned owned)313 SEXP list_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned) {
314   ASSIGN_BARRIER(VECTOR_ELT, SET_VECTOR_ELT);
315 }
316 
317 #undef ASSIGN_BARRIER
318 #undef ASSIGN_BARRIER_INDEX
319 #undef ASSIGN_BARRIER_COMPACT
320 
321 
322 /**
323  * - `out` and `value` must be rectangular lists.
324  * - `value` must have the same size as `index`.
325  *
326  * Performance and safety notes:
327  * If `x` is a fresh data frame (which would be the case in `vec_c()` and
328  * `vec_rbind()`) then `r_clone_referenced()` will return it untouched. Each
329  * column will also be fresh, so if `vec_proxy()` just returns its input then
330  * `vec_proxy_assign_opts()` will directly assign to that column in `x`. This
331  * makes it extremely fast to assign to a data frame.
332  *
333  * If `x` is referenced already, then `r_clone_referenced()` will call
334  * `Rf_shallow_duplicate()`. For lists, this loops over the list and marks
335  * each list element with max namedness. This is helpful for us, because
336  * it is possible to have a data frame that is itself referenced, with columns
337  * that are not (mtcars is an example). If each list element wasn't marked, then
338  * `vec_proxy_assign_opts()` would see an unreferenced column and modify it
339  * directly, resulting in improper mutable semantics. See #986 for full details.
340  *
341  * [[ include("vctrs.h") ]]
342  */
df_assign(SEXP x,SEXP index,SEXP value,const enum vctrs_owned owned,const struct vec_assign_opts * opts)343 SEXP df_assign(SEXP x, SEXP index, SEXP value,
344                const enum vctrs_owned owned,
345                const struct vec_assign_opts* opts) {
346   SEXP out = PROTECT(vec_clone_referenced(x, owned));
347 
348   R_len_t n = Rf_length(out);
349 
350   if (Rf_length(value) != n) {
351     stop_internal("df_assign",
352                   "Can't assign %d columns to df of length %d.",
353                   Rf_length(value),
354                   n);
355   }
356 
357   for (R_len_t i = 0; i < n; ++i) {
358     SEXP out_elt = VECTOR_ELT(out, i);
359     SEXP value_elt = VECTOR_ELT(value, i);
360 
361     // No need to cast or recycle because those operations are
362     // recursive and have already been performed. However, proxy and
363     // restore are not recursive so need to be done for each element
364     // we recurse into. `vec_proxy_assign()` will proxy the `value_elt`.
365     SEXP proxy_elt = PROTECT(vec_proxy(out_elt));
366 
367     SEXP assigned = PROTECT(vec_proxy_assign_opts(proxy_elt, index, value_elt, owned, opts));
368     assigned = vec_restore(assigned, out_elt, R_NilValue, owned);
369 
370     SET_VECTOR_ELT(out, i, assigned);
371     UNPROTECT(2);
372   }
373 
374   UNPROTECT(1);
375   return out;
376 }
377 
vec_assign_fallback(SEXP x,SEXP index,SEXP value)378 static SEXP vec_assign_fallback(SEXP x, SEXP index, SEXP value) {
379   return vctrs_dispatch3(syms_vec_assign_fallback, fns_vec_assign_fallback,
380                          syms_x, x,
381                          syms_i, index,
382                          syms_value, value);
383 }
384 
385 static
vec_proxy_assign_names(SEXP proxy,SEXP index,SEXP value,const enum vctrs_owned owned)386 SEXP vec_proxy_assign_names(SEXP proxy,
387                             SEXP index,
388                             SEXP value,
389                             const enum vctrs_owned owned) {
390   SEXP value_nms = PROTECT(vec_names(value));
391 
392   if (value_nms == R_NilValue) {
393     UNPROTECT(1);
394     return proxy;
395   }
396 
397   SEXP proxy_nms = PROTECT(vec_proxy_names(proxy));
398   if (proxy_nms == R_NilValue) {
399     proxy_nms = PROTECT(Rf_allocVector(STRSXP, vec_size(proxy)));
400   } else {
401     proxy_nms = PROTECT(vec_clone_referenced(proxy_nms, owned));
402   }
403   proxy_nms = PROTECT(chr_assign(proxy_nms, index, value_nms, owned));
404 
405   proxy = PROTECT(vec_clone_referenced(proxy, owned));
406   proxy = vec_proxy_set_names(proxy, proxy_nms, owned);
407 
408   UNPROTECT(5);
409   return proxy;
410 }
411 
412 
413 // Exported for testing
414 // [[ register() ]]
vctrs_assign_seq(SEXP x,SEXP value,SEXP start,SEXP size,SEXP increasing)415 SEXP vctrs_assign_seq(SEXP x, SEXP value, SEXP start, SEXP size, SEXP increasing) {
416   R_len_t start_ = r_int_get(start, 0);
417   R_len_t size_ = r_int_get(size, 0);
418   bool increasing_ = r_lgl_get(increasing, 0);
419 
420   SEXP index = PROTECT(compact_seq(start_, size_, increasing_));
421 
422   const struct vec_assign_opts* opts = &vec_assign_default_opts;
423 
424   // Cast and recycle `value`
425   value = PROTECT(vec_cast(value, x, opts->value_arg, opts->x_arg));
426   value = PROTECT(vec_recycle(value, vec_subscript_size(index), opts->value_arg));
427 
428   SEXP proxy = PROTECT(vec_proxy(x));
429   const enum vctrs_owned owned = vec_owned(proxy);
430   proxy = PROTECT(vec_proxy_assign_opts(proxy, index, value, owned, opts));
431 
432   SEXP out = vec_restore(proxy, x, R_NilValue, owned);
433 
434   UNPROTECT(5);
435   return out;
436 }
437 
438 
vctrs_init_slice_assign(SEXP ns)439 void vctrs_init_slice_assign(SEXP ns) {
440   syms_vec_assign_fallback = Rf_install("vec_assign_fallback");
441   fns_vec_assign_fallback = Rf_findVar(syms_vec_assign_fallback, ns);
442 }
443