#include "vctrs.h"
#include "dim.h"
#include "names.h"
#include "owned.h"
#include "slice-assign.h"
#include "subscript-loc.h"
#include "utils.h"

// Initialised at load time
SEXP syms_vec_assign_fallback = NULL;
SEXP fns_vec_assign_fallback = NULL;

const struct vec_assign_opts vec_assign_default_opts = {
  .assign_names = false
};

static SEXP vec_assign_fallback(SEXP x, SEXP index, SEXP value);
static SEXP vec_proxy_assign_names(SEXP proxy, SEXP index, SEXP value, const enum vctrs_owned owned);
static SEXP lgl_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned);
static SEXP int_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned);
static SEXP dbl_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned);
static SEXP cpl_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned);
SEXP chr_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned);
static SEXP raw_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned);
SEXP list_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned);


// [[ include("slice-assign.h") ]]
SEXP vec_assign_opts(SEXP x, SEXP index, SEXP value,
                     const struct vec_assign_opts* opts) {
  if (x == R_NilValue) {
    return R_NilValue;
  }

  vec_assert(x, opts->x_arg);
  vec_assert(value, opts->value_arg);

  index = PROTECT(vec_as_location_opts(index,
                                       vec_size(x),
                                       PROTECT(vec_names(x)),
                                       location_default_assign_opts));

  // Cast and recycle `value`
  value = PROTECT(vec_cast(value, x, opts->value_arg, opts->x_arg));
  value = PROTECT(vec_recycle(value, vec_size(index), opts->value_arg));

  SEXP proxy = PROTECT(vec_proxy(x));
  const enum vctrs_owned owned = vec_owned(proxy);
  proxy = PROTECT(vec_proxy_assign_opts(proxy, index, value, owned, opts));

  SEXP out = vec_restore(proxy, x, R_NilValue, owned);

  UNPROTECT(6);
  return out;
}

// [[ register() ]]
SEXP vctrs_assign(SEXP x, SEXP index, SEXP value, SEXP x_arg_, SEXP value_arg_) {
  struct vctrs_arg x_arg = vec_as_arg(x_arg_);
  struct vctrs_arg value_arg = vec_as_arg(value_arg_);

  const struct vec_assign_opts opts = {
    .assign_names = false,
    .x_arg = &x_arg,
    .value_arg = &value_arg
  };

  return vec_assign_opts(x, index, value, &opts);
}

// [[ register() ]]
SEXP vctrs_assign_params(SEXP x, SEXP index, SEXP value,
                         SEXP assign_names) {
  const struct vec_assign_opts opts = {
    .assign_names = r_bool_as_int(assign_names)
  };
  return vec_assign_opts(x, index, value, &opts);
}

static SEXP vec_assign_switch(SEXP proxy, SEXP index, SEXP value,
                              const enum vctrs_owned owned,
                              const struct vec_assign_opts* opts) {
  switch (vec_proxy_typeof(proxy)) {
  case vctrs_type_logical: return lgl_assign(proxy, index, value, owned);
  case vctrs_type_integer: return int_assign(proxy, index, value, owned);
  case vctrs_type_double: return dbl_assign(proxy, index, value, owned);
  case vctrs_type_complex: return cpl_assign(proxy, index, value, owned);
  case vctrs_type_character: return chr_assign(proxy, index, value, owned);
  case vctrs_type_raw: return raw_assign(proxy, index, value, owned);
  case vctrs_type_list: return list_assign(proxy, index, value, owned);
  case vctrs_type_dataframe: return df_assign(proxy, index, value, owned, opts);
  case vctrs_type_scalar: stop_scalar_type(proxy, args_empty);
  default: stop_unimplemented_vctrs_type("vec_assign_switch", vec_typeof(proxy));
  }
  never_reached("vec_assign_switch");
}

// `vec_proxy_assign_opts()` conditionally duplicates the `proxy` depending
// on a number of factors.
//
// - If a fallback is required, the `proxy` is duplicated at the R level.
// - If `owned` is `VCTRS_OWNED_true`, the `proxy` is typically not duplicated.
//   However, if it is an ALTREP object, it is duplicated because we need to be
//   able to assign into the object it represents, not the ALTREP SEXP itself.
// - If `owned` is `VCTRS_OWNED_false`, the `proxy` is only
//   duplicated if it is referenced, i.e. `MAYBE_REFERENCED()` returns `true`.
//
// In `vec_proxy_assign()`, which is part of the experimental public API,
// ownership is determined with a call to `NO_REFERENCES()`. If there are no
// references, then `VCTRS_OWNED_true` is used, else
// `VCTRS_OWNED_false` is used.
//
// Ownership of the `proxy` must be recursive. For data frames, the `owned`
// argument is passed along to each column.
//
// Practically, we only set `VCTRS_OWNED_true` when we create a fresh data
// structure at the C level and then assign into it to fill it. This happens
// in `vec_c()` and `vec_rbind()`. For data frames, this `owned` parameter
// is particularly important for R 4.0.0, where references are tracked more
// precisely. In R 4.0.0, a freshly created data frame's columns all have a
// refcount of 1 because of the `SET_VECTOR_ELT()` call that set them in the
// data frame. This makes them referenced, but not shared. If
// `VCTRS_OWNED_false` were set and `df_assign()` were used in a loop
// (as it is in `vec_rbind()`), then a copy of each column would be made at
// each iteration of the loop (any time a new set of rows is assigned
// into the output object).
//
// Even though these functions can assign directly into their input, the safe
// way to call `vec_proxy_assign()` and `vec_proxy_assign_opts()` is to catch
// and protect their output rather than relying on them to assign in place.

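// The snippet below is purely illustrative (nothing in this file calls it);
// it condenses the calling pattern used by `vec_assign_opts()` above: proxy
// the output, assign with the ownership computed by `vec_owned()`, restore,
// and protect the result rather than relying on in-place assignment.
//
//   SEXP proxy = PROTECT(vec_proxy(x));
//   const enum vctrs_owned owned = vec_owned(proxy);
//   proxy = PROTECT(vec_proxy_assign_opts(proxy, index, value, owned, opts));
//   SEXP out = vec_restore(proxy, x, R_NilValue, owned);
//   UNPROTECT(2);
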
/*
 * @param proxy The proxy of the output container
 * @param index The locations to assign `value` to
 * @param value The value to assign into the proxy. Must already be
 *   cast to the type of the true output container, and have been
 *   recycled to the correct size. Should not be proxied, in case
 *   we have to fall back.
 */
SEXP vec_proxy_assign(SEXP proxy, SEXP index, SEXP value) {
  return vec_proxy_assign_opts(proxy, index, value,
                               vec_owned(proxy),
                               &vec_assign_default_opts);
}
SEXP vec_proxy_assign_opts(SEXP proxy, SEXP index, SEXP value,
                           const enum vctrs_owned owned,
                           const struct vec_assign_opts* opts) {
  int n_protect = 0;

  struct vec_assign_opts mut_opts = *opts;
  bool ignore_outer_names = mut_opts.ignore_outer_names;
  mut_opts.ignore_outer_names = false;

  struct vctrs_proxy_info value_info = vec_proxy_info(value);
  PROTECT_PROXY_INFO(&value_info, &n_protect);

  if (TYPEOF(proxy) != TYPEOF(value_info.proxy)) {
    stop_internal("vec_proxy_assign_opts",
                  "`proxy` of type `%s` incompatible with `value` proxy of type `%s`.",
                  Rf_type2char(TYPEOF(proxy)),
                  Rf_type2char(TYPEOF(value_info.proxy)));
  }

  // If a fallback is required, the `proxy` is identical to the output container
  // because no proxy method was called
  SEXP out = R_NilValue;

  if (vec_requires_fallback(value, value_info)) {
    index = PROTECT(compact_materialize(index));
    out = PROTECT(vec_assign_fallback(proxy, index, value));
    ++n_protect;
  } else if (has_dim(proxy)) {
    out = PROTECT(vec_assign_shaped(proxy, index, value_info.proxy, owned, &mut_opts));
  } else {
    out = PROTECT(vec_assign_switch(proxy, index, value_info.proxy, owned, &mut_opts));
  }
  ++n_protect;

  if (!ignore_outer_names && opts->assign_names) {
    out = vec_proxy_assign_names(out, index, value_info.proxy, owned);
  }

  UNPROTECT(n_protect);
  return out;
}

#define ASSIGN_INDEX(CTYPE, DEREF, CONST_DEREF) \
  R_len_t n = Rf_length(index); \
  int* index_data = INTEGER(index); \
 \
  if (n != Rf_length(value)) { \
    stop_internal("vec_assign", \
                  "`value` should have been recycled to fit `x`."); \
  } \
 \
  const CTYPE* value_data = CONST_DEREF(value); \
 \
  SEXP out = PROTECT(vec_clone_referenced(x, owned)); \
  CTYPE* out_data = DEREF(out); \
 \
  for (R_len_t i = 0; i < n; ++i) { \
    int j = index_data[i]; \
    if (j != NA_INTEGER) { \
      out_data[j - 1] = value_data[i]; \
    } \
  } \
 \
  UNPROTECT(1); \
  return out

#define ASSIGN_COMPACT(CTYPE, DEREF, CONST_DEREF) \
  int* index_data = INTEGER(index); \
  R_len_t start = index_data[0]; \
  R_len_t n = index_data[1]; \
  R_len_t step = index_data[2]; \
 \
  if (n != Rf_length(value)) { \
    stop_internal("vec_assign", \
                  "`value` should have been recycled to fit `x`."); \
  } \
 \
  const CTYPE* value_data = CONST_DEREF(value); \
 \
  SEXP out = PROTECT(vec_clone_referenced(x, owned)); \
  CTYPE* out_data = DEREF(out) + start; \
 \
  for (int i = 0; i < n; ++i, out_data += step, ++value_data) { \
    *out_data = *value_data; \
  } \
 \
  UNPROTECT(1); \
  return out

#define ASSIGN(CTYPE, DEREF, CONST_DEREF) \
  if (is_compact_seq(index)) { \
    ASSIGN_COMPACT(CTYPE, DEREF, CONST_DEREF); \
  } else { \
    ASSIGN_INDEX(CTYPE, DEREF, CONST_DEREF); \
  }
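
// Both index representations handled by `ASSIGN()` can be read off the macros
// above; the example values here are illustrative only:
//
// - A materialized index is a 1-based integer vector in which `NA_INTEGER`
//   means "don't assign at this position". E.g. an index of {2, NA, 5}
//   writes value_data[0] to out_data[1] and value_data[2] to out_data[4].
// - A compact seq is the triple {start, n, step}, where `start` is a 0-based
//   offset. E.g. {1, 3, 1} writes value_data[0..2] to out_data[1..3].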

static SEXP lgl_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned) {
  ASSIGN(int, LOGICAL, LOGICAL_RO);
}
static SEXP int_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned) {
  ASSIGN(int, INTEGER, INTEGER_RO);
}
static SEXP dbl_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned) {
  ASSIGN(double, REAL, REAL_RO);
}
static SEXP cpl_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned) {
  ASSIGN(Rcomplex, COMPLEX, COMPLEX_RO);
}
SEXP chr_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned) {
  ASSIGN(SEXP, STRING_PTR, STRING_PTR_RO);
}
static SEXP raw_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned) {
  ASSIGN(Rbyte, RAW, RAW_RO);
}

#undef ASSIGN
#undef ASSIGN_INDEX
#undef ASSIGN_COMPACT


#define ASSIGN_BARRIER_INDEX(GET, SET) \
  R_len_t n = Rf_length(index); \
  int* index_data = INTEGER(index); \
 \
  if (n != Rf_length(value)) { \
    stop_internal("vec_assign", \
                  "`value` should have been recycled to fit `x`."); \
  } \
 \
  SEXP out = PROTECT(vec_clone_referenced(x, owned)); \
 \
  for (R_len_t i = 0; i < n; ++i) { \
    int j = index_data[i]; \
    if (j != NA_INTEGER) { \
      SET(out, j - 1, GET(value, i)); \
    } \
  } \
 \
  UNPROTECT(1); \
  return out

#define ASSIGN_BARRIER_COMPACT(GET, SET) \
  int* index_data = INTEGER(index); \
  R_len_t start = index_data[0]; \
  R_len_t n = index_data[1]; \
  R_len_t step = index_data[2]; \
 \
  if (n != Rf_length(value)) { \
    stop_internal("vec_assign", \
                  "`value` should have been recycled to fit `x`."); \
  } \
 \
  SEXP out = PROTECT(vec_clone_referenced(x, owned)); \
 \
  for (R_len_t i = 0; i < n; ++i, start += step) { \
    SET(out, start, GET(value, i)); \
  } \
 \
  UNPROTECT(1); \
  return out

#define ASSIGN_BARRIER(GET, SET) \
  if (is_compact_seq(index)) { \
    ASSIGN_BARRIER_COMPACT(GET, SET); \
  } else { \
    ASSIGN_BARRIER_INDEX(GET, SET); \
  }

SEXP list_assign(SEXP x, SEXP index, SEXP value, const enum vctrs_owned owned) {
  ASSIGN_BARRIER(VECTOR_ELT, SET_VECTOR_ELT);
}

#undef ASSIGN_BARRIER
#undef ASSIGN_BARRIER_INDEX
#undef ASSIGN_BARRIER_COMPACT


/**
 * - `out` and `value` must be rectangular lists.
 * - `value` must have the same size as `index`.
 *
 * Performance and safety notes:
 * If `x` is a fresh data frame (which would be the case in `vec_c()` and
 * `vec_rbind()`) then `r_clone_referenced()` will return it untouched. Each
 * column will also be fresh, so if `vec_proxy()` just returns its input then
 * `vec_proxy_assign_opts()` will directly assign to that column in `x`. This
 * makes it extremely fast to assign to a data frame.
 *
 * If `x` is referenced already, then `r_clone_referenced()` will call
 * `Rf_shallow_duplicate()`. For lists, this loops over the list and marks
 * each list element with max namedness. This is helpful for us, because
 * it is possible to have a data frame that is itself referenced, with columns
 * that are not (mtcars is an example). If each list element weren't marked,
 * then `vec_proxy_assign_opts()` would see an unreferenced column and modify
 * it directly, resulting in improper mutable semantics. See #986 for full
 * details.
 *
 * [[ include("vctrs.h") ]]
 */
SEXP df_assign(SEXP x, SEXP index, SEXP value,
               const enum vctrs_owned owned,
               const struct vec_assign_opts* opts) {
  SEXP out = PROTECT(vec_clone_referenced(x, owned));

  R_len_t n = Rf_length(out);

  if (Rf_length(value) != n) {
    stop_internal("df_assign",
                  "Can't assign %d columns to df of length %d.",
                  Rf_length(value),
                  n);
  }

  for (R_len_t i = 0; i < n; ++i) {
    SEXP out_elt = VECTOR_ELT(out, i);
    SEXP value_elt = VECTOR_ELT(value, i);

    // No need to cast or recycle because those operations are
    // recursive and have already been performed. However, proxy and
    // restore are not recursive, so they need to be done for each
    // element we recurse into. `vec_proxy_assign()` will proxy the
    // `value_elt`.
    SEXP proxy_elt = PROTECT(vec_proxy(out_elt));

    SEXP assigned = PROTECT(vec_proxy_assign_opts(proxy_elt, index, value_elt, owned, opts));
    assigned = vec_restore(assigned, out_elt, R_NilValue, owned);

    SET_VECTOR_ELT(out, i, assigned);
    UNPROTECT(2);
  }

  UNPROTECT(1);
  return out;
}
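
// Illustrative sketch only, not lifted from `vec_rbind()`: the names `ptype`,
// `inputs`, `n_inputs`, `total_size`, and `opts`, and the use of `vec_init()`
// to allocate the output, are assumptions made for the example. It shows the
// owned fast path described above `df_assign()`: because the output is fresh
// and `VCTRS_OWNED_true` is passed, `vec_clone_referenced()` returns it
// untouched and the columns are filled in place on every iteration.
//
//   SEXP out = PROTECT(vec_init(ptype, total_size));
//   R_len_t offset = 0;
//
//   for (R_len_t i = 0; i < n_inputs; ++i) {
//     R_len_t size = vec_size(inputs[i]);
//     SEXP loc = PROTECT(compact_seq(offset, size, true));
//
//     // In the owned case the same object comes back, so the original
//     // `PROTECT()` of `out` still covers it.
//     out = df_assign(out, loc, inputs[i], VCTRS_OWNED_true, opts);
//
//     offset += size;
//     UNPROTECT(1);
//   }
//   UNPROTECT(1);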

static SEXP vec_assign_fallback(SEXP x, SEXP index, SEXP value) {
  return vctrs_dispatch3(syms_vec_assign_fallback, fns_vec_assign_fallback,
                         syms_x, x,
                         syms_i, index,
                         syms_value, value);
}

static
SEXP vec_proxy_assign_names(SEXP proxy,
                            SEXP index,
                            SEXP value,
                            const enum vctrs_owned owned) {
  SEXP value_nms = PROTECT(vec_names(value));

  if (value_nms == R_NilValue) {
    UNPROTECT(1);
    return proxy;
  }

  SEXP proxy_nms = PROTECT(vec_proxy_names(proxy));
  if (proxy_nms == R_NilValue) {
    proxy_nms = PROTECT(Rf_allocVector(STRSXP, vec_size(proxy)));
  } else {
    proxy_nms = PROTECT(vec_clone_referenced(proxy_nms, owned));
  }
  proxy_nms = PROTECT(chr_assign(proxy_nms, index, value_nms, owned));

  proxy = PROTECT(vec_clone_referenced(proxy, owned));
  proxy = vec_proxy_set_names(proxy, proxy_nms, owned);

  UNPROTECT(5);
  return proxy;
}


// Exported for testing
// [[ register() ]]
SEXP vctrs_assign_seq(SEXP x, SEXP value, SEXP start, SEXP size, SEXP increasing) {
  R_len_t start_ = r_int_get(start, 0);
  R_len_t size_ = r_int_get(size, 0);
  bool increasing_ = r_lgl_get(increasing, 0);

  SEXP index = PROTECT(compact_seq(start_, size_, increasing_));

  const struct vec_assign_opts* opts = &vec_assign_default_opts;

  // Cast and recycle `value`
  value = PROTECT(vec_cast(value, x, opts->value_arg, opts->x_arg));
  value = PROTECT(vec_recycle(value, vec_subscript_size(index), opts->value_arg));

  SEXP proxy = PROTECT(vec_proxy(x));
  const enum vctrs_owned owned = vec_owned(proxy);
  proxy = PROTECT(vec_proxy_assign_opts(proxy, index, value, owned, opts));

  SEXP out = vec_restore(proxy, x, R_NilValue, owned);

  UNPROTECT(5);
  return out;
}


void vctrs_init_slice_assign(SEXP ns) {
  syms_vec_assign_fallback = Rf_install("vec_assign_fallback");
  fns_vec_assign_fallback = Rf_findVar(syms_vec_assign_fallback, ns);
}