1 #include "vctrs.h"
2 #include "c.h"
3 #include "dim.h"
4 #include "ptype-common.h"
5 #include "slice-assign.h"
6 #include "type-data-frame.h"
7 #include "owned.h"
8 #include "utils.h"
9
10
11 static SEXP vec_rbind(SEXP xs, SEXP ptype, SEXP id, struct name_repair_opts* name_repair, SEXP name_spec);
12 static SEXP as_df_row(SEXP x, struct name_repair_opts* name_repair);
13 static SEXP as_df_row_impl(SEXP x, struct name_repair_opts* name_repair);
14 struct name_repair_opts validate_bind_name_repair(SEXP name_repair, bool allow_minimal);
15 static SEXP vec_cbind(SEXP xs, SEXP ptype, SEXP size, struct name_repair_opts* name_repair);
16 static SEXP cbind_names_to(bool has_names, SEXP names_to, SEXP ptype);
17
18 // [[ register(external = TRUE) ]]
vctrs_rbind(SEXP call,SEXP op,SEXP args,SEXP env)19 SEXP vctrs_rbind(SEXP call, SEXP op, SEXP args, SEXP env) {
20 args = CDR(args);
21
22 SEXP xs = PROTECT(rlang_env_dots_list(env));
23 SEXP ptype = PROTECT(Rf_eval(CAR(args), env)); args = CDR(args);
24 SEXP names_to = PROTECT(Rf_eval(CAR(args), env)); args = CDR(args);
25 SEXP name_repair = PROTECT(Rf_eval(CAR(args), env)); args = CDR(args);
26 SEXP name_spec = PROTECT(Rf_eval(CAR(args), env));
27
28 if (names_to != R_NilValue) {
29 if (Rf_inherits(names_to, "rlang_zap")) {
30 r_poke_names(xs, R_NilValue);
31 names_to = R_NilValue;
32 } else if (r_is_string(names_to)) {
33 names_to = r_chr_get(names_to, 0);
34 } else {
35 Rf_errorcall(R_NilValue, "`.names_to` must be `NULL`, a string, or an `rlang::zap()` object.");
36 }
37 }
38
39 struct name_repair_opts name_repair_opts = validate_bind_name_repair(name_repair, false);
40 PROTECT_NAME_REPAIR_OPTS(&name_repair_opts);
41
42 SEXP out = vec_rbind(xs, ptype, names_to, &name_repair_opts, name_spec);
43
44 UNPROTECT(6);
45 return out;
46 }
47
vec_rbind(SEXP xs,SEXP ptype,SEXP names_to,struct name_repair_opts * name_repair,SEXP name_spec)48 static SEXP vec_rbind(SEXP xs,
49 SEXP ptype,
50 SEXP names_to,
51 struct name_repair_opts* name_repair,
52 SEXP name_spec) {
53 int n_prot = 0;
54 R_len_t n_inputs = Rf_length(xs);
55
56 for (R_len_t i = 0; i < n_inputs; ++i) {
57 SET_VECTOR_ELT(xs, i, as_df_row(VECTOR_ELT(xs, i), name_repair));
58 }
59
60 // The common type holds information about common column names,
61 // types, etc. Each element of `xs` needs to be cast to that type
62 // before assignment.
63 ptype = vec_ptype_common_params(xs, ptype, DF_FALLBACK_DEFAULT, S3_FALLBACK_true);
64 PROTECT_N(ptype, &n_prot);
65
66 R_len_t n_cols = Rf_length(ptype);
67
68 if (ptype == R_NilValue) {
69 UNPROTECT(n_prot);
70 return new_data_frame(vctrs_shared_empty_list, 0);
71 }
72 if (TYPEOF(ptype) == LGLSXP && !n_cols) {
73 ptype = as_df_row_impl(vctrs_shared_na_lgl, name_repair);
74 PROTECT_N(ptype, &n_prot);
75 }
76 if (!is_data_frame(ptype)) {
77 Rf_errorcall(R_NilValue, "Can't bind objects that are not coercible to a data frame.");
78 }
79
80 bool assign_names = !Rf_inherits(name_spec, "rlang_zap");
81
82 bool has_names_to = names_to != R_NilValue;
83 R_len_t names_to_loc = 0;
84
85 if (has_names_to) {
86 if (!assign_names) {
87 r_abort("Can't zap outer names when `.names_to` is supplied.");
88 }
89
90 SEXP ptype_nms = PROTECT(r_names(ptype));
91 names_to_loc = r_chr_find(ptype_nms, names_to);
92 UNPROTECT(1);
93
94 if (names_to_loc < 0) {
95 ptype = PROTECT_N(cbind_names_to(r_names(xs) != R_NilValue, names_to, ptype), &n_prot);
96 names_to_loc = 0;
97 }
98 }
99
100 // Must happen after the `names_to` column has been added to `ptype`
101 xs = vec_cast_common_params(xs, ptype, DF_FALLBACK_DEFAULT, S3_FALLBACK_true);
102 PROTECT_N(xs, &n_prot);
103
104 // Find individual input sizes and total size of output
105 R_len_t n_rows = 0;
106
107 SEXP ns_placeholder = PROTECT_N(Rf_allocVector(INTSXP, n_inputs), &n_prot);
108 int* ns = INTEGER(ns_placeholder);
109
110 for (R_len_t i = 0; i < n_inputs; ++i) {
111 SEXP elt = VECTOR_ELT(xs, i);
112 R_len_t size = (elt == R_NilValue) ? 0 : vec_size(elt);
113 n_rows += size;
114 ns[i] = size;
115 }
116
117 SEXP proxy = PROTECT_N(vec_proxy(ptype), &n_prot);
118 if (!is_data_frame(proxy)) {
119 Rf_errorcall(R_NilValue, "Can't fill a data frame that doesn't have a data frame proxy.");
120 }
121
122 PROTECT_INDEX out_pi;
123 SEXP out = vec_init(proxy, n_rows);
124 PROTECT_WITH_INDEX(out, &out_pi);
125 ++n_prot;
126
127 SEXP loc = PROTECT_N(compact_seq(0, 0, true), &n_prot);
128 int* p_loc = INTEGER(loc);
129
130 SEXP rownames = R_NilValue;
131 PROTECT_INDEX rownames_pi;
132 PROTECT_WITH_INDEX(rownames, &rownames_pi);
133 ++n_prot;
134
135 SEXP names_to_col = R_NilValue;
136 SEXPTYPE names_to_type = 99;
137 void* p_names_to_col = NULL;
138 const void* p_index = NULL;
139
140 SEXP xs_names = PROTECT_N(r_names(xs), &n_prot);
141 bool xs_is_named = xs_names != R_NilValue;
142
143 if (has_names_to) {
144 SEXP index = R_NilValue;
145 if (xs_is_named) {
146 index = xs_names;
147 } else {
148 index = PROTECT_N(Rf_allocVector(INTSXP, n_inputs), &n_prot);
149 r_int_fill_seq(index, 1, n_inputs);
150 }
151 names_to_type = TYPEOF(index);
152 names_to_col = PROTECT_N(Rf_allocVector(names_to_type, n_rows), &n_prot);
153
154 p_index = r_vec_deref_barrier_const(index);
155 p_names_to_col = r_vec_deref_barrier(names_to_col);
156
157 xs_names = R_NilValue;
158 xs_is_named = false;
159 }
160
161 const SEXP* p_xs_names = NULL;
162 if (xs_is_named) {
163 p_xs_names = STRING_PTR_RO(xs_names);
164 }
165
166 // Compact sequences use 0-based counters
167 R_len_t counter = 0;
168
169 const struct vec_assign_opts bind_assign_opts = {
170 .assign_names = assign_names,
171 // Unlike in `vec_c()` we don't need to ignore outer names because
172 // `df_assign()` doesn't deal with those
173 .ignore_outer_names = false
174 };
175
176 for (R_len_t i = 0; i < n_inputs; ++i) {
177 R_len_t size = ns[i];
178 if (!size) {
179 continue;
180 }
181 SEXP x = VECTOR_ELT(xs, i);
182
183 init_compact_seq(p_loc, counter, size, true);
184
185 // Total ownership of `out` because it was freshly created with `vec_init()`
186 out = df_assign(out, loc, x, VCTRS_OWNED_true, &bind_assign_opts);
187 REPROTECT(out, out_pi);
188
189 if (assign_names) {
190 SEXP outer = xs_is_named ? p_xs_names[i] : R_NilValue;
191 SEXP inner = PROTECT(vec_names(x));
192 SEXP x_nms = PROTECT(apply_name_spec(name_spec, outer, inner, size));
193
194 if (x_nms != R_NilValue) {
195 R_LAZY_ALLOC(rownames, rownames_pi, STRSXP, n_rows);
196
197 // If there is no name to assign, skip the assignment since
198 // `out_names` already contains empty strings
199 if (inner != chrs_empty) {
200 rownames = chr_assign(rownames, loc, x_nms, VCTRS_OWNED_true);
201 REPROTECT(rownames, rownames_pi);
202 }
203 }
204
205 UNPROTECT(2);
206 }
207
208 // Assign current name to group vector, if supplied
209 if (has_names_to) {
210 r_vec_fill(names_to_type, p_names_to_col, counter, p_index, i, size);
211 }
212
213 counter += size;
214 }
215
216 if (rownames != R_NilValue) {
217 Rf_setAttrib(out, R_RowNamesSymbol, rownames);
218 }
219
220 if (has_names_to) {
221 out = df_poke(out, names_to_loc, names_to_col);
222 REPROTECT(out, out_pi);
223 }
224
225 // Not optimal. Happens after the fallback columns have been
226 // assigned already, ideally they should be ignored. Also this is
227 // currently not recursive. Should we deal with this during
228 // restoration?
229 for (R_len_t i = 0; i < n_cols; ++i) {
230 SEXP col = r_list_get(ptype, i);
231
232 if (vec_is_common_class_fallback(col)) {
233 SEXP col_xs = PROTECT(list_pluck(xs, i));
234 SEXP col_out = vec_c_fallback(col, col_xs, name_spec, name_repair);
235 r_list_poke(out, i, col_out);
236 UNPROTECT(1);
237 }
238 }
239
240 SEXP r_n_rows = PROTECT_N(r_int(n_rows), &n_prot);
241 out = vec_restore(out, ptype, r_n_rows, VCTRS_OWNED_true);
242
243 UNPROTECT(n_prot);
244 return out;
245 }
246
as_df_row(SEXP x,struct name_repair_opts * name_repair)247 static SEXP as_df_row(SEXP x, struct name_repair_opts* name_repair) {
248 if (vec_is_unspecified(x) && r_names(x) == R_NilValue) {
249 return x;
250 } else {
251 return as_df_row_impl(x, name_repair);
252 }
253 }
254
as_df_row_impl(SEXP x,struct name_repair_opts * name_repair)255 static SEXP as_df_row_impl(SEXP x, struct name_repair_opts* name_repair) {
256 if (x == R_NilValue) {
257 return x;
258 }
259 if (is_data_frame(x)) {
260 return df_repair_names(x, name_repair);
261 }
262
263 int nprot = 0;
264
265 SEXP dim = vec_bare_dim(x);
266 R_len_t ndim = (dim == R_NilValue) ? 1 : Rf_length(dim);
267
268 if (ndim > 2) {
269 Rf_errorcall(R_NilValue, "Can't bind arrays.");
270 }
271 if (ndim == 2) {
272 SEXP names = PROTECT_N(vec_unique_colnames(x, name_repair->quiet), &nprot);
273 SEXP out = PROTECT_N(r_as_data_frame(x), &nprot);
274 r_poke_names(out, names);
275 UNPROTECT(nprot);
276 return out;
277 }
278
279 SEXP nms = PROTECT_N(vec_names(x), &nprot);
280
281 if (dim != R_NilValue) {
282 x = PROTECT_N(r_clone_referenced(x), &nprot);
283 r_attrib_poke(x, R_DimSymbol, R_NilValue);
284 r_attrib_poke(x, R_DimNamesSymbol, R_NilValue);
285 }
286
287 // Remove names as they are promoted to data frame column names
288 if (nms != R_NilValue) {
289 x = PROTECT_N(vec_set_names(x, R_NilValue), &nprot);
290 }
291
292 if (nms == R_NilValue) {
293 nms = PROTECT_N(vec_unique_names(x, name_repair->quiet), &nprot);
294 } else {
295 nms = PROTECT_N(vec_as_names(nms, name_repair), &nprot);
296 }
297
298 x = PROTECT_N(vec_chop(x, R_NilValue), &nprot);
299
300 r_poke_names(x, nms);
301
302 x = new_data_frame(x, 1);
303
304 UNPROTECT(nprot);
305 return x;
306 }
307
308 // [[ register() ]]
vctrs_as_df_row(SEXP x,SEXP quiet)309 SEXP vctrs_as_df_row(SEXP x, SEXP quiet) {
310 struct name_repair_opts name_repair_opts = {
311 .type = name_repair_unique,
312 .fn = R_NilValue,
313 .quiet = LOGICAL(quiet)[0]
314 };
315 return as_df_row(x, &name_repair_opts);
316 }
317
cbind_names_to(bool has_names,SEXP names_to,SEXP ptype)318 static SEXP cbind_names_to(bool has_names, SEXP names_to, SEXP ptype) {
319 SEXP index_ptype = has_names ? vctrs_shared_empty_chr : vctrs_shared_empty_int;
320
321 SEXP tmp = PROTECT(Rf_allocVector(VECSXP, 2));
322 SET_VECTOR_ELT(tmp, 0, index_ptype);
323 SET_VECTOR_ELT(tmp, 1, ptype);
324
325 SEXP tmp_nms = PROTECT(Rf_allocVector(STRSXP, 2));
326 SET_STRING_ELT(tmp_nms, 0, names_to);
327 SET_STRING_ELT(tmp_nms, 1, strings_empty);
328
329 r_poke_names(tmp, tmp_nms);
330
331 SEXP out = vec_cbind(tmp, R_NilValue, R_NilValue, NULL);
332
333 UNPROTECT(2);
334 return out;
335 }
336
337
338 static SEXP as_df_col(SEXP x, SEXP outer, bool* allow_pack);
339 static SEXP cbind_container_type(SEXP x, void* data);
340
341 // [[ register(external = TRUE) ]]
vctrs_cbind(SEXP call,SEXP op,SEXP args,SEXP env)342 SEXP vctrs_cbind(SEXP call, SEXP op, SEXP args, SEXP env) {
343 args = CDR(args);
344
345 SEXP xs = PROTECT(rlang_env_dots_list(env));
346 SEXP ptype = PROTECT(Rf_eval(CAR(args), env)); args = CDR(args);
347 SEXP size = PROTECT(Rf_eval(CAR(args), env)); args = CDR(args);
348 SEXP name_repair = PROTECT(Rf_eval(CAR(args), env));
349
350 struct name_repair_opts name_repair_opts = validate_bind_name_repair(name_repair, true);
351 PROTECT_NAME_REPAIR_OPTS(&name_repair_opts);
352
353 SEXP out = vec_cbind(xs, ptype, size, &name_repair_opts);
354
355 UNPROTECT(5);
356 return out;
357 }
358
vec_cbind(SEXP xs,SEXP ptype,SEXP size,struct name_repair_opts * name_repair)359 static SEXP vec_cbind(SEXP xs, SEXP ptype, SEXP size, struct name_repair_opts* name_repair) {
360 R_len_t n = Rf_length(xs);
361
362 // Find the common container type of inputs
363 SEXP rownames = R_NilValue;
364 SEXP containers = PROTECT(map_with_data(xs, &cbind_container_type, &rownames));
365 ptype = PROTECT(cbind_container_type(ptype, &rownames));
366
367 SEXP type = PROTECT(vec_ptype_common_params(containers,
368 ptype,
369 DF_FALLBACK_DEFAULT,
370 S3_FALLBACK_false));
371 if (type == R_NilValue) {
372 type = new_data_frame(vctrs_shared_empty_list, 0);
373 } else if (!is_data_frame(type)) {
374 type = r_as_data_frame(type);
375 }
376 UNPROTECT(1);
377 PROTECT(type);
378
379
380 R_len_t nrow;
381 if (size == R_NilValue) {
382 nrow = vec_size_common(xs, 0);
383 } else {
384 nrow = size_validate(size, ".size");
385 }
386
387 if (rownames != R_NilValue && Rf_length(rownames) != nrow) {
388 rownames = PROTECT(vec_recycle(rownames, nrow, args_empty));
389 rownames = vec_as_unique_names(rownames, false);
390 UNPROTECT(1);
391 }
392 PROTECT(rownames);
393
394 // Convert inputs to data frames, validate, and collect total number of columns
395 SEXP xs_names = PROTECT(r_names(xs));
396 bool has_names = xs_names != R_NilValue;
397 SEXP const* xs_names_p = has_names ? STRING_PTR_RO(xs_names) : NULL;
398
399 R_len_t ncol = 0;
400 for (R_len_t i = 0; i < n; ++i) {
401 SEXP x = VECTOR_ELT(xs, i);
402
403 if (x == R_NilValue) {
404 continue;
405 }
406
407 x = PROTECT(vec_recycle(x, nrow, args_empty));
408
409 SEXP outer_name = has_names ? xs_names_p[i] : strings_empty;
410 bool allow_packing;
411 x = PROTECT(as_df_col(x, outer_name, &allow_packing));
412
413 // Remove outer name of column vectors because they shouldn't be repacked
414 if (has_names && !allow_packing) {
415 SET_STRING_ELT(xs_names, i, strings_empty);
416 }
417
418 SET_VECTOR_ELT(xs, i, x);
419 UNPROTECT(2);
420
421 // Named inputs are packed in a single column
422 R_len_t x_ncol = outer_name == strings_empty ? Rf_length(x) : 1;
423 ncol += x_ncol;
424 }
425
426
427 // Fill in columns
428 PROTECT_INDEX out_pi;
429 SEXP out = Rf_allocVector(VECSXP, ncol);
430 PROTECT_WITH_INDEX(out, &out_pi);
431 init_data_frame(out, nrow);
432
433 PROTECT_INDEX names_pi;
434 SEXP names = Rf_allocVector(STRSXP, ncol);
435 PROTECT_WITH_INDEX(names, &names_pi);
436
437 SEXP idx = PROTECT(compact_seq(0, 0, true));
438 int* idx_ptr = INTEGER(idx);
439
440 R_len_t counter = 0;
441
442 for (R_len_t i = 0; i < n; ++i) {
443 SEXP x = VECTOR_ELT(xs, i);
444
445 if (x == R_NilValue) {
446 continue;
447 }
448
449 SEXP outer_name = has_names ? xs_names_p[i] : strings_empty;
450 if (outer_name != strings_empty) {
451 SET_VECTOR_ELT(out, counter, x);
452 SET_STRING_ELT(names, counter, outer_name);
453 ++counter;
454 continue;
455 }
456
457 R_len_t xn = Rf_length(x);
458 init_compact_seq(idx_ptr, counter, xn, true);
459
460 // Total ownership of `out` because it was freshly created with `Rf_allocVector()`
461 out = list_assign(out, idx, x, VCTRS_OWNED_true);
462 REPROTECT(out, out_pi);
463
464 SEXP xnms = PROTECT(r_names(x));
465 if (xnms != R_NilValue) {
466 names = chr_assign(names, idx, xnms, VCTRS_OWNED_true);
467 REPROTECT(names, names_pi);
468 }
469 UNPROTECT(1);
470
471 counter += xn;
472 }
473
474 names = PROTECT(vec_as_names(names, name_repair));
475 Rf_setAttrib(out, R_NamesSymbol, names);
476
477 if (rownames != R_NilValue) {
478 Rf_setAttrib(out, R_RowNamesSymbol, rownames);
479 }
480
481 out = vec_restore(out, type, R_NilValue, VCTRS_OWNED_true);
482
483 UNPROTECT(9);
484 return out;
485 }
486
487 SEXP syms_vec_cbind_frame_ptype = NULL;
488 SEXP fns_vec_cbind_frame_ptype = NULL;
489
vec_cbind_frame_ptype(SEXP x)490 SEXP vec_cbind_frame_ptype(SEXP x) {
491 return vctrs_dispatch1(syms_vec_cbind_frame_ptype,
492 fns_vec_cbind_frame_ptype,
493 syms_x,
494 x);
495 }
496
cbind_container_type(SEXP x,void * data)497 static SEXP cbind_container_type(SEXP x, void* data) {
498 if (is_data_frame(x)) {
499 SEXP rn = df_rownames(x);
500
501 if (rownames_type(rn) == ROWNAMES_IDENTIFIERS) {
502 SEXP* learned_rn_p = (SEXP*) data;
503 SEXP learned_rn = *learned_rn_p;
504
505 if (learned_rn == R_NilValue) {
506 *learned_rn_p = rn;
507 }
508 }
509
510 return vec_cbind_frame_ptype(x);
511 } else {
512 return R_NilValue;
513 }
514 }
515
516
517 static SEXP shaped_as_df_col(SEXP x, SEXP outer);
518 static SEXP vec_as_df_col(SEXP x, SEXP outer);
519
520 // [[ register() ]]
vctrs_as_df_col(SEXP x,SEXP outer)521 SEXP vctrs_as_df_col(SEXP x, SEXP outer) {
522 bool allow_pack;
523 return as_df_col(x, r_chr_get(outer, 0), &allow_pack);
524 }
as_df_col(SEXP x,SEXP outer,bool * allow_pack)525 static SEXP as_df_col(SEXP x, SEXP outer, bool* allow_pack) {
526 if (is_data_frame(x)) {
527 *allow_pack = true;
528 return Rf_shallow_duplicate(x);
529 }
530
531 R_len_t ndim = vec_bare_dim_n(x);
532 if (ndim > 2) {
533 Rf_errorcall(R_NilValue, "Can't bind arrays.");
534 }
535 if (ndim > 0) {
536 *allow_pack = true;
537 return shaped_as_df_col(x, outer);
538 }
539
540 *allow_pack = false;
541 return vec_as_df_col(x, outer);
542 }
543
shaped_as_df_col(SEXP x,SEXP outer)544 static SEXP shaped_as_df_col(SEXP x, SEXP outer) {
545 // If packed, store array as a column
546 if (outer != strings_empty) {
547 return x;
548 }
549
550 // If unpacked, transform to data frame first. We repair names
551 // after unpacking and concatenation.
552 SEXP out = PROTECT(r_as_data_frame(x));
553
554 // Remove names if they were repaired by `as.data.frame()`
555 if (colnames(x) == R_NilValue) {
556 r_poke_names(out, R_NilValue);
557 }
558
559 UNPROTECT(1);
560 return out;
561 }
562
vec_as_df_col(SEXP x,SEXP outer)563 static SEXP vec_as_df_col(SEXP x, SEXP outer) {
564 SEXP out = PROTECT(Rf_allocVector(VECSXP, 1));
565 SET_VECTOR_ELT(out, 0, x);
566
567 if (outer != strings_empty) {
568 SEXP names = PROTECT(r_str_as_character(outer));
569 Rf_setAttrib(out, R_NamesSymbol, names);
570 UNPROTECT(1);
571 }
572
573 init_data_frame(out, Rf_length(x));
574
575 UNPROTECT(1);
576 return out;
577 }
578
validate_bind_name_repair(SEXP name_repair,bool allow_minimal)579 struct name_repair_opts validate_bind_name_repair(SEXP name_repair, bool allow_minimal) {
580 struct name_repair_opts opts = new_name_repair_opts(name_repair, args_empty, false);
581
582 switch (opts.type) {
583 case name_repair_custom:
584 case name_repair_unique:
585 case name_repair_universal:
586 case name_repair_check_unique:
587 break;
588 case name_repair_minimal:
589 if (allow_minimal) break; // else fallthrough
590 default:
591 if (allow_minimal) {
592 Rf_errorcall(R_NilValue,
593 "`.name_repair` can't be `\"%s\"`.\n"
594 "It must be one of `\"unique\"`, `\"universal\"`, `\"check_unique\"`, or `\"minimal\"`.",
595 name_repair_arg_as_c_string(opts.type));
596 } else {
597 Rf_errorcall(R_NilValue,
598 "`.name_repair` can't be `\"%s\"`.\n"
599 "It must be one of `\"unique\"`, `\"universal\"`, or `\"check_unique\"`.",
600 name_repair_arg_as_c_string(opts.type));
601 }
602 }
603
604 return opts;
605 }
606
vctrs_init_bind(SEXP ns)607 void vctrs_init_bind(SEXP ns) {
608 syms_vec_cbind_frame_ptype = Rf_install("vec_cbind_frame_ptype");
609 fns_vec_cbind_frame_ptype = r_env_get(ns, syms_vec_cbind_frame_ptype);
610 }
611