1 #include "vctrs.h"
2 #include "ptype-common.h"
3 #include "ptype2.h"
4 #include "type-data-frame.h"
5 #include "utils.h"
6
7 static SEXP syms_df_lossy_cast = NULL;
8 static SEXP fns_df_lossy_cast = NULL;
9
10 static SEXP new_compact_rownames(R_len_t n);
11
12
13 // [[ include("type-data-frame.h") ]]
is_data_frame(SEXP x)14 bool is_data_frame(SEXP x) {
15 if (TYPEOF(x) != VECSXP) {
16 return false;
17 }
18
19 enum vctrs_class_type type = class_type(x);
20 return
21 type == vctrs_class_bare_data_frame ||
22 type == vctrs_class_bare_tibble ||
23 type == vctrs_class_data_frame;
24 }
25
26 // [[ include("type-data-frame.h") ]]
is_native_df(SEXP x)27 bool is_native_df(SEXP x) {
28 enum vctrs_class_type type = class_type(x);
29 return
30 type == vctrs_class_bare_data_frame ||
31 type == vctrs_class_bare_tibble;
32 }
33
34 // [[ include("type-data-frame.h") ]]
is_bare_data_frame(SEXP x)35 bool is_bare_data_frame(SEXP x) {
36 return class_type(x) == vctrs_class_bare_data_frame;
37 }
38
39 // [[ include("type-data-frame.h") ]]
is_bare_tibble(SEXP x)40 bool is_bare_tibble(SEXP x) {
41 return class_type(x) == vctrs_class_bare_tibble;
42 }
43
44 // [[ include("type-data-frame.h") ]]
new_data_frame(SEXP x,R_len_t n)45 SEXP new_data_frame(SEXP x, R_len_t n) {
46 x = PROTECT(r_clone_referenced(x));
47 init_data_frame(x, n);
48
49 UNPROTECT(1);
50 return x;
51 }
52
53 static R_len_t df_size_from_list(SEXP x, SEXP n);
54 static R_len_t df_size_from_n(SEXP n);
55 static SEXP c_data_frame_class(SEXP cls);
56
57 // [[ register() ]]
vctrs_new_data_frame(SEXP args)58 SEXP vctrs_new_data_frame(SEXP args) {
59 args = CDR(args);
60
61 SEXP x = CAR(args); args = CDR(args);
62 SEXP n = CAR(args); args = CDR(args);
63 SEXP cls = CAR(args); args = CDR(args);
64 SEXP attrib = args;
65
66 PROTECT_INDEX pi;
67 PROTECT_WITH_INDEX(attrib, &pi);
68
69 if (TYPEOF(x) != VECSXP) {
70 Rf_errorcall(R_NilValue, "`x` must be a list");
71 }
72
73 bool has_names = false;
74 bool has_rownames = false;
75 R_len_t size = df_size_from_list(x, n);
76
77 SEXP out = PROTECT(r_clone_referenced(x));
78
79 for (SEXP node = attrib; node != R_NilValue; node = CDR(node)) {
80 SEXP tag = TAG(node);
81
82 // We might add dynamic dots later on
83 if (tag == R_ClassSymbol) {
84 stop_internal("new_data_frame", "Can't supply `class` in `...`.");
85 }
86
87 if (tag == R_NamesSymbol) {
88 has_names = true;
89 continue;
90 }
91
92 if (tag == R_RowNamesSymbol) {
93 // "row.names" is checked for consistency with n (if provided)
94 if (size != rownames_size(CAR(node)) && n != R_NilValue) {
95 Rf_errorcall(R_NilValue, "`n` and `row.names` must be consistent.");
96 }
97
98 has_rownames = true;
99 continue;
100 }
101 }
102
103 // Take names from `x` if `attrib` doesn't have any
104 if (!has_names) {
105 SEXP nms = vctrs_shared_empty_chr;
106 if (Rf_length(out)) {
107 nms = r_names(out);
108 }
109 PROTECT(nms);
110
111 if (nms != R_NilValue) {
112 attrib = Rf_cons(nms, attrib);
113 SET_TAG(attrib, R_NamesSymbol);
114 REPROTECT(attrib, pi);
115 }
116
117 UNPROTECT(1);
118 }
119
120 if (!has_rownames) {
121 SEXP rn = PROTECT(new_compact_rownames(size));
122 attrib = Rf_cons(rn, attrib);
123 SET_TAG(attrib, R_RowNamesSymbol);
124
125 UNPROTECT(1);
126 REPROTECT(attrib, pi);
127 }
128
129 if (cls == R_NilValue) {
130 cls = classes_data_frame;
131 } else {
132 cls = c_data_frame_class(cls);
133 }
134 PROTECT(cls);
135
136 attrib = Rf_cons(cls, attrib);
137 SET_TAG(attrib, R_ClassSymbol);
138
139 UNPROTECT(1);
140 REPROTECT(attrib, pi);
141
142
143 SET_ATTRIB(out, attrib);
144 SET_OBJECT(out, 1);
145
146 UNPROTECT(2);
147 return out;
148 }
149
df_size_from_list(SEXP x,SEXP n)150 static R_len_t df_size_from_list(SEXP x, SEXP n) {
151 if (n == R_NilValue) {
152 if (is_data_frame(x)) {
153 return df_size(x);
154 } else {
155 return df_raw_size_from_list(x);
156 }
157 } else {
158 return df_size_from_n(n);
159 }
160 }
161
df_size_from_n(SEXP n)162 static R_len_t df_size_from_n(SEXP n) {
163 if (TYPEOF(n) != INTSXP || Rf_length(n) != 1) {
164 Rf_errorcall(R_NilValue, "`n` must be an integer of size 1");
165 }
166
167 return r_int_get(n, 0);
168 }
169
c_data_frame_class(SEXP cls)170 static SEXP c_data_frame_class(SEXP cls) {
171 if (TYPEOF(cls) != STRSXP) {
172 Rf_errorcall(R_NilValue, "`class` must be NULL or a character vector");
173 }
174 return chr_c(cls, classes_data_frame);
175 }
176
177
178 SEXP data_frame(SEXP x, r_ssize size, const struct name_repair_opts* p_name_repair_opts);
179
180 // [[ register() ]]
vctrs_data_frame(SEXP x,SEXP size,SEXP name_repair)181 SEXP vctrs_data_frame(SEXP x, SEXP size, SEXP name_repair) {
182 struct name_repair_opts name_repair_opts = new_name_repair_opts(name_repair, args_empty, false);
183 PROTECT_NAME_REPAIR_OPTS(&name_repair_opts);
184
185 r_ssize c_size = 0;
186 if (size == R_NilValue) {
187 c_size = vec_size_common(x, 0);
188 } else {
189 c_size = size_validate(size, ".size");
190 }
191
192 SEXP out = data_frame(x, c_size, &name_repair_opts);
193
194 UNPROTECT(1);
195 return out;
196 }
197
198 SEXP df_list(SEXP x, r_ssize size, const struct name_repair_opts* p_name_repair_opts);
199
data_frame(SEXP x,r_ssize size,const struct name_repair_opts * p_name_repair_opts)200 SEXP data_frame(SEXP x, r_ssize size, const struct name_repair_opts* p_name_repair_opts) {
201 SEXP out = PROTECT(df_list(x, size, p_name_repair_opts));
202 out = new_data_frame(out, size);
203 UNPROTECT(1);
204 return out;
205 }
206
207
208 // [[ register() ]]
vctrs_df_list(SEXP x,SEXP size,SEXP name_repair)209 SEXP vctrs_df_list(SEXP x, SEXP size, SEXP name_repair) {
210 struct name_repair_opts name_repair_opts = new_name_repair_opts(name_repair, args_empty, false);
211 PROTECT_NAME_REPAIR_OPTS(&name_repair_opts);
212
213 r_ssize c_size = 0;
214 if (size == R_NilValue) {
215 c_size = vec_size_common(x, 0);
216 } else {
217 c_size = size_validate(size, ".size");
218 }
219
220 SEXP out = df_list(x, c_size, &name_repair_opts);
221
222 UNPROTECT(1);
223 return out;
224 }
225
226 static SEXP df_list_drop_null(SEXP x);
227 static SEXP df_list_splice(SEXP x);
228
df_list(SEXP x,r_ssize size,const struct name_repair_opts * p_name_repair_opts)229 SEXP df_list(SEXP x, r_ssize size, const struct name_repair_opts* p_name_repair_opts) {
230 if (TYPEOF(x) != VECSXP) {
231 stop_internal("df_list", "`x` must be a list.");
232 }
233
234 x = PROTECT(vec_recycle_common(x, size));
235
236 r_ssize n_cols = r_length(x);
237
238 // Unnamed columns are auto-named with `""`
239 if (r_names(x) == R_NilValue) {
240 SEXP names = PROTECT(r_new_character(n_cols));
241 r_poke_names(x, names);
242 UNPROTECT(1);
243 }
244
245 x = PROTECT(df_list_drop_null(x));
246 x = PROTECT(df_list_splice(x));
247
248 SEXP names = PROTECT(r_names(x));
249 names = PROTECT(vec_as_names(names, p_name_repair_opts));
250 r_poke_names(x, names);
251
252 UNPROTECT(5);
253 return x;
254 }
255
df_list_drop_null(SEXP x)256 static SEXP df_list_drop_null(SEXP x) {
257 r_ssize n_cols = r_length(x);
258 r_ssize count = 0;
259
260 for (r_ssize i = 0; i < n_cols; ++i) {
261 count += VECTOR_ELT(x, i) == R_NilValue;
262 }
263
264 if (count == 0) {
265 return x;
266 }
267
268 SEXP names = PROTECT(r_names(x));
269 const SEXP* p_names = STRING_PTR_RO(names);
270
271 r_ssize n_out = n_cols - count;
272 SEXP out = PROTECT(Rf_allocVector(VECSXP, n_out));
273 SEXP out_names = PROTECT(Rf_allocVector(STRSXP, n_out));
274 r_ssize out_i = 0;
275
276 for (r_ssize i = 0; i < n_cols; ++i) {
277 SEXP col = VECTOR_ELT(x, i);
278
279 if (col != R_NilValue) {
280 SET_VECTOR_ELT(out, out_i, col);
281 SET_STRING_ELT(out_names, out_i, p_names[i]);
282 ++out_i;
283 }
284 }
285
286 r_poke_names(out, out_names);
287
288 UNPROTECT(3);
289 return out;
290 }
291
df_list_splice(SEXP x)292 static SEXP df_list_splice(SEXP x) {
293 SEXP names = PROTECT(r_names(x));
294 const SEXP* p_names = STRING_PTR_RO(names);
295
296 bool any_needs_splice = false;
297 r_ssize n_cols = r_length(x);
298 r_ssize i = 0;
299
300 for (; i < n_cols; ++i) {
301 // Only splice unnamed data frames
302 if (p_names[i] != strings_empty) {
303 continue;
304 }
305
306 SEXP col = VECTOR_ELT(x, i);
307
308 if (is_data_frame(col)) {
309 any_needs_splice = true;
310 break;
311 }
312 }
313
314 if (!any_needs_splice) {
315 UNPROTECT(1);
316 return x;
317 }
318
319 SEXP splice = PROTECT(r_new_logical(n_cols));
320 int* p_splice = LOGICAL(splice);
321
322 for (r_ssize j = 0; j < n_cols; ++j) {
323 p_splice[j] = 0;
324 }
325
326 r_ssize width = i;
327
328 for (; i < n_cols; ++i) {
329 // Only splice unnamed data frames
330 if (p_names[i] != strings_empty) {
331 ++width;
332 continue;
333 }
334
335 SEXP col = VECTOR_ELT(x, i);
336
337 if (is_data_frame(col)) {
338 width += r_length(col);
339 p_splice[i] = 1;
340 } else {
341 ++width;
342 }
343 }
344
345 SEXP out = PROTECT(r_new_list(width));
346 SEXP out_names = PROTECT(r_new_character(width));
347
348 r_ssize loc = 0;
349
350 // Splice loop
351 for (r_ssize i = 0; i < n_cols; ++i) {
352 if (!p_splice[i]) {
353 SET_VECTOR_ELT(out, loc, VECTOR_ELT(x, i));
354 SET_STRING_ELT(out_names, loc, p_names[i]);
355 ++loc;
356 continue;
357 }
358
359 SEXP col = VECTOR_ELT(x, i);
360 SEXP col_names = PROTECT(r_names(col));
361
362 if (TYPEOF(col_names) != STRSXP) {
363 stop_internal(
364 "df_splice",
365 "Encountered corrupt data frame. "
366 "Data frames must have character column names."
367 );
368 }
369
370 const SEXP* p_col_names = STRING_PTR_RO(col_names);
371 r_ssize col_i = 0;
372
373 r_ssize stop = loc + r_length(col);
374
375 for (; loc < stop; ++loc, ++col_i) {
376 SET_VECTOR_ELT(out, loc, VECTOR_ELT(col, col_i));
377 SET_STRING_ELT(out_names, loc, p_col_names[col_i]);
378 }
379
380 loc = stop;
381 UNPROTECT(1);
382 }
383
384 r_poke_names(out, out_names);
385
386 UNPROTECT(4);
387 return out;
388 }
389
390
391 // [[ include("type-data-frame.h") ]]
rownames_type(SEXP x)392 enum rownames_type rownames_type(SEXP x) {
393 switch (TYPEOF(x)) {
394 case STRSXP:
395 return ROWNAMES_IDENTIFIERS;
396 case INTSXP:
397 if (Rf_length(x) == 2 && INTEGER(x)[0] == NA_INTEGER) {
398 return ROWNAMES_AUTOMATIC_COMPACT;
399 } else {
400 return ROWNAMES_AUTOMATIC;
401 }
402 default:
403 Rf_error("Corrupt data in `rownames_type()`: Unexpected type `%s`.",
404 Rf_type2char(TYPEOF(x)));
405 }
406 }
407
compact_rownames_length(SEXP x)408 static R_len_t compact_rownames_length(SEXP x) {
409 return abs(INTEGER(x)[1]);
410 }
411
412 // [[ include("type-data-frame.h") ]]
rownames_size(SEXP rn)413 R_len_t rownames_size(SEXP rn) {
414 switch (rownames_type(rn)) {
415 case ROWNAMES_IDENTIFIERS:
416 case ROWNAMES_AUTOMATIC:
417 return Rf_length(rn);
418 case ROWNAMES_AUTOMATIC_COMPACT:
419 return compact_rownames_length(rn);
420 }
421
422 never_reached("rownames_size");
423 }
424
425 static void init_bare_data_frame(SEXP x, R_len_t n);
426
427 // [[ include("type-data-frame.h") ]]
init_data_frame(SEXP x,R_len_t n)428 void init_data_frame(SEXP x, R_len_t n) {
429 Rf_setAttrib(x, R_ClassSymbol, classes_data_frame);
430 init_bare_data_frame(x, n);
431 }
432 // [[ include("type-data-frame.h") ]]
init_tibble(SEXP x,R_len_t n)433 void init_tibble(SEXP x, R_len_t n) {
434 Rf_setAttrib(x, R_ClassSymbol, classes_tibble);
435 init_bare_data_frame(x, n);
436 }
437
init_bare_data_frame(SEXP x,R_len_t n)438 static void init_bare_data_frame(SEXP x, R_len_t n) {
439 if (Rf_length(x) == 0) {
440 Rf_setAttrib(x, R_NamesSymbol, vctrs_shared_empty_chr);
441 }
442
443 init_compact_rownames(x, n);
444 }
445
446 // [[ include("type-data-frame.h") ]]
init_compact_rownames(SEXP x,R_len_t n)447 void init_compact_rownames(SEXP x, R_len_t n) {
448 SEXP rn = PROTECT(new_compact_rownames(n));
449 Rf_setAttrib(x, R_RowNamesSymbol, rn);
450 UNPROTECT(1);
451 }
452
new_compact_rownames(R_len_t n)453 static SEXP new_compact_rownames(R_len_t n) {
454 if (n <= 0) {
455 return vctrs_shared_empty_int;
456 }
457
458 SEXP out = Rf_allocVector(INTSXP, 2);
459 int* out_data = INTEGER(out);
460 out_data[0] = NA_INTEGER;
461 out_data[1] = -n;
462 return out;
463 }
464
465
466 // vctrs type methods ------------------------------------------------
467
468 // [[ register() ]]
vctrs_df_ptype2_opts(SEXP x,SEXP y,SEXP opts,SEXP x_arg,SEXP y_arg)469 SEXP vctrs_df_ptype2_opts(SEXP x, SEXP y, SEXP opts, SEXP x_arg, SEXP y_arg) {
470 struct vctrs_arg c_x_arg = vec_as_arg(x_arg);
471 struct vctrs_arg c_y_arg = vec_as_arg(y_arg);
472
473 const struct ptype2_opts c_opts = new_ptype2_opts(x, y, &c_x_arg, &c_y_arg, opts);
474
475 return df_ptype2(&c_opts);
476 }
477
478 static
479 SEXP df_ptype2_match(const struct ptype2_opts* opts,
480 SEXP x_names,
481 SEXP y_names);
482
483 static
484 SEXP df_ptype2_loop(const struct ptype2_opts* opts,
485 SEXP y_names);
486
487 // [[ include("type-data-frame.h") ]]
df_ptype2(const struct ptype2_opts * opts)488 SEXP df_ptype2(const struct ptype2_opts* opts) {
489 SEXP x_names = PROTECT(r_names(opts->x));
490 SEXP y_names = PROTECT(r_names(opts->y));
491
492 SEXP out = R_NilValue;
493
494 if (equal_object(x_names, y_names)) {
495 out = df_ptype2_loop(opts, x_names);
496 } else {
497 out = df_ptype2_match(opts, x_names, y_names);
498 }
499
500 UNPROTECT(2);
501 return out;
502 }
503
df_ptype2_match(const struct ptype2_opts * opts,SEXP x_names,SEXP y_names)504 SEXP df_ptype2_match(const struct ptype2_opts* opts,
505 SEXP x_names,
506 SEXP y_names) {
507 SEXP x = opts->x;
508 SEXP y = opts->y;
509
510 SEXP x_dups_pos = PROTECT(vec_match(x_names, y_names));
511 SEXP y_dups_pos = PROTECT(vec_match(y_names, x_names));
512
513 int* x_dups_pos_data = INTEGER(x_dups_pos);
514 int* y_dups_pos_data = INTEGER(y_dups_pos);
515
516 R_len_t x_len = Rf_length(x_names);
517 R_len_t y_len = Rf_length(y_names);
518
519 // Count columns that are only in `y`
520 R_len_t rest_len = 0;
521 for (R_len_t i = 0; i < y_len; ++i) {
522 if (y_dups_pos_data[i] == NA_INTEGER) {
523 ++rest_len;
524 }
525 }
526
527 R_len_t out_len = x_len + rest_len;
528 SEXP out = PROTECT(Rf_allocVector(VECSXP, out_len));
529 SEXP nms = PROTECT(Rf_allocVector(STRSXP, out_len));
530 Rf_setAttrib(out, R_NamesSymbol, nms);
531
532 R_len_t i = 0;
533
534 // Fill in prototypes of all the columns that are in `x`, in order
535 for (; i < x_len; ++i) {
536 R_len_t dup = x_dups_pos_data[i];
537
538 struct arg_data_index x_arg_data = new_index_arg_data(r_chr_get_c_string(x_names, i),
539 opts->x_arg);
540 struct vctrs_arg named_x_arg = new_index_arg(opts->x_arg, &x_arg_data);
541
542 SEXP col = VECTOR_ELT(x, i);
543 struct ptype2_opts col_opts = *opts;
544 col_opts.x = col;
545 col_opts.x_arg = &named_x_arg;
546
547 SEXP type;
548 if (dup == NA_INTEGER) {
549 col_opts.y = vctrs_shared_empty_uns;
550 col_opts.y_arg = NULL;
551 type = vec_ptype2_from_unspecified(&col_opts,
552 vec_typeof(col),
553 col,
554 &named_x_arg);
555 } else {
556 --dup; // 1-based index
557
558 struct arg_data_index y_arg_data = new_index_arg_data(r_chr_get_c_string(y_names, dup),
559 opts->y_arg);
560 struct vctrs_arg named_y_arg = new_index_arg(opts->y_arg, &y_arg_data);
561 col_opts.y = VECTOR_ELT(y, dup);
562 col_opts.y_arg = &named_y_arg;
563
564 int _left;
565 type = vec_ptype2_opts(&col_opts, &_left);
566 }
567
568 SET_VECTOR_ELT(out, i, type);
569 SET_STRING_ELT(nms, i, STRING_ELT(x_names, i));
570 }
571
572 // Fill in prototypes of the columns that are only in `y`
573 for (R_len_t j = 0; i < out_len; ++j) {
574 R_len_t dup = y_dups_pos_data[j];
575
576 if (dup == NA_INTEGER) {
577 SEXP col = VECTOR_ELT(y, j);
578
579 struct arg_data_index y_arg_data = new_index_arg_data(r_chr_get_c_string(y_names, j),
580 opts->y_arg);
581 struct vctrs_arg named_y_arg = new_index_arg(opts->y_arg, &y_arg_data);
582
583 struct ptype2_opts col_opts = *opts;
584 col_opts.y = col;
585 col_opts.y_arg = &named_y_arg;
586 col_opts.x = vctrs_shared_empty_uns;
587 col_opts.x_arg = NULL;
588 SEXP type = vec_ptype2_from_unspecified(&col_opts,
589 vec_typeof(col),
590 col,
591 &named_y_arg);
592
593 SET_VECTOR_ELT(out, i, type);
594 SET_STRING_ELT(nms, i, STRING_ELT(y_names, j));
595 ++i;
596 }
597 }
598
599 init_data_frame(out, 0);
600
601 UNPROTECT(4);
602 return out;
603 }
604
605 static
df_ptype2_loop(const struct ptype2_opts * opts,SEXP names)606 SEXP df_ptype2_loop(const struct ptype2_opts* opts,
607 SEXP names) {
608 SEXP x = opts->x;
609 SEXP y = opts->y;
610
611 R_len_t len = Rf_length(names);
612
613 SEXP out = PROTECT(Rf_allocVector(VECSXP, len));
614 Rf_setAttrib(out, R_NamesSymbol, names);
615
616 for (R_len_t i = 0; i < len; ++i) {
617 const char* name = r_chr_get_c_string(names, i);
618
619 struct arg_data_index x_arg_data = new_index_arg_data(name, opts->x_arg);
620 struct arg_data_index y_arg_data = new_index_arg_data(name, opts->y_arg);
621 struct vctrs_arg named_x_arg = new_index_arg(opts->x_arg, &x_arg_data);
622 struct vctrs_arg named_y_arg = new_index_arg(opts->y_arg, &y_arg_data);
623
624 struct ptype2_opts col_opts = *opts;
625 col_opts.x = VECTOR_ELT(x, i);
626 col_opts.y = VECTOR_ELT(y, i);
627 col_opts.x_arg = &named_x_arg;
628 col_opts.y_arg = &named_y_arg;
629 int _left;
630
631 SEXP type = vec_ptype2_opts(&col_opts, &_left);
632
633 SET_VECTOR_ELT(out, i, type);
634 }
635
636 init_data_frame(out, 0);
637
638 UNPROTECT(1);
639 return out;
640 }
641
642 // [[ register() ]]
vctrs_df_cast_opts(SEXP x,SEXP to,SEXP opts,SEXP x_arg,SEXP to_arg)643 SEXP vctrs_df_cast_opts(SEXP x, SEXP to, SEXP opts, SEXP x_arg, SEXP to_arg) {
644 struct vctrs_arg c_x_arg = vec_as_arg(x_arg);
645 struct vctrs_arg c_to_arg = vec_as_arg(to_arg);
646
647 const struct cast_opts c_opts = new_cast_opts(x, to, &c_x_arg, &c_to_arg, opts);
648
649 return df_cast_opts(&c_opts);
650 }
651
652 static SEXP df_cast_match(const struct cast_opts* opts,
653 SEXP x_names,
654 SEXP to_names);
655
656 static SEXP df_cast_loop(const struct cast_opts* opts, SEXP names);
657
658 // Take all columns of `to` and preserve the order. Common columns are
659 // cast to their types in `to`. Extra `x` columns are dropped and
660 // cause a lossy cast. Extra `to` columns are filled with missing
661 // values.
662 // [[ include("cast.h") ]]
df_cast_opts(const struct cast_opts * opts)663 SEXP df_cast_opts(const struct cast_opts* opts) {
664 SEXP x_names = PROTECT(r_names(opts->x));
665 SEXP to_names = PROTECT(r_names(opts->to));
666
667 if (x_names == R_NilValue || to_names == R_NilValue) {
668 stop_internal("df_cast_opts", "Data frame must have names.");
669 }
670
671 SEXP out = R_NilValue;
672
673 if (equal_object(x_names, to_names)) {
674 out = df_cast_loop(opts, x_names);
675 } else {
676 out = df_cast_match(opts, x_names, to_names);
677 }
678
679 UNPROTECT(2);
680 return out;
681 }
682
df_cast_match(const struct cast_opts * opts,SEXP x_names,SEXP to_names)683 static SEXP df_cast_match(const struct cast_opts* opts,
684 SEXP x_names,
685 SEXP to_names) {
686 SEXP x = opts->x;
687 SEXP to = opts->to;
688
689 SEXP to_dups_pos = PROTECT(vec_match(to_names, x_names));
690 int* to_dups_pos_data = INTEGER(to_dups_pos);
691
692 R_len_t to_len = Rf_length(to_dups_pos);
693 SEXP out = PROTECT(Rf_allocVector(VECSXP, to_len));
694 Rf_setAttrib(out, R_NamesSymbol, to_names);
695
696 R_len_t size = df_size(x);
697 R_len_t common_len = 0;
698
699 for (R_len_t i = 0; i < to_len; ++i) {
700 R_len_t pos = to_dups_pos_data[i];
701
702 SEXP col;
703 if (pos == NA_INTEGER) {
704 SEXP to_col = VECTOR_ELT(to, i);
705 col = vec_init(to_col, size);
706
707 // FIXME: Need to initialise the vector because we currently use
708 // `vec_assign()` in `vec_rbind()` before falling back. Attach
709 // an attribute to recognise unspecified vectors in
710 // `base_c_invoke()`.
711 if (opts->fallback.s3 && vec_is_common_class_fallback(to_col)) {
712 PROTECT(col);
713 Rf_setAttrib(col, Rf_install("vctrs:::unspecified"), vctrs_shared_true);
714 UNPROTECT(1);
715 }
716 } else {
717 --pos; // 1-based index
718 ++common_len;
719
720 struct arg_data_index x_arg_data = new_index_arg_data(r_chr_get_c_string(x_names, pos), opts->x_arg);
721 struct arg_data_index to_arg_data = new_index_arg_data(r_chr_get_c_string(to_names, i), opts->to_arg);
722 struct vctrs_arg named_x_arg = new_index_arg(opts->x_arg, &x_arg_data);
723 struct vctrs_arg named_to_arg = new_index_arg(opts->to_arg, &to_arg_data);
724
725 struct cast_opts col_opts = {
726 .x = VECTOR_ELT(x, pos),
727 .to = VECTOR_ELT(to, i),
728 .x_arg = &named_x_arg,
729 .to_arg = &named_to_arg,
730 .fallback = opts->fallback
731 };
732 col = vec_cast_opts(&col_opts);
733 }
734
735 SET_VECTOR_ELT(out, i, col);
736 }
737
738 // Restore data frame size before calling `vec_restore()`. `x` and
739 // `to` might not have any columns to compute the original size.
740 init_data_frame(out, size);
741 Rf_setAttrib(out, R_RowNamesSymbol, df_rownames(x));
742
743 R_len_t extra_len = Rf_length(x) - common_len;
744 if (extra_len) {
745 out = vctrs_dispatch3(syms_df_lossy_cast, fns_df_lossy_cast,
746 syms_out, out,
747 syms_x, x,
748 syms_to, to);
749 }
750
751 UNPROTECT(2);
752 return out;
753 }
754
df_cast_loop(const struct cast_opts * opts,SEXP names)755 static SEXP df_cast_loop(const struct cast_opts* opts, SEXP names) {
756 SEXP x = opts->x;
757 SEXP to = opts->to;
758
759 R_len_t len = Rf_length(names);
760
761 SEXP out = PROTECT(Rf_allocVector(VECSXP, len));
762 Rf_setAttrib(out, R_NamesSymbol, names);
763
764 R_len_t size = df_size(x);
765
766 for (R_len_t i = 0; i < len; ++i) {
767 const char* name = r_chr_get_c_string(names, i);
768
769 struct arg_data_index x_arg_data = new_index_arg_data(name, opts->x_arg);
770 struct arg_data_index to_arg_data = new_index_arg_data(name, opts->to_arg);
771 struct vctrs_arg named_x_arg = new_index_arg(opts->x_arg, &x_arg_data);
772 struct vctrs_arg named_to_arg = new_index_arg(opts->to_arg, &to_arg_data);
773
774 struct cast_opts col_opts = {
775 .x = VECTOR_ELT(x, i),
776 .to = VECTOR_ELT(to, i),
777 .x_arg = &named_x_arg,
778 .to_arg = &named_to_arg,
779 .fallback = opts->fallback
780 };
781 SEXP col = vec_cast_opts(&col_opts);
782
783 SET_VECTOR_ELT(out, i, col);
784 }
785
786 // Restore data frame size before calling `vec_restore()`. `x` and
787 // `to` might not have any columns to compute the original size.
788 init_data_frame(out, size);
789 Rf_setAttrib(out, R_RowNamesSymbol, df_rownames(x));
790
791 UNPROTECT(1);
792 return out;
793 }
794
795 // If negative index, value is appended
df_poke(SEXP x,R_len_t i,SEXP value)796 SEXP df_poke(SEXP x, R_len_t i, SEXP value) {
797 if (i >= 0) {
798 SET_VECTOR_ELT(x, i, value);
799 return x;
800 }
801
802 R_len_t ncol = Rf_length(x);
803
804 SEXP tmp = PROTECT(r_resize(x, ncol + 1));
805 Rf_copyMostAttrib(x, tmp);
806 x = tmp;
807
808 SET_VECTOR_ELT(x, ncol, value);
809
810 UNPROTECT(1);
811 return x;
812 }
df_poke_at(SEXP x,SEXP name,SEXP value)813 SEXP df_poke_at(SEXP x, SEXP name, SEXP value) {
814 SEXP names = PROTECT(r_names(x));
815 R_len_t i = r_chr_find(names, name);
816 UNPROTECT(1);
817
818 x = PROTECT(df_poke(x, i, value));
819
820 if (i < 0) {
821 SEXP names = PROTECT(r_names(x));
822 SET_STRING_ELT(names, Rf_length(x) - 1, name);
823 UNPROTECT(1);
824 }
825
826 UNPROTECT(1);
827 return x;
828 }
829
830 static inline
df_flat_width(SEXP x)831 R_len_t df_flat_width(SEXP x) {
832 R_len_t n = Rf_length(x);
833 R_len_t out = n;
834
835 const SEXP* v_x = VECTOR_PTR_RO(x);
836
837 for (R_len_t i = 0; i < n; ++i) {
838 SEXP col = v_x[i];
839 if (is_data_frame(col)) {
840 out = out + df_flat_width(col) - 1;
841 }
842 }
843
844 return out;
845 }
846
847 struct flatten_info {
848 bool flatten;
849 R_len_t width;
850 };
851
852 static inline
df_flatten_info(SEXP x)853 struct flatten_info df_flatten_info(SEXP x) {
854 bool flatten = false;
855
856 R_len_t n = Rf_length(x);
857 R_len_t width = n;
858
859 const SEXP* v_x = VECTOR_PTR_RO(x);
860
861 for (R_len_t i = 0; i < n; ++i) {
862 SEXP col = v_x[i];
863 if (is_data_frame(col)) {
864 flatten = true;
865 width = width + df_flat_width(col) - 1;
866 }
867 }
868
869 return (struct flatten_info){flatten, width};
870 }
871
872 // [[ register() ]]
vctrs_df_flatten_info(SEXP x)873 SEXP vctrs_df_flatten_info(SEXP x) {
874 struct flatten_info info = df_flatten_info(x);
875
876 SEXP out = PROTECT(Rf_allocVector(VECSXP, 2));
877 SET_VECTOR_ELT(out, 0, r_lgl(info.flatten));
878 SET_VECTOR_ELT(out, 1, r_int(info.width));
879
880 UNPROTECT(1);
881 return out;
882 }
883
884 static R_len_t df_flatten_loop(SEXP x, SEXP out, SEXP out_names, R_len_t counter);
885
886 // Might return duplicate names. Currently only used for equality
887 // proxy so this doesn't matter. A less bare bone version would repair
888 // names.
889 //
890 // [[ register(); include("type-data-frame.h") ]]
df_flatten(SEXP x)891 SEXP df_flatten(SEXP x) {
892 struct flatten_info info = df_flatten_info(x);
893
894 if (!info.flatten) {
895 return x;
896 }
897
898 SEXP out = PROTECT(Rf_allocVector(VECSXP, info.width));
899 SEXP out_names = PROTECT(Rf_allocVector(STRSXP, info.width));
900 r_poke_names(out, out_names);
901
902 df_flatten_loop(x, out, out_names, 0);
903 init_data_frame(out, df_size(x));
904
905 UNPROTECT(2);
906 return out;
907 }
908
df_flatten_loop(SEXP x,SEXP out,SEXP out_names,R_len_t counter)909 static R_len_t df_flatten_loop(SEXP x, SEXP out, SEXP out_names, R_len_t counter) {
910 R_len_t n = Rf_length(x);
911 SEXP x_names = PROTECT(r_names(x));
912
913 for (R_len_t i = 0; i < n; ++i) {
914 SEXP col = VECTOR_ELT(x, i);
915
916 if (is_data_frame(col)) {
917 counter = df_flatten_loop(col, out, out_names, counter);
918 } else {
919 SET_VECTOR_ELT(out, counter, col);
920 SET_STRING_ELT(out_names, counter, STRING_ELT(x_names, i));
921 ++counter;
922 }
923 }
924
925 UNPROTECT(1);
926 return counter;
927 }
928
df_repair_names(SEXP x,struct name_repair_opts * name_repair)929 SEXP df_repair_names(SEXP x, struct name_repair_opts* name_repair) {
930 SEXP nms = PROTECT(r_names(x));
931 SEXP repaired = PROTECT(vec_as_names(nms, name_repair));
932
933 // Should this go through proxy and restore so that classes can
934 // update metadata and check invariants when special columns are
935 // renamed?
936 if (nms != repaired) {
937 x = PROTECT(r_clone_referenced(x));
938 r_poke_names(x, repaired);
939 UNPROTECT(1);
940 }
941
942 UNPROTECT(2);
943 return x;
944 }
945
946
vctrs_init_type_data_frame(SEXP ns)947 void vctrs_init_type_data_frame(SEXP ns) {
948 syms_df_lossy_cast = Rf_install("df_lossy_cast");
949 fns_df_lossy_cast = Rf_findVar(syms_df_lossy_cast, ns);
950 }
951