1 #include "vctrs.h"
2 #include "ptype-common.h"
3 #include "ptype2.h"
4 #include "type-data-frame.h"
5 #include "utils.h"
6 
7 static SEXP syms_df_lossy_cast = NULL;
8 static SEXP fns_df_lossy_cast = NULL;
9 
10 static SEXP new_compact_rownames(R_len_t n);
11 
12 
13 // [[ include("type-data-frame.h") ]]
is_data_frame(SEXP x)14 bool is_data_frame(SEXP x) {
15   if (TYPEOF(x) != VECSXP) {
16     return false;
17   }
18 
19   enum vctrs_class_type type = class_type(x);
20   return
21     type == vctrs_class_bare_data_frame ||
22     type == vctrs_class_bare_tibble ||
23     type == vctrs_class_data_frame;
24 }
25 
26 // [[ include("type-data-frame.h") ]]
is_native_df(SEXP x)27 bool is_native_df(SEXP x) {
28   enum vctrs_class_type type = class_type(x);
29   return
30     type == vctrs_class_bare_data_frame ||
31     type == vctrs_class_bare_tibble;
32 }
33 
34 // [[ include("type-data-frame.h") ]]
is_bare_data_frame(SEXP x)35 bool is_bare_data_frame(SEXP x) {
36   return class_type(x) == vctrs_class_bare_data_frame;
37 }
38 
39 // [[ include("type-data-frame.h") ]]
is_bare_tibble(SEXP x)40 bool is_bare_tibble(SEXP x) {
41   return class_type(x) == vctrs_class_bare_tibble;
42 }
43 
44 // [[ include("type-data-frame.h") ]]
new_data_frame(SEXP x,R_len_t n)45 SEXP new_data_frame(SEXP x, R_len_t n) {
46   x = PROTECT(r_clone_referenced(x));
47   init_data_frame(x, n);
48 
49   UNPROTECT(1);
50   return x;
51 }
52 
53 static R_len_t df_size_from_list(SEXP x, SEXP n);
54 static R_len_t df_size_from_n(SEXP n);
55 static SEXP c_data_frame_class(SEXP cls);
56 
57 // [[ register() ]]
vctrs_new_data_frame(SEXP args)58 SEXP vctrs_new_data_frame(SEXP args) {
59   args = CDR(args);
60 
61   SEXP x = CAR(args); args = CDR(args);
62   SEXP n = CAR(args); args = CDR(args);
63   SEXP cls = CAR(args); args = CDR(args);
64   SEXP attrib = args;
65 
66   PROTECT_INDEX pi;
67   PROTECT_WITH_INDEX(attrib, &pi);
68 
69   if (TYPEOF(x) != VECSXP) {
70     Rf_errorcall(R_NilValue, "`x` must be a list");
71   }
72 
73   bool has_names = false;
74   bool has_rownames = false;
75   R_len_t size = df_size_from_list(x, n);
76 
77   SEXP out = PROTECT(r_clone_referenced(x));
78 
79   for (SEXP node = attrib; node != R_NilValue; node = CDR(node)) {
80     SEXP tag = TAG(node);
81 
82     // We might add dynamic dots later on
83     if (tag == R_ClassSymbol) {
84       stop_internal("new_data_frame", "Can't supply `class` in `...`.");
85     }
86 
87     if (tag == R_NamesSymbol) {
88       has_names = true;
89       continue;
90     }
91 
92     if (tag == R_RowNamesSymbol) {
93       // "row.names" is checked for consistency with n (if provided)
94       if (size != rownames_size(CAR(node)) && n != R_NilValue) {
95         Rf_errorcall(R_NilValue, "`n` and `row.names` must be consistent.");
96       }
97 
98       has_rownames = true;
99       continue;
100     }
101   }
102 
103   // Take names from `x` if `attrib` doesn't have any
104   if (!has_names) {
105     SEXP nms = vctrs_shared_empty_chr;
106     if (Rf_length(out)) {
107       nms = r_names(out);
108     }
109     PROTECT(nms);
110 
111     if (nms != R_NilValue) {
112       attrib = Rf_cons(nms, attrib);
113       SET_TAG(attrib, R_NamesSymbol);
114       REPROTECT(attrib, pi);
115     }
116 
117     UNPROTECT(1);
118   }
119 
120   if (!has_rownames) {
121     SEXP rn = PROTECT(new_compact_rownames(size));
122     attrib = Rf_cons(rn, attrib);
123     SET_TAG(attrib, R_RowNamesSymbol);
124 
125     UNPROTECT(1);
126     REPROTECT(attrib, pi);
127   }
128 
129   if (cls == R_NilValue) {
130     cls = classes_data_frame;
131   } else {
132     cls = c_data_frame_class(cls);
133   }
134   PROTECT(cls);
135 
136   attrib = Rf_cons(cls, attrib);
137   SET_TAG(attrib, R_ClassSymbol);
138 
139   UNPROTECT(1);
140   REPROTECT(attrib, pi);
141 
142 
143   SET_ATTRIB(out, attrib);
144   SET_OBJECT(out, 1);
145 
146   UNPROTECT(2);
147   return out;
148 }
149 
df_size_from_list(SEXP x,SEXP n)150 static R_len_t df_size_from_list(SEXP x, SEXP n) {
151   if (n == R_NilValue) {
152     if (is_data_frame(x)) {
153       return df_size(x);
154     } else {
155       return df_raw_size_from_list(x);
156     }
157   } else {
158     return df_size_from_n(n);
159   }
160 }
161 
df_size_from_n(SEXP n)162 static R_len_t df_size_from_n(SEXP n) {
163   if (TYPEOF(n) != INTSXP || Rf_length(n) != 1) {
164     Rf_errorcall(R_NilValue, "`n` must be an integer of size 1");
165   }
166 
167   return r_int_get(n, 0);
168 }
169 
c_data_frame_class(SEXP cls)170 static SEXP c_data_frame_class(SEXP cls) {
171   if (TYPEOF(cls) != STRSXP) {
172     Rf_errorcall(R_NilValue, "`class` must be NULL or a character vector");
173   }
174   return chr_c(cls, classes_data_frame);
175 }
176 
177 
178 SEXP data_frame(SEXP x, r_ssize size, const struct name_repair_opts* p_name_repair_opts);
179 
180 // [[ register() ]]
vctrs_data_frame(SEXP x,SEXP size,SEXP name_repair)181 SEXP vctrs_data_frame(SEXP x, SEXP size, SEXP name_repair) {
182   struct name_repair_opts name_repair_opts = new_name_repair_opts(name_repair, args_empty, false);
183   PROTECT_NAME_REPAIR_OPTS(&name_repair_opts);
184 
185   r_ssize c_size = 0;
186   if (size == R_NilValue) {
187     c_size = vec_size_common(x, 0);
188   } else {
189     c_size = size_validate(size, ".size");
190   }
191 
192   SEXP out = data_frame(x, c_size, &name_repair_opts);
193 
194   UNPROTECT(1);
195   return out;
196 }
197 
198 SEXP df_list(SEXP x, r_ssize size, const struct name_repair_opts* p_name_repair_opts);
199 
data_frame(SEXP x,r_ssize size,const struct name_repair_opts * p_name_repair_opts)200 SEXP data_frame(SEXP x, r_ssize size, const struct name_repair_opts* p_name_repair_opts) {
201   SEXP out = PROTECT(df_list(x, size, p_name_repair_opts));
202   out = new_data_frame(out, size);
203   UNPROTECT(1);
204   return out;
205 }
206 
207 
208 // [[ register() ]]
vctrs_df_list(SEXP x,SEXP size,SEXP name_repair)209 SEXP vctrs_df_list(SEXP x, SEXP size, SEXP name_repair) {
210   struct name_repair_opts name_repair_opts = new_name_repair_opts(name_repair, args_empty, false);
211   PROTECT_NAME_REPAIR_OPTS(&name_repair_opts);
212 
213   r_ssize c_size = 0;
214   if (size == R_NilValue) {
215     c_size = vec_size_common(x, 0);
216   } else {
217     c_size = size_validate(size, ".size");
218   }
219 
220   SEXP out = df_list(x, c_size, &name_repair_opts);
221 
222   UNPROTECT(1);
223   return out;
224 }
225 
226 static SEXP df_list_drop_null(SEXP x);
227 static SEXP df_list_splice(SEXP x);
228 
df_list(SEXP x,r_ssize size,const struct name_repair_opts * p_name_repair_opts)229 SEXP df_list(SEXP x, r_ssize size, const struct name_repair_opts* p_name_repair_opts) {
230   if (TYPEOF(x) != VECSXP) {
231     stop_internal("df_list", "`x` must be a list.");
232   }
233 
234   x = PROTECT(vec_recycle_common(x, size));
235 
236   r_ssize n_cols = r_length(x);
237 
238   // Unnamed columns are auto-named with `""`
239   if (r_names(x) == R_NilValue) {
240     SEXP names = PROTECT(r_new_character(n_cols));
241     r_poke_names(x, names);
242     UNPROTECT(1);
243   }
244 
245   x = PROTECT(df_list_drop_null(x));
246   x = PROTECT(df_list_splice(x));
247 
248   SEXP names = PROTECT(r_names(x));
249   names = PROTECT(vec_as_names(names, p_name_repair_opts));
250   r_poke_names(x, names);
251 
252   UNPROTECT(5);
253   return x;
254 }
255 
df_list_drop_null(SEXP x)256 static SEXP df_list_drop_null(SEXP x) {
257   r_ssize n_cols = r_length(x);
258   r_ssize count = 0;
259 
260   for (r_ssize i = 0; i < n_cols; ++i) {
261     count += VECTOR_ELT(x, i) == R_NilValue;
262   }
263 
264   if (count == 0) {
265     return x;
266   }
267 
268   SEXP names = PROTECT(r_names(x));
269   const SEXP* p_names = STRING_PTR_RO(names);
270 
271   r_ssize n_out = n_cols - count;
272   SEXP out = PROTECT(Rf_allocVector(VECSXP, n_out));
273   SEXP out_names = PROTECT(Rf_allocVector(STRSXP, n_out));
274   r_ssize out_i = 0;
275 
276   for (r_ssize i = 0; i < n_cols; ++i) {
277     SEXP col = VECTOR_ELT(x, i);
278 
279     if (col != R_NilValue) {
280       SET_VECTOR_ELT(out, out_i, col);
281       SET_STRING_ELT(out_names, out_i, p_names[i]);
282       ++out_i;
283     }
284   }
285 
286   r_poke_names(out, out_names);
287 
288   UNPROTECT(3);
289   return out;
290 }
291 
df_list_splice(SEXP x)292 static SEXP df_list_splice(SEXP x) {
293   SEXP names = PROTECT(r_names(x));
294   const SEXP* p_names = STRING_PTR_RO(names);
295 
296   bool any_needs_splice = false;
297   r_ssize n_cols = r_length(x);
298   r_ssize i = 0;
299 
300   for (; i < n_cols; ++i) {
301     // Only splice unnamed data frames
302     if (p_names[i] != strings_empty) {
303       continue;
304     }
305 
306     SEXP col = VECTOR_ELT(x, i);
307 
308     if (is_data_frame(col)) {
309       any_needs_splice = true;
310       break;
311     }
312   }
313 
314   if (!any_needs_splice) {
315     UNPROTECT(1);
316     return x;
317   }
318 
319   SEXP splice = PROTECT(r_new_logical(n_cols));
320   int* p_splice = LOGICAL(splice);
321 
322   for (r_ssize j = 0; j < n_cols; ++j) {
323     p_splice[j] = 0;
324   }
325 
326   r_ssize width = i;
327 
328   for (; i < n_cols; ++i) {
329     // Only splice unnamed data frames
330     if (p_names[i] != strings_empty) {
331       ++width;
332       continue;
333     }
334 
335     SEXP col = VECTOR_ELT(x, i);
336 
337     if (is_data_frame(col)) {
338       width += r_length(col);
339       p_splice[i] = 1;
340     } else {
341       ++width;
342     }
343   }
344 
345   SEXP out = PROTECT(r_new_list(width));
346   SEXP out_names = PROTECT(r_new_character(width));
347 
348   r_ssize loc = 0;
349 
350   // Splice loop
351   for (r_ssize i = 0; i < n_cols; ++i) {
352     if (!p_splice[i]) {
353       SET_VECTOR_ELT(out, loc, VECTOR_ELT(x, i));
354       SET_STRING_ELT(out_names, loc, p_names[i]);
355       ++loc;
356       continue;
357     }
358 
359     SEXP col = VECTOR_ELT(x, i);
360     SEXP col_names = PROTECT(r_names(col));
361 
362     if (TYPEOF(col_names) != STRSXP) {
363       stop_internal(
364         "df_splice",
365         "Encountered corrupt data frame. "
366         "Data frames must have character column names."
367       );
368     }
369 
370     const SEXP* p_col_names = STRING_PTR_RO(col_names);
371     r_ssize col_i = 0;
372 
373     r_ssize stop = loc + r_length(col);
374 
375     for (; loc < stop; ++loc, ++col_i) {
376       SET_VECTOR_ELT(out, loc, VECTOR_ELT(col, col_i));
377       SET_STRING_ELT(out_names, loc, p_col_names[col_i]);
378     }
379 
380     loc = stop;
381     UNPROTECT(1);
382   }
383 
384   r_poke_names(out, out_names);
385 
386   UNPROTECT(4);
387   return out;
388 }
389 
390 
391 // [[ include("type-data-frame.h") ]]
rownames_type(SEXP x)392 enum rownames_type rownames_type(SEXP x) {
393   switch (TYPEOF(x)) {
394   case STRSXP:
395     return ROWNAMES_IDENTIFIERS;
396   case INTSXP:
397     if (Rf_length(x) == 2 && INTEGER(x)[0] == NA_INTEGER) {
398       return ROWNAMES_AUTOMATIC_COMPACT;
399     } else {
400       return ROWNAMES_AUTOMATIC;
401     }
402   default:
403     Rf_error("Corrupt data in `rownames_type()`: Unexpected type `%s`.",
404              Rf_type2char(TYPEOF(x)));
405   }
406 }
407 
compact_rownames_length(SEXP x)408 static R_len_t compact_rownames_length(SEXP x) {
409   return abs(INTEGER(x)[1]);
410 }
411 
412 // [[ include("type-data-frame.h") ]]
rownames_size(SEXP rn)413 R_len_t rownames_size(SEXP rn) {
414   switch (rownames_type(rn)) {
415   case ROWNAMES_IDENTIFIERS:
416   case ROWNAMES_AUTOMATIC:
417     return Rf_length(rn);
418   case ROWNAMES_AUTOMATIC_COMPACT:
419     return compact_rownames_length(rn);
420   }
421 
422   never_reached("rownames_size");
423 }
424 
425 static void init_bare_data_frame(SEXP x, R_len_t n);
426 
427 // [[ include("type-data-frame.h") ]]
init_data_frame(SEXP x,R_len_t n)428 void init_data_frame(SEXP x, R_len_t n) {
429   Rf_setAttrib(x, R_ClassSymbol, classes_data_frame);
430   init_bare_data_frame(x, n);
431 }
432 // [[ include("type-data-frame.h") ]]
init_tibble(SEXP x,R_len_t n)433 void init_tibble(SEXP x, R_len_t n) {
434   Rf_setAttrib(x, R_ClassSymbol, classes_tibble);
435   init_bare_data_frame(x, n);
436 }
437 
init_bare_data_frame(SEXP x,R_len_t n)438 static void init_bare_data_frame(SEXP x, R_len_t n) {
439   if (Rf_length(x) == 0) {
440     Rf_setAttrib(x, R_NamesSymbol, vctrs_shared_empty_chr);
441   }
442 
443   init_compact_rownames(x, n);
444 }
445 
446 // [[ include("type-data-frame.h") ]]
init_compact_rownames(SEXP x,R_len_t n)447 void init_compact_rownames(SEXP x, R_len_t n) {
448   SEXP rn = PROTECT(new_compact_rownames(n));
449   Rf_setAttrib(x, R_RowNamesSymbol, rn);
450   UNPROTECT(1);
451 }
452 
new_compact_rownames(R_len_t n)453 static SEXP new_compact_rownames(R_len_t n) {
454   if (n <= 0) {
455     return vctrs_shared_empty_int;
456   }
457 
458   SEXP out = Rf_allocVector(INTSXP, 2);
459   int* out_data = INTEGER(out);
460   out_data[0] = NA_INTEGER;
461   out_data[1] = -n;
462   return out;
463 }
464 
465 
466 // vctrs type methods ------------------------------------------------
467 
468 // [[ register() ]]
vctrs_df_ptype2_opts(SEXP x,SEXP y,SEXP opts,SEXP x_arg,SEXP y_arg)469 SEXP vctrs_df_ptype2_opts(SEXP x, SEXP y, SEXP opts, SEXP x_arg, SEXP y_arg) {
470   struct vctrs_arg c_x_arg = vec_as_arg(x_arg);
471   struct vctrs_arg c_y_arg = vec_as_arg(y_arg);
472 
473   const struct ptype2_opts c_opts = new_ptype2_opts(x, y, &c_x_arg, &c_y_arg, opts);
474 
475   return df_ptype2(&c_opts);
476 }
477 
478 static
479 SEXP df_ptype2_match(const struct ptype2_opts* opts,
480                      SEXP x_names,
481                      SEXP y_names);
482 
483 static
484 SEXP df_ptype2_loop(const struct ptype2_opts* opts,
485                     SEXP y_names);
486 
487 // [[ include("type-data-frame.h") ]]
df_ptype2(const struct ptype2_opts * opts)488 SEXP df_ptype2(const struct ptype2_opts* opts) {
489   SEXP x_names = PROTECT(r_names(opts->x));
490   SEXP y_names = PROTECT(r_names(opts->y));
491 
492   SEXP out = R_NilValue;
493 
494   if (equal_object(x_names, y_names)) {
495     out = df_ptype2_loop(opts, x_names);
496   } else {
497     out = df_ptype2_match(opts, x_names, y_names);
498   }
499 
500   UNPROTECT(2);
501   return out;
502 }
503 
df_ptype2_match(const struct ptype2_opts * opts,SEXP x_names,SEXP y_names)504 SEXP df_ptype2_match(const struct ptype2_opts* opts,
505                      SEXP x_names,
506                      SEXP y_names) {
507   SEXP x = opts->x;
508   SEXP y = opts->y;
509 
510   SEXP x_dups_pos = PROTECT(vec_match(x_names, y_names));
511   SEXP y_dups_pos = PROTECT(vec_match(y_names, x_names));
512 
513   int* x_dups_pos_data = INTEGER(x_dups_pos);
514   int* y_dups_pos_data = INTEGER(y_dups_pos);
515 
516   R_len_t x_len = Rf_length(x_names);
517   R_len_t y_len = Rf_length(y_names);
518 
519   // Count columns that are only in `y`
520   R_len_t rest_len = 0;
521   for (R_len_t i = 0; i < y_len; ++i) {
522     if (y_dups_pos_data[i] == NA_INTEGER) {
523       ++rest_len;
524     }
525   }
526 
527   R_len_t out_len = x_len + rest_len;
528   SEXP out = PROTECT(Rf_allocVector(VECSXP, out_len));
529   SEXP nms = PROTECT(Rf_allocVector(STRSXP, out_len));
530   Rf_setAttrib(out, R_NamesSymbol, nms);
531 
532   R_len_t i = 0;
533 
534   // Fill in prototypes of all the columns that are in `x`, in order
535   for (; i < x_len; ++i) {
536     R_len_t dup = x_dups_pos_data[i];
537 
538     struct arg_data_index x_arg_data = new_index_arg_data(r_chr_get_c_string(x_names, i),
539                                                           opts->x_arg);
540     struct vctrs_arg named_x_arg = new_index_arg(opts->x_arg, &x_arg_data);
541 
542     SEXP col = VECTOR_ELT(x, i);
543     struct ptype2_opts col_opts = *opts;
544     col_opts.x = col;
545     col_opts.x_arg = &named_x_arg;
546 
547     SEXP type;
548     if (dup == NA_INTEGER) {
549       col_opts.y = vctrs_shared_empty_uns;
550       col_opts.y_arg = NULL;
551       type = vec_ptype2_from_unspecified(&col_opts,
552                                          vec_typeof(col),
553                                          col,
554                                          &named_x_arg);
555     } else {
556       --dup; // 1-based index
557 
558       struct arg_data_index y_arg_data = new_index_arg_data(r_chr_get_c_string(y_names, dup),
559                                                             opts->y_arg);
560       struct vctrs_arg named_y_arg = new_index_arg(opts->y_arg, &y_arg_data);
561       col_opts.y = VECTOR_ELT(y, dup);
562       col_opts.y_arg = &named_y_arg;
563 
564       int _left;
565       type = vec_ptype2_opts(&col_opts, &_left);
566     }
567 
568     SET_VECTOR_ELT(out, i, type);
569     SET_STRING_ELT(nms, i, STRING_ELT(x_names, i));
570   }
571 
572   // Fill in prototypes of the columns that are only in `y`
573   for (R_len_t j = 0; i < out_len; ++j) {
574     R_len_t dup = y_dups_pos_data[j];
575 
576     if (dup == NA_INTEGER) {
577       SEXP col = VECTOR_ELT(y, j);
578 
579       struct arg_data_index y_arg_data = new_index_arg_data(r_chr_get_c_string(y_names, j),
580                                                             opts->y_arg);
581       struct vctrs_arg named_y_arg = new_index_arg(opts->y_arg, &y_arg_data);
582 
583       struct ptype2_opts col_opts = *opts;
584       col_opts.y = col;
585       col_opts.y_arg = &named_y_arg;
586       col_opts.x = vctrs_shared_empty_uns;
587       col_opts.x_arg = NULL;
588       SEXP type = vec_ptype2_from_unspecified(&col_opts,
589                                               vec_typeof(col),
590                                               col,
591                                               &named_y_arg);
592 
593       SET_VECTOR_ELT(out, i, type);
594       SET_STRING_ELT(nms, i, STRING_ELT(y_names, j));
595       ++i;
596     }
597   }
598 
599   init_data_frame(out, 0);
600 
601   UNPROTECT(4);
602   return out;
603 }
604 
605 static
df_ptype2_loop(const struct ptype2_opts * opts,SEXP names)606 SEXP df_ptype2_loop(const struct ptype2_opts* opts,
607                     SEXP names) {
608   SEXP x = opts->x;
609   SEXP y = opts->y;
610 
611   R_len_t len = Rf_length(names);
612 
613   SEXP out = PROTECT(Rf_allocVector(VECSXP, len));
614   Rf_setAttrib(out, R_NamesSymbol, names);
615 
616   for (R_len_t i = 0; i < len; ++i) {
617     const char* name = r_chr_get_c_string(names, i);
618 
619     struct arg_data_index x_arg_data = new_index_arg_data(name, opts->x_arg);
620     struct arg_data_index y_arg_data = new_index_arg_data(name, opts->y_arg);
621     struct vctrs_arg named_x_arg = new_index_arg(opts->x_arg, &x_arg_data);
622     struct vctrs_arg named_y_arg = new_index_arg(opts->y_arg, &y_arg_data);
623 
624     struct ptype2_opts col_opts = *opts;
625     col_opts.x = VECTOR_ELT(x, i);
626     col_opts.y = VECTOR_ELT(y, i);
627     col_opts.x_arg = &named_x_arg;
628     col_opts.y_arg = &named_y_arg;
629     int _left;
630 
631     SEXP type = vec_ptype2_opts(&col_opts, &_left);
632 
633     SET_VECTOR_ELT(out, i, type);
634   }
635 
636   init_data_frame(out, 0);
637 
638   UNPROTECT(1);
639   return out;
640 }
641 
642 // [[ register() ]]
vctrs_df_cast_opts(SEXP x,SEXP to,SEXP opts,SEXP x_arg,SEXP to_arg)643 SEXP vctrs_df_cast_opts(SEXP x, SEXP to, SEXP opts, SEXP x_arg, SEXP to_arg) {
644   struct vctrs_arg c_x_arg = vec_as_arg(x_arg);
645   struct vctrs_arg c_to_arg = vec_as_arg(to_arg);
646 
647   const struct cast_opts c_opts = new_cast_opts(x, to, &c_x_arg, &c_to_arg, opts);
648 
649   return df_cast_opts(&c_opts);
650 }
651 
652 static SEXP df_cast_match(const struct cast_opts* opts,
653                           SEXP x_names,
654                           SEXP to_names);
655 
656 static SEXP df_cast_loop(const struct cast_opts* opts, SEXP names);
657 
658 // Take all columns of `to` and preserve the order. Common columns are
659 // cast to their types in `to`. Extra `x` columns are dropped and
660 // cause a lossy cast. Extra `to` columns are filled with missing
661 // values.
662 // [[ include("cast.h") ]]
df_cast_opts(const struct cast_opts * opts)663 SEXP df_cast_opts(const struct cast_opts* opts) {
664   SEXP x_names = PROTECT(r_names(opts->x));
665   SEXP to_names = PROTECT(r_names(opts->to));
666 
667   if (x_names == R_NilValue || to_names == R_NilValue) {
668     stop_internal("df_cast_opts", "Data frame must have names.");
669   }
670 
671   SEXP out = R_NilValue;
672 
673   if (equal_object(x_names, to_names)) {
674     out = df_cast_loop(opts, x_names);
675   } else {
676     out = df_cast_match(opts, x_names, to_names);
677   }
678 
679   UNPROTECT(2);
680   return out;
681 }
682 
df_cast_match(const struct cast_opts * opts,SEXP x_names,SEXP to_names)683 static SEXP df_cast_match(const struct cast_opts* opts,
684                           SEXP x_names,
685                           SEXP to_names) {
686   SEXP x = opts->x;
687   SEXP to = opts->to;
688 
689   SEXP to_dups_pos = PROTECT(vec_match(to_names, x_names));
690   int* to_dups_pos_data = INTEGER(to_dups_pos);
691 
692   R_len_t to_len = Rf_length(to_dups_pos);
693   SEXP out = PROTECT(Rf_allocVector(VECSXP, to_len));
694   Rf_setAttrib(out, R_NamesSymbol, to_names);
695 
696   R_len_t size = df_size(x);
697   R_len_t common_len = 0;
698 
699   for (R_len_t i = 0; i < to_len; ++i) {
700     R_len_t pos = to_dups_pos_data[i];
701 
702     SEXP col;
703     if (pos == NA_INTEGER) {
704       SEXP to_col = VECTOR_ELT(to, i);
705       col = vec_init(to_col, size);
706 
707       // FIXME: Need to initialise the vector because we currently use
708       // `vec_assign()` in `vec_rbind()` before falling back. Attach
709       // an attribute to recognise unspecified vectors in
710       // `base_c_invoke()`.
711       if (opts->fallback.s3 && vec_is_common_class_fallback(to_col)) {
712         PROTECT(col);
713         Rf_setAttrib(col, Rf_install("vctrs:::unspecified"), vctrs_shared_true);
714         UNPROTECT(1);
715       }
716     } else {
717       --pos; // 1-based index
718       ++common_len;
719 
720       struct arg_data_index x_arg_data = new_index_arg_data(r_chr_get_c_string(x_names, pos), opts->x_arg);
721       struct arg_data_index to_arg_data = new_index_arg_data(r_chr_get_c_string(to_names, i), opts->to_arg);
722       struct vctrs_arg named_x_arg = new_index_arg(opts->x_arg, &x_arg_data);
723       struct vctrs_arg named_to_arg = new_index_arg(opts->to_arg, &to_arg_data);
724 
725       struct cast_opts col_opts = {
726         .x = VECTOR_ELT(x, pos),
727         .to = VECTOR_ELT(to, i),
728         .x_arg = &named_x_arg,
729         .to_arg = &named_to_arg,
730         .fallback = opts->fallback
731       };
732       col = vec_cast_opts(&col_opts);
733     }
734 
735     SET_VECTOR_ELT(out, i, col);
736   }
737 
738   // Restore data frame size before calling `vec_restore()`. `x` and
739   // `to` might not have any columns to compute the original size.
740   init_data_frame(out, size);
741   Rf_setAttrib(out, R_RowNamesSymbol, df_rownames(x));
742 
743   R_len_t extra_len = Rf_length(x) - common_len;
744   if (extra_len) {
745     out = vctrs_dispatch3(syms_df_lossy_cast, fns_df_lossy_cast,
746                           syms_out, out,
747                           syms_x, x,
748                           syms_to, to);
749   }
750 
751   UNPROTECT(2);
752   return out;
753 }
754 
df_cast_loop(const struct cast_opts * opts,SEXP names)755 static SEXP df_cast_loop(const struct cast_opts* opts, SEXP names) {
756   SEXP x = opts->x;
757   SEXP to = opts->to;
758 
759   R_len_t len = Rf_length(names);
760 
761   SEXP out = PROTECT(Rf_allocVector(VECSXP, len));
762   Rf_setAttrib(out, R_NamesSymbol, names);
763 
764   R_len_t size = df_size(x);
765 
766   for (R_len_t i = 0; i < len; ++i) {
767     const char* name = r_chr_get_c_string(names, i);
768 
769     struct arg_data_index x_arg_data = new_index_arg_data(name, opts->x_arg);
770     struct arg_data_index to_arg_data = new_index_arg_data(name, opts->to_arg);
771     struct vctrs_arg named_x_arg = new_index_arg(opts->x_arg, &x_arg_data);
772     struct vctrs_arg named_to_arg = new_index_arg(opts->to_arg, &to_arg_data);
773 
774     struct cast_opts col_opts = {
775       .x = VECTOR_ELT(x, i),
776       .to = VECTOR_ELT(to, i),
777       .x_arg = &named_x_arg,
778       .to_arg = &named_to_arg,
779       .fallback = opts->fallback
780     };
781     SEXP col = vec_cast_opts(&col_opts);
782 
783     SET_VECTOR_ELT(out, i, col);
784   }
785 
786   // Restore data frame size before calling `vec_restore()`. `x` and
787   // `to` might not have any columns to compute the original size.
788   init_data_frame(out, size);
789   Rf_setAttrib(out, R_RowNamesSymbol, df_rownames(x));
790 
791   UNPROTECT(1);
792   return out;
793 }
794 
795 // If negative index, value is appended
df_poke(SEXP x,R_len_t i,SEXP value)796 SEXP df_poke(SEXP x, R_len_t i, SEXP value) {
797   if (i >= 0) {
798     SET_VECTOR_ELT(x, i, value);
799     return x;
800   }
801 
802   R_len_t ncol = Rf_length(x);
803 
804   SEXP tmp = PROTECT(r_resize(x, ncol + 1));
805   Rf_copyMostAttrib(x, tmp);
806   x = tmp;
807 
808   SET_VECTOR_ELT(x, ncol, value);
809 
810   UNPROTECT(1);
811   return x;
812 }
df_poke_at(SEXP x,SEXP name,SEXP value)813 SEXP df_poke_at(SEXP x, SEXP name, SEXP value) {
814   SEXP names = PROTECT(r_names(x));
815   R_len_t i = r_chr_find(names, name);
816   UNPROTECT(1);
817 
818   x = PROTECT(df_poke(x, i, value));
819 
820   if (i < 0) {
821     SEXP names = PROTECT(r_names(x));
822     SET_STRING_ELT(names, Rf_length(x) - 1, name);
823     UNPROTECT(1);
824   }
825 
826   UNPROTECT(1);
827   return x;
828 }
829 
830 static inline
df_flat_width(SEXP x)831 R_len_t df_flat_width(SEXP x) {
832   R_len_t n = Rf_length(x);
833   R_len_t out = n;
834 
835   const SEXP* v_x = VECTOR_PTR_RO(x);
836 
837   for (R_len_t i = 0; i < n; ++i) {
838     SEXP col = v_x[i];
839     if (is_data_frame(col)) {
840       out = out + df_flat_width(col) - 1;
841     }
842   }
843 
844   return out;
845 }
846 
847 struct flatten_info {
848   bool flatten;
849   R_len_t width;
850 };
851 
852 static inline
df_flatten_info(SEXP x)853 struct flatten_info df_flatten_info(SEXP x) {
854   bool flatten = false;
855 
856   R_len_t n = Rf_length(x);
857   R_len_t width = n;
858 
859   const SEXP* v_x = VECTOR_PTR_RO(x);
860 
861   for (R_len_t i = 0; i < n; ++i) {
862     SEXP col = v_x[i];
863     if (is_data_frame(col)) {
864       flatten = true;
865       width = width + df_flat_width(col) - 1;
866     }
867   }
868 
869   return (struct flatten_info){flatten, width};
870 }
871 
872 // [[ register() ]]
vctrs_df_flatten_info(SEXP x)873 SEXP vctrs_df_flatten_info(SEXP x) {
874   struct flatten_info info = df_flatten_info(x);
875 
876   SEXP out = PROTECT(Rf_allocVector(VECSXP, 2));
877   SET_VECTOR_ELT(out, 0, r_lgl(info.flatten));
878   SET_VECTOR_ELT(out, 1, r_int(info.width));
879 
880   UNPROTECT(1);
881   return out;
882 }
883 
884 static R_len_t df_flatten_loop(SEXP x, SEXP out, SEXP out_names, R_len_t counter);
885 
886 // Might return duplicate names. Currently only used for equality
887 // proxy so this doesn't matter. A less bare bone version would repair
888 // names.
889 //
890 // [[ register(); include("type-data-frame.h") ]]
df_flatten(SEXP x)891 SEXP df_flatten(SEXP x) {
892   struct flatten_info info = df_flatten_info(x);
893 
894   if (!info.flatten) {
895     return x;
896   }
897 
898   SEXP out = PROTECT(Rf_allocVector(VECSXP, info.width));
899   SEXP out_names = PROTECT(Rf_allocVector(STRSXP, info.width));
900   r_poke_names(out, out_names);
901 
902   df_flatten_loop(x, out, out_names, 0);
903   init_data_frame(out, df_size(x));
904 
905   UNPROTECT(2);
906   return out;
907 }
908 
df_flatten_loop(SEXP x,SEXP out,SEXP out_names,R_len_t counter)909 static R_len_t df_flatten_loop(SEXP x, SEXP out, SEXP out_names, R_len_t counter) {
910   R_len_t n = Rf_length(x);
911   SEXP x_names = PROTECT(r_names(x));
912 
913   for (R_len_t i = 0; i < n; ++i) {
914     SEXP col = VECTOR_ELT(x, i);
915 
916     if (is_data_frame(col)) {
917       counter = df_flatten_loop(col, out, out_names, counter);
918     } else {
919       SET_VECTOR_ELT(out, counter, col);
920       SET_STRING_ELT(out_names, counter, STRING_ELT(x_names, i));
921       ++counter;
922     }
923   }
924 
925   UNPROTECT(1);
926   return counter;
927 }
928 
df_repair_names(SEXP x,struct name_repair_opts * name_repair)929 SEXP df_repair_names(SEXP x, struct name_repair_opts* name_repair) {
930   SEXP nms = PROTECT(r_names(x));
931   SEXP repaired = PROTECT(vec_as_names(nms, name_repair));
932 
933   // Should this go through proxy and restore so that classes can
934   // update metadata and check invariants when special columns are
935   // renamed?
936   if (nms != repaired) {
937     x = PROTECT(r_clone_referenced(x));
938     r_poke_names(x, repaired);
939     UNPROTECT(1);
940   }
941 
942   UNPROTECT(2);
943   return x;
944 }
945 
946 
vctrs_init_type_data_frame(SEXP ns)947 void vctrs_init_type_data_frame(SEXP ns) {
948   syms_df_lossy_cast = Rf_install("df_lossy_cast");
949   fns_df_lossy_cast = Rf_findVar(syms_df_lossy_cast, ns);
950 }
951