1 #include <ctype.h>
2 #include "vctrs.h"
3 #include "type-data-frame.h"
4 #include "utils.h"
5 #include "dim.h"
6 
7 static void describe_repair(SEXP old_names, SEXP new_names);
8 
9 // 3 leading '.' + 1 trailing '\0' + 24 characters
10 #define MAX_IOTA_SIZE 28
11 
12 // Initialised at load time
13 SEXP syms_as_universal_names = NULL;
14 SEXP syms_validate_unique_names = NULL;
15 SEXP fns_as_universal_names = NULL;
16 SEXP fns_validate_unique_names = NULL;
17 
18 // Defined below
19 SEXP vctrs_as_minimal_names(SEXP names);
20 SEXP vec_as_universal_names(SEXP names, bool quiet);
21 SEXP vec_validate_unique_names(SEXP names, struct vctrs_arg* arg);
22 SEXP vec_as_custom_names(SEXP names, const struct name_repair_opts* opts);
23 static void vec_validate_minimal_names(SEXP names, R_len_t n);
24 
25 
26 // [[ include("names.h") ]]
vec_as_names(SEXP names,const struct name_repair_opts * opts)27 SEXP vec_as_names(SEXP names, const struct name_repair_opts* opts) {
28   if (!opts) {
29     return names;
30   }
31   switch (opts->type) {
32   case name_repair_none: return names;
33   case name_repair_minimal: return vctrs_as_minimal_names(names);
34   case name_repair_unique: return vec_as_unique_names(names, opts->quiet);
35   case name_repair_universal: return vec_as_universal_names(names, opts->quiet);
36   case name_repair_check_unique: return vec_validate_unique_names(names, opts->arg);
37   case name_repair_custom: return vec_as_custom_names(names, opts);
38   }
39   never_reached("vec_as_names");
40 }
41 
42 // [[ register() ]]
vctrs_as_names(SEXP names,SEXP repair,SEXP repair_arg,SEXP quiet)43 SEXP vctrs_as_names(SEXP names, SEXP repair, SEXP repair_arg, SEXP quiet) {
44   if (!r_is_bool(quiet)) {
45     Rf_errorcall(R_NilValue, "`quiet` must a boolean value.");
46   }
47   bool quiet_ = LOGICAL(quiet)[0];
48 
49   struct vctrs_arg arg_ = vec_as_arg(repair_arg);
50 
51   struct name_repair_opts repair_opts = new_name_repair_opts(repair, &arg_, quiet_);
52   PROTECT_NAME_REPAIR_OPTS(&repair_opts);
53 
54   SEXP out = vec_as_names(names, &repair_opts);
55 
56   UNPROTECT(1);
57   return out;
58 }
59 
vec_as_universal_names(SEXP names,bool quiet)60 SEXP vec_as_universal_names(SEXP names, bool quiet) {
61   SEXP quiet_obj = PROTECT(r_lgl(quiet));
62   SEXP out = vctrs_dispatch2(syms_as_universal_names, fns_as_universal_names,
63                              syms_names, names,
64                              syms_quiet, quiet_obj);
65   UNPROTECT(1);
66   return out;
67 }
vec_validate_unique_names(SEXP names,struct vctrs_arg * arg)68 SEXP vec_validate_unique_names(SEXP names, struct vctrs_arg* arg) {
69   SEXP arg_obj = PROTECT(vctrs_arg(arg));
70 
71   SEXP out = PROTECT(vctrs_dispatch2(syms_validate_unique_names, fns_validate_unique_names,
72                                      syms_names, names,
73                                      syms_arg, arg_obj));
74 
75   // Restore visibility
76   Rf_eval(R_NilValue, R_EmptyEnv);
77 
78   UNPROTECT(2);
79   return out;
80 }
81 
vec_as_custom_names(SEXP names,const struct name_repair_opts * opts)82 SEXP vec_as_custom_names(SEXP names, const struct name_repair_opts* opts) {
83   names = PROTECT(vctrs_as_minimal_names(names));
84 
85   // Don't use vctrs dispatch utils because we match argument positionally
86   SEXP call = PROTECT(Rf_lang2(syms_repair, syms_names));
87   SEXP mask = PROTECT(r_new_environment(R_GlobalEnv));
88   Rf_defineVar(syms_repair, opts->fn, mask);
89   Rf_defineVar(syms_names, names, mask);
90   SEXP out = PROTECT(Rf_eval(call, mask));
91 
92   vec_validate_minimal_names(out, Rf_length(names));
93 
94   UNPROTECT(4);
95   return out;
96 }
97 
98 static
vec_names_impl(SEXP x,bool proxy)99 SEXP vec_names_impl(SEXP x, bool proxy) {
100   bool has_class = OBJECT(x);
101 
102   if (has_class && Rf_inherits(x, "data.frame")) {
103     // Only return row names if they are character. Data frames with
104     // automatic row names are treated as unnamed.
105     SEXP rn = df_rownames(x);
106     if (rownames_type(rn) == ROWNAMES_IDENTIFIERS) {
107       return rn;
108     } else {
109       return R_NilValue;
110     }
111   }
112 
113   if (vec_bare_dim(x) == R_NilValue) {
114     if (!proxy && has_class) {
115       return vctrs_dispatch1(syms_names, fns_names, syms_x, x);
116     } else {
117       return r_names(x);
118     }
119   }
120 
121   SEXP dimnames = PROTECT(r_attrib_get(x, R_DimNamesSymbol));
122   if (dimnames == R_NilValue || Rf_length(dimnames) < 1) {
123     UNPROTECT(1);
124     return R_NilValue;
125   }
126 
127   SEXP out = VECTOR_ELT(dimnames, 0);
128   UNPROTECT(1);
129   return out;
130 }
131 
132 // [[ register(); include("vctrs.h") ]]
vec_names(SEXP x)133 SEXP vec_names(SEXP x) {
134   return vec_names_impl(x, false);
135 }
136 // [[ include("vctrs.h") ]]
vec_proxy_names(SEXP x)137 SEXP vec_proxy_names(SEXP x) {
138   return vec_names_impl(x, true);
139 }
140 
141 // [[ register() ]]
vctrs_as_minimal_names(SEXP names)142 SEXP vctrs_as_minimal_names(SEXP names) {
143   if (TYPEOF(names) != STRSXP) {
144     Rf_errorcall(R_NilValue, "`names` must be a character vector");
145   }
146 
147   R_len_t i = 0;
148   R_len_t n = Rf_length(names);
149   const SEXP* ptr = STRING_PTR_RO(names);
150 
151   for (; i < n; ++i, ++ptr) {
152     SEXP elt = *ptr;
153     if (elt == NA_STRING) {
154       break;
155     }
156   }
157   if (i == n) {
158     return names;
159   }
160 
161   names = PROTECT(Rf_shallow_duplicate(names));
162 
163   for (; i < n; ++i, ++ptr) {
164     SEXP elt = *ptr;
165     if (elt == NA_STRING) {
166       SET_STRING_ELT(names, i, strings_empty);
167     }
168   }
169 
170   UNPROTECT(1);
171   return names;
172 }
173 
174 // [[ register() ]]
vctrs_minimal_names(SEXP x)175 SEXP vctrs_minimal_names(SEXP x) {
176   SEXP names = PROTECT(vec_names(x));
177 
178   if (names == R_NilValue) {
179     names = Rf_allocVector(STRSXP, vec_size(x));
180   } else {
181     names = vctrs_as_minimal_names(names);
182   }
183 
184   UNPROTECT(1);
185   return names;
186 }
187 
188 
189 // From dictionary.c
190 SEXP vctrs_duplicated(SEXP x);
191 
192 static bool any_has_suffix(SEXP names);
193 static SEXP as_unique_names_impl(SEXP names, bool quiet);
194 static void stop_large_name();
195 static bool is_dotdotint(const char* name);
196 static ptrdiff_t suffix_pos(const char* name);
197 static bool needs_suffix(SEXP str);
198 
199 // [[ include("vctrs.h") ]]
vec_as_unique_names(SEXP names,bool quiet)200 SEXP vec_as_unique_names(SEXP names, bool quiet) {
201   if (is_unique_names(names) && !any_has_suffix(names)) {
202     return names;
203   } else {
204     return(as_unique_names_impl(names, quiet));
205   }
206 }
207 
208 // [[ include("vctrs.h") ]]
is_unique_names(SEXP names)209 bool is_unique_names(SEXP names) {
210   if (TYPEOF(names) != STRSXP) {
211     Rf_errorcall(R_NilValue, "`names` must be a character vector");
212   }
213 
214   R_len_t n = Rf_length(names);
215   const SEXP* names_ptr = STRING_PTR_RO(names);
216 
217   if (duplicated_any(names)) {
218     return false;
219   }
220 
221   for (R_len_t i = 0; i < n; ++i) {
222     SEXP elt = names_ptr[i];
223 
224     if (needs_suffix(elt)) {
225       return false;
226     }
227   }
228 
229   return true;
230 }
231 
any_has_suffix(SEXP names)232 bool any_has_suffix(SEXP names) {
233   R_len_t n = Rf_length(names);
234   const SEXP* names_ptr = STRING_PTR_RO(names);
235 
236   for (R_len_t i = 0; i < n; ++i) {
237     SEXP elt = names_ptr[i];
238 
239     if (suffix_pos(CHAR(elt)) >= 0) {
240       return true;
241     }
242   }
243 
244   return false;
245 }
246 
as_unique_names_impl(SEXP names,bool quiet)247 SEXP as_unique_names_impl(SEXP names, bool quiet) {
248   R_len_t n = Rf_length(names);
249 
250   SEXP new_names = PROTECT(Rf_shallow_duplicate(names));
251   const SEXP* new_names_ptr = STRING_PTR_RO(new_names);
252 
253   for (R_len_t i = 0; i < n; ++i) {
254     SEXP elt = new_names_ptr[i];
255 
256     // Set `NA` and dots values to "" so they get replaced by `...n`
257     // later on
258     if (needs_suffix(elt)) {
259       elt = strings_empty;
260       SET_STRING_ELT(new_names, i, elt);
261       continue;
262     }
263 
264     // Strip `...n` suffixes
265     const char* nm = CHAR(elt);
266     int pos = suffix_pos(nm);
267     if (pos >= 0) {
268       elt = Rf_mkCharLenCE(nm, pos, Rf_getCharCE(elt));
269       SET_STRING_ELT(new_names, i, elt);
270       continue;
271     }
272   }
273 
274   // Append all duplicates with a suffix
275 
276   SEXP dups = PROTECT(vctrs_duplicated(new_names));
277   const int* dups_ptr = LOGICAL_RO(dups);
278 
279   for (R_len_t i = 0; i < n; ++i) {
280     SEXP elt = new_names_ptr[i];
281 
282     if (elt != strings_empty && !dups_ptr[i]) {
283       continue;
284     }
285 
286     const char* name = CHAR(elt);
287 
288     int size = strlen(name);
289     int buf_size = size + MAX_IOTA_SIZE;
290 
291     R_CheckStack2(buf_size);
292     char buf[buf_size];
293     buf[0] = '\0';
294 
295     memcpy(buf, name, size);
296     int remaining = buf_size - size;
297 
298     int needed = snprintf(buf + size, remaining, "...%d", i + 1);
299     if (needed >= remaining) {
300       stop_large_name();
301     }
302 
303     SET_STRING_ELT(new_names, i, Rf_mkCharLenCE(buf, size + needed, Rf_getCharCE(elt)));
304   }
305 
306   if (!quiet) {
307     describe_repair(names, new_names);
308   }
309 
310   UNPROTECT(2);
311   return new_names;
312 }
313 
vctrs_as_unique_names(SEXP names,SEXP quiet)314 SEXP vctrs_as_unique_names(SEXP names, SEXP quiet) {
315   SEXP out = PROTECT(vec_as_unique_names(names, LOGICAL(quiet)[0]));
316   UNPROTECT(1);
317   return out;
318 }
319 
vctrs_is_unique_names(SEXP names)320 SEXP vctrs_is_unique_names(SEXP names) {
321   bool out = is_unique_names(names);
322   return Rf_ScalarLogical(out);
323 }
324 
is_dotdotint(const char * name)325 static bool is_dotdotint(const char* name) {
326   int n = strlen(name);
327 
328   if (n < 3) {
329     return false;
330   }
331   if (name[0] != '.' || name[1] != '.') {
332     return false;
333   }
334 
335   if (name[2] == '.') {
336     name += 3;
337   } else {
338     name += 2;
339   }
340 
341   return (bool) strtol(name, NULL, 10);
342 }
343 
suffix_pos(const char * name)344 static ptrdiff_t suffix_pos(const char* name) {
345   int n = strlen(name);
346 
347   const char* suffix_end = NULL;
348   int in_dots = 0;
349   bool in_digits = false;
350 
351   for (const char* ptr = name + n - 1; ptr >= name; --ptr) {
352     char c = *ptr;
353 
354     if (in_digits) {
355       if (c == '.') {
356         in_digits = false;
357         in_dots = 1;
358         continue;
359       }
360 
361       if (isdigit(c)) {
362         continue;
363       }
364 
365       goto done;
366     }
367 
368     switch (in_dots) {
369     case 0:
370       if (isdigit(c)) {
371         in_digits = true;
372         continue;
373       }
374       goto done;
375     case 1:
376     case 2:
377       if (c == '.') {
378         ++in_dots;
379         continue;
380       }
381       goto done;
382     case 3:
383       suffix_end = ptr + 1;
384       if (isdigit(c)) {
385         in_dots = 0;
386         in_digits = true;
387         continue;
388       }
389       goto done;
390 
391     default:
392       stop_internal("suffix_pos", "Unexpected state.");
393     }}
394 
395  done:
396   if (suffix_end) {
397     return suffix_end - name;
398   } else {
399     return -1;
400   }
401 }
402 
stop_large_name()403 static void stop_large_name() {
404   Rf_errorcall(R_NilValue, "Can't tidy up name because it is too large");
405 }
406 
needs_suffix(SEXP str)407 static bool needs_suffix(SEXP str) {
408   return
409     str == NA_STRING ||
410     str == strings_dots ||
411     str == strings_empty ||
412     is_dotdotint(CHAR(str));
413 }
414 
415 
416 static SEXP names_iota(R_len_t n);
417 static SEXP vec_unique_names_impl(SEXP names, R_len_t n, bool quiet);
418 
419 // [[ register() ]]
vctrs_unique_names(SEXP x,SEXP quiet)420 SEXP vctrs_unique_names(SEXP x, SEXP quiet) {
421   return vec_unique_names(x, LOGICAL(quiet)[0]);
422 }
423 
424 // [[ include("utils.h") ]]
vec_unique_names(SEXP x,bool quiet)425 SEXP vec_unique_names(SEXP x, bool quiet) {
426   SEXP names = PROTECT(vec_names(x));
427   SEXP out = vec_unique_names_impl(names, vec_size(x), quiet);
428   UNPROTECT(1);
429   return out;
430 }
431 // [[ include("utils.h") ]]
vec_unique_colnames(SEXP x,bool quiet)432 SEXP vec_unique_colnames(SEXP x, bool quiet) {
433   SEXP names = PROTECT(colnames(x));
434   SEXP out = vec_unique_names_impl(names, Rf_ncols(x), quiet);
435   UNPROTECT(1);
436   return out;
437 }
438 
vec_unique_names_impl(SEXP names,R_len_t n,bool quiet)439 static SEXP vec_unique_names_impl(SEXP names, R_len_t n, bool quiet) {
440   SEXP out;
441   if (names == R_NilValue) {
442     out = PROTECT(names_iota(n));
443     if (!quiet) {
444       describe_repair(names, out);
445     }
446   } else {
447     out = PROTECT(vec_as_unique_names(names, quiet));
448   }
449 
450   UNPROTECT(1);
451   return(out);
452 }
453 
names_iota(R_len_t n)454 static SEXP names_iota(R_len_t n) {
455   char buf[MAX_IOTA_SIZE];
456   SEXP nms = r_chr_iota(n, buf, MAX_IOTA_SIZE, "...");
457 
458   if (nms == R_NilValue) {
459     Rf_errorcall(R_NilValue, "Too many names to repair.");
460   }
461 
462   return nms;
463 }
464 
465 
466 
describe_repair(SEXP old_names,SEXP new_names)467 static void describe_repair(SEXP old_names, SEXP new_names) {
468   SEXP call = PROTECT(Rf_lang3(Rf_install("describe_repair"),
469     old_names, new_names));
470   Rf_eval(call, vctrs_ns_env);
471 
472   // To reset visibility when called from a `.External2()`
473   Rf_eval(R_NilValue, R_EmptyEnv);
474 
475   UNPROTECT(1);
476 }
477 
478 
479 // [[ register() ]]
vctrs_outer_names(SEXP names,SEXP outer,SEXP n)480 SEXP vctrs_outer_names(SEXP names, SEXP outer, SEXP n) {
481   if (names != R_NilValue && TYPEOF(names) != STRSXP) {
482     stop_internal("vctrs_outer_names", "`names` must be `NULL` or a string.");
483   }
484   if (!r_is_number(n)) {
485     stop_internal("vctrs_outer_names", "`n` must be a single integer.");
486   }
487 
488   if (outer != R_NilValue) {
489     outer = r_chr_get(outer, 0);
490   }
491 
492   return outer_names(names, outer, r_int_get(n, 0));
493 }
494 
495 // [[ include("utils.h") ]]
outer_names(SEXP names,SEXP outer,R_len_t n)496 SEXP outer_names(SEXP names, SEXP outer, R_len_t n) {
497   if (outer == R_NilValue) {
498     return names;
499   }
500   if (TYPEOF(outer) != CHARSXP) {
501     stop_internal("outer_names", "`outer` must be a scalar string.");
502   }
503 
504   if (outer == strings_empty || outer == NA_STRING) {
505     return names;
506   }
507 
508   if (r_is_empty_names(names)) {
509     if (n == 1) {
510       return r_str_as_character(outer);
511     } else {
512       return r_seq_chr(CHAR(outer), n);
513     }
514   } else {
515     return r_chr_paste_prefix(names, CHAR(outer), "..");
516   }
517 }
518 
519 // [[ register() ]]
vctrs_apply_name_spec(SEXP name_spec,SEXP outer,SEXP inner,SEXP n)520 SEXP vctrs_apply_name_spec(SEXP name_spec, SEXP outer, SEXP inner, SEXP n) {
521   return apply_name_spec(name_spec, r_chr_get(outer, 0), inner, r_int_get(n, 0));
522 }
523 
524 static SEXP glue_as_name_spec(SEXP spec);
525 
526 // [[ include("utils.h") ]]
apply_name_spec(SEXP name_spec,SEXP outer,SEXP inner,R_len_t n)527 SEXP apply_name_spec(SEXP name_spec, SEXP outer, SEXP inner, R_len_t n) {
528   if (Rf_inherits(name_spec, "rlang_zap")) {
529     return R_NilValue;
530   }
531 
532   if (outer == R_NilValue) {
533     return inner;
534   }
535   if (TYPEOF(outer) != CHARSXP) {
536     stop_internal("apply_name_spec", "`outer` must be a scalar string.");
537   }
538 
539   if (outer == strings_empty || outer == NA_STRING) {
540     if (inner == R_NilValue) {
541       return chrs_empty;
542     } else {
543       return inner;
544     }
545   }
546 
547   if (r_is_empty_names(inner)) {
548     if (n == 0) {
549       return vctrs_shared_empty_chr;
550     }
551     if (n == 1) {
552       return r_str_as_character(outer);
553     }
554     inner = PROTECT(r_seq(1, n + 1));
555   } else {
556     inner = PROTECT(inner);
557   }
558 
559   switch (TYPEOF(name_spec)) {
560   case CLOSXP:
561     break;
562   case STRSXP:
563     name_spec = glue_as_name_spec(name_spec);
564     break;
565   default:
566     name_spec = r_as_function(name_spec, ".name_spec");
567     break;
568   case NILSXP:
569     Rf_errorcall(R_NilValue,
570                  "Can't merge the outer name `%s` with a vector of length > 1.\n"
571                  "Please supply a `.name_spec` specification.",
572                  CHAR(outer));
573   }
574   PROTECT(name_spec);
575 
576   SEXP outer_chr = PROTECT(r_str_as_character(outer));
577 
578   SEXP out = PROTECT(vctrs_dispatch2(syms_dot_name_spec, name_spec,
579                                      syms_outer, outer_chr,
580                                      syms_inner, inner));
581   out = vec_recycle(out, n, NULL);
582 
583   if (out != R_NilValue) {
584     if (TYPEOF(out) != STRSXP) {
585       Rf_errorcall(R_NilValue, "`.name_spec` must return a character vector.");
586     }
587     if (Rf_length(out) != n) {
588       Rf_errorcall(R_NilValue, "`.name_spec` must return a character vector as long as `inner`.");
589     }
590   }
591 
592   UNPROTECT(4);
593   return out;
594 }
595 
596 
597 static SEXP syms_glue_as_name_spec = NULL;
598 static SEXP fns_glue_as_name_spec = NULL;
599 static SEXP syms_internal_spec = NULL;
600 
glue_as_name_spec(SEXP spec)601 static SEXP glue_as_name_spec(SEXP spec) {
602   if (!r_is_string(spec)) {
603     Rf_errorcall(R_NilValue, "Glue specification in `.name_spec` must be a single string.");
604   }
605   return vctrs_dispatch1(syms_glue_as_name_spec, fns_glue_as_name_spec,
606                          syms_internal_spec, spec);
607 }
608 
609 #define VCTRS_PASTE_BUFFER_MAX_SIZE 4096
610 char vctrs_paste_buffer[VCTRS_PASTE_BUFFER_MAX_SIZE];
611 
612 // [[ include("names.h") ]]
r_chr_paste_prefix(SEXP names,const char * prefix,const char * sep)613 SEXP r_chr_paste_prefix(SEXP names, const char* prefix, const char* sep) {
614   int n_protect = 0;
615 
616   names = PROTECT_N(Rf_shallow_duplicate(names), &n_protect);
617   R_len_t n = Rf_length(names);
618 
619   int outer_len = strlen(prefix);
620   int names_len = r_chr_max_len(names);
621 
622   int sep_len = strlen(sep);
623   int total_len = outer_len + names_len + sep_len + 1;
624 
625   char* buf = vctrs_paste_buffer;
626   if (total_len > VCTRS_PASTE_BUFFER_MAX_SIZE) {
627     SEXP buf_box = PROTECT_N(
628       Rf_allocVector(RAWSXP, total_len * sizeof(char)),
629       &n_protect
630     );
631     buf = (char*) RAW(buf_box);
632   }
633 
634   buf[total_len - 1] = '\0';
635   char* bufp = buf;
636 
637   memcpy(bufp, prefix, outer_len); bufp += outer_len;
638 
639   for (int i = 0; i < sep_len; ++i) {
640     *bufp++ = sep[i];
641   }
642 
643   SEXP const* p_names = STRING_PTR_RO(names);
644 
645   for (R_len_t i = 0; i < n; ++i) {
646     const char* inner = CHAR(p_names[i]);
647     int inner_n = strlen(inner);
648 
649     memcpy(bufp, inner, inner_n);
650     bufp[inner_n] = '\0';
651 
652     SET_STRING_ELT(names, i, r_str(buf));
653   }
654 
655   UNPROTECT(n_protect);
656   return names;
657 }
658 
659 // [[ register() ]]
vctrs_chr_paste_prefix(SEXP names,SEXP prefix,SEXP sep)660 SEXP vctrs_chr_paste_prefix(SEXP names, SEXP prefix, SEXP sep) {
661   return r_chr_paste_prefix(names,
662                             r_chr_get_c_string(prefix, 0),
663                             r_chr_get_c_string(sep, 0));
664 }
665 
666 // [[ include("names.h") ]]
r_seq_chr(const char * prefix,R_len_t n)667 SEXP r_seq_chr(const char* prefix, R_len_t n) {
668   int total_len = 24 + strlen(prefix) + 1;
669 
670   R_CheckStack2(total_len);
671   char buf[total_len];
672 
673   return r_chr_iota(n, buf, total_len, prefix);
674 }
675 
676 
677 // Initialised at load time
678 SEXP syms_set_rownames_fallback = NULL;
679 SEXP fns_set_rownames_fallback = NULL;
680 
set_rownames_fallback(SEXP x,SEXP names)681 static SEXP set_rownames_fallback(SEXP x, SEXP names) {
682   return vctrs_dispatch2(syms_set_rownames_fallback, fns_set_rownames_fallback,
683                          syms_x, x,
684                          syms_names, names);
685 }
686 
687 // Initialised at load time
688 SEXP syms_set_names_fallback = NULL;
689 SEXP fns_set_names_fallback = NULL;
690 
set_names_fallback(SEXP x,SEXP names)691 static SEXP set_names_fallback(SEXP x, SEXP names) {
692   return vctrs_dispatch2(syms_set_names_fallback, fns_set_names_fallback,
693                          syms_x, x,
694                          syms_names, names);
695 }
696 
check_names(SEXP x,SEXP names)697 static void check_names(SEXP x, SEXP names) {
698   if (names == R_NilValue) {
699     return;
700   }
701 
702   if (TYPEOF(names) != STRSXP) {
703     Rf_errorcall(
704       R_NilValue,
705       "`names` must be a character vector, not a %s.",
706       Rf_type2char(TYPEOF(names))
707     );
708   }
709 
710   R_len_t x_size = vec_size(x);
711   R_len_t names_size = vec_size(names);
712 
713   if (x_size != names_size) {
714     Rf_errorcall(
715       R_NilValue,
716       "The size of `names`, %i, must be the same as the size of `x`, %i.",
717       names_size,
718       x_size
719     );
720   }
721 }
722 
vec_set_rownames(SEXP x,SEXP names,bool proxy,const enum vctrs_owned owned)723 SEXP vec_set_rownames(SEXP x, SEXP names, bool proxy, const enum vctrs_owned owned) {
724   if (!proxy && OBJECT(x)) {
725     return set_rownames_fallback(x, names);
726   }
727 
728   int nprot = 0;
729 
730   SEXP dim_names = Rf_getAttrib(x, R_DimNamesSymbol);
731 
732   // Early exit when no new row names and no existing row names
733   if (names == R_NilValue) {
734     if (dim_names == R_NilValue || VECTOR_ELT(dim_names, 0) == R_NilValue) {
735       return x;
736     }
737   }
738 
739   x = PROTECT_N(vec_clone_referenced(x, owned), &nprot);
740 
741   if (dim_names == R_NilValue) {
742     dim_names = PROTECT_N(Rf_allocVector(VECSXP, vec_dim_n(x)), &nprot);
743   } else {
744     // Also clone attribute
745     dim_names = PROTECT_N(Rf_shallow_duplicate(dim_names), &nprot);
746   }
747 
748   SET_VECTOR_ELT(dim_names, 0, names);
749 
750   Rf_setAttrib(x, R_DimNamesSymbol, dim_names);
751 
752   UNPROTECT(nprot);
753   return x;
754 }
755 
vec_set_df_rownames(SEXP x,SEXP names,bool proxy,const enum vctrs_owned owned)756 SEXP vec_set_df_rownames(SEXP x, SEXP names, bool proxy, const enum vctrs_owned owned) {
757   if (names == R_NilValue) {
758     if (rownames_type(df_rownames(x)) != ROWNAMES_IDENTIFIERS) {
759       return(x);
760     }
761 
762     x = PROTECT(vec_clone_referenced(x, owned));
763     init_compact_rownames(x, vec_size(x));
764 
765     UNPROTECT(1);
766     return x;
767   }
768 
769   // Repair row names silently
770   if (!proxy) {
771     names = vec_as_names(names, p_unique_repair_silent_opts);
772   }
773   PROTECT(names);
774 
775   x = PROTECT(vec_clone_referenced(x, owned));
776   Rf_setAttrib(x, R_RowNamesSymbol, names);
777 
778   UNPROTECT(2);
779   return x;
780 }
781 
782 // FIXME: Do we need to get the vec_proxy() and only fall back if it doesn't
783 // exist? See #526 and #531 for discussion and the related issue.
vec_set_names_impl(SEXP x,SEXP names,bool proxy,const enum vctrs_owned owned)784 SEXP vec_set_names_impl(SEXP x, SEXP names, bool proxy, const enum vctrs_owned owned) {
785   check_names(x, names);
786 
787   if (is_data_frame(x)) {
788     return vec_set_df_rownames(x, names, proxy, owned);
789   }
790 
791   if (has_dim(x)) {
792     return vec_set_rownames(x, names, proxy, owned);
793   }
794 
795   if (!proxy && OBJECT(x)) {
796     return set_names_fallback(x, names);
797   }
798 
799   // Early exit if no new names and no existing names
800   if (names == R_NilValue && Rf_getAttrib(x, R_NamesSymbol) == R_NilValue) {
801     return x;
802   }
803 
804   x = PROTECT(vec_clone_referenced(x, owned));
805   Rf_setAttrib(x, R_NamesSymbol, names);
806 
807   UNPROTECT(1);
808   return x;
809 }
810 // [[ include("utils.h"); register() ]]
vec_set_names(SEXP x,SEXP names)811 SEXP vec_set_names(SEXP x, SEXP names) {
812   return vec_set_names_impl(x, names, false, VCTRS_OWNED_false);
813 }
814 // [[ include("utils.h") ]]
vec_proxy_set_names(SEXP x,SEXP names,const enum vctrs_owned owned)815 SEXP vec_proxy_set_names(SEXP x, SEXP names, const enum vctrs_owned owned) {
816   return vec_set_names_impl(x, names, true, owned);
817 }
818 
819 
vctrs_validate_name_repair_arg(SEXP arg)820 SEXP vctrs_validate_name_repair_arg(SEXP arg) {
821   struct name_repair_opts opts = new_name_repair_opts(arg, args_empty, true);
822   if (opts.type == name_repair_custom) {
823     return opts.fn;
824   } else if (Rf_length(arg) != 1) {
825     return r_str_as_character(r_str(name_repair_arg_as_c_string(opts.type)));
826   } else {
827     return arg;
828   }
829 }
830 
stop_name_repair()831 void stop_name_repair() {
832   Rf_errorcall(R_NilValue, "`.name_repair` must be a string or a function. See `?vctrs::vec_as_names`.");
833 }
834 
new_name_repair_opts(SEXP name_repair,struct vctrs_arg * arg,bool quiet)835 struct name_repair_opts new_name_repair_opts(SEXP name_repair, struct vctrs_arg* arg, bool quiet) {
836   struct name_repair_opts opts = {
837     .type = 0,
838     .fn = R_NilValue,
839     .arg = arg,
840     .quiet = quiet
841   };
842 
843   switch (TYPEOF(name_repair)) {
844   case STRSXP: {
845     if (!Rf_length(name_repair)) {
846       stop_name_repair();
847     }
848 
849     SEXP c = r_chr_get(name_repair, 0);
850 
851     if (c == strings_none) {
852       opts.type = name_repair_none;
853     } else if (c == strings_minimal) {
854       opts.type = name_repair_minimal;
855     } else if (c == strings_unique) {
856       opts.type = name_repair_unique;
857     } else if (c == strings_universal) {
858       opts.type = name_repair_universal;
859     } else if (c == strings_check_unique) {
860       opts.type = name_repair_check_unique;
861     } else {
862       Rf_errorcall(R_NilValue, "`.name_repair` can't be \"%s\". See `?vctrs::vec_as_names`.", CHAR(c));
863     }
864 
865     return opts;
866   }
867 
868   case LANGSXP:
869     opts.fn = r_as_function(name_repair, ".name_repair");
870     opts.type = name_repair_custom;
871     return opts;
872 
873   case CLOSXP:
874     opts.fn = name_repair;
875     opts.type = name_repair_custom;
876     return opts;
877 
878   default:
879     stop_name_repair();
880   }
881 
882   never_reached("new_name_repair_opts");
883 }
884 
885 // [[ include("vctrs.h") ]]
name_repair_arg_as_c_string(enum name_repair_type type)886 const char* name_repair_arg_as_c_string(enum name_repair_type type) {
887   switch (type) {
888   case name_repair_none: return "none";
889   case name_repair_minimal: return "minimal";
890   case name_repair_unique: return "unique";
891   case name_repair_universal: return "universal";
892   case name_repair_check_unique: return "check_unique";
893   case name_repair_custom: return "custom";
894   }
895   never_reached("name_repair_arg_as_c_string");
896 }
897 
vec_validate_minimal_names(SEXP names,R_len_t n)898 static void vec_validate_minimal_names(SEXP names, R_len_t n) {
899   if (names == R_NilValue) {
900     Rf_errorcall(R_NilValue, "Names repair functions can't return `NULL`.");
901   }
902 
903   if (TYPEOF(names) != STRSXP) {
904     Rf_errorcall(R_NilValue, "Names repair functions must return a character vector.");
905   }
906 
907   if (n >= 0 && Rf_length(names) != n) {
908     Rf_errorcall(R_NilValue,
909                  "Repaired names have length %d instead of length %d.",
910                  Rf_length(names),
911                  n);
912   }
913 
914   if (r_chr_has_string(names, NA_STRING)) {
915     Rf_errorcall(R_NilValue, "Names repair functions can't return `NA` values.");
916   }
917 }
vctrs_validate_minimal_names(SEXP names,SEXP n_)918 SEXP vctrs_validate_minimal_names(SEXP names, SEXP n_) {
919   R_len_t n = -1;
920 
921   if (TYPEOF(n_) == INTSXP) {
922     if (Rf_length(n_) != 1) {
923       stop_internal("vctrs_validate_minimal_names", "`n` must be a single number.");
924     }
925     n = INTEGER(n_)[0];
926   }
927 
928   vec_validate_minimal_names(names, n);
929   return names;
930 }
931 
932 
933 struct name_repair_opts unique_repair_default_opts;
934 struct name_repair_opts unique_repair_silent_opts;
935 
vctrs_init_names(SEXP ns)936 void vctrs_init_names(SEXP ns) {
937   syms_set_rownames_fallback = Rf_install("set_rownames_fallback");
938   syms_set_names_fallback = Rf_install("set_names_fallback");
939   syms_as_universal_names = Rf_install("as_universal_names");
940   syms_validate_unique_names = Rf_install("validate_unique");
941 
942   fns_set_rownames_fallback = r_env_get(ns, syms_set_rownames_fallback);
943   fns_set_names_fallback = r_env_get(ns, syms_set_names_fallback);
944   fns_as_universal_names = r_env_get(ns, syms_as_universal_names);
945   fns_validate_unique_names = r_env_get(ns, syms_validate_unique_names);
946 
947   syms_glue_as_name_spec = Rf_install("glue_as_name_spec");
948   fns_glue_as_name_spec = r_env_get(ns, syms_glue_as_name_spec);
949   syms_internal_spec = Rf_install("_spec");
950 
951   unique_repair_default_opts.type = name_repair_unique;
952   unique_repair_default_opts.fn = R_NilValue;
953   unique_repair_default_opts.quiet = false;
954 
955   unique_repair_silent_opts.type = name_repair_unique;
956   unique_repair_silent_opts.fn = R_NilValue;
957   unique_repair_silent_opts.quiet = true;
958 }
959