1 #include <ctype.h>
2 #include "vctrs.h"
3 #include "type-data-frame.h"
4 #include "utils.h"
5 #include "dim.h"
6
7 static void describe_repair(SEXP old_names, SEXP new_names);
8
9 // 3 leading '.' + 1 trailing '\0' + 24 characters
10 #define MAX_IOTA_SIZE 28
11
12 // Initialised at load time
13 SEXP syms_as_universal_names = NULL;
14 SEXP syms_validate_unique_names = NULL;
15 SEXP fns_as_universal_names = NULL;
16 SEXP fns_validate_unique_names = NULL;
17
18 // Defined below
19 SEXP vctrs_as_minimal_names(SEXP names);
20 SEXP vec_as_universal_names(SEXP names, bool quiet);
21 SEXP vec_validate_unique_names(SEXP names, struct vctrs_arg* arg);
22 SEXP vec_as_custom_names(SEXP names, const struct name_repair_opts* opts);
23 static void vec_validate_minimal_names(SEXP names, R_len_t n);
24
25
26 // [[ include("names.h") ]]
vec_as_names(SEXP names,const struct name_repair_opts * opts)27 SEXP vec_as_names(SEXP names, const struct name_repair_opts* opts) {
28 if (!opts) {
29 return names;
30 }
31 switch (opts->type) {
32 case name_repair_none: return names;
33 case name_repair_minimal: return vctrs_as_minimal_names(names);
34 case name_repair_unique: return vec_as_unique_names(names, opts->quiet);
35 case name_repair_universal: return vec_as_universal_names(names, opts->quiet);
36 case name_repair_check_unique: return vec_validate_unique_names(names, opts->arg);
37 case name_repair_custom: return vec_as_custom_names(names, opts);
38 }
39 never_reached("vec_as_names");
40 }
41
42 // [[ register() ]]
vctrs_as_names(SEXP names,SEXP repair,SEXP repair_arg,SEXP quiet)43 SEXP vctrs_as_names(SEXP names, SEXP repair, SEXP repair_arg, SEXP quiet) {
44 if (!r_is_bool(quiet)) {
45 Rf_errorcall(R_NilValue, "`quiet` must a boolean value.");
46 }
47 bool quiet_ = LOGICAL(quiet)[0];
48
49 struct vctrs_arg arg_ = vec_as_arg(repair_arg);
50
51 struct name_repair_opts repair_opts = new_name_repair_opts(repair, &arg_, quiet_);
52 PROTECT_NAME_REPAIR_OPTS(&repair_opts);
53
54 SEXP out = vec_as_names(names, &repair_opts);
55
56 UNPROTECT(1);
57 return out;
58 }
59
vec_as_universal_names(SEXP names,bool quiet)60 SEXP vec_as_universal_names(SEXP names, bool quiet) {
61 SEXP quiet_obj = PROTECT(r_lgl(quiet));
62 SEXP out = vctrs_dispatch2(syms_as_universal_names, fns_as_universal_names,
63 syms_names, names,
64 syms_quiet, quiet_obj);
65 UNPROTECT(1);
66 return out;
67 }
vec_validate_unique_names(SEXP names,struct vctrs_arg * arg)68 SEXP vec_validate_unique_names(SEXP names, struct vctrs_arg* arg) {
69 SEXP arg_obj = PROTECT(vctrs_arg(arg));
70
71 SEXP out = PROTECT(vctrs_dispatch2(syms_validate_unique_names, fns_validate_unique_names,
72 syms_names, names,
73 syms_arg, arg_obj));
74
75 // Restore visibility
76 Rf_eval(R_NilValue, R_EmptyEnv);
77
78 UNPROTECT(2);
79 return out;
80 }
81
vec_as_custom_names(SEXP names,const struct name_repair_opts * opts)82 SEXP vec_as_custom_names(SEXP names, const struct name_repair_opts* opts) {
83 names = PROTECT(vctrs_as_minimal_names(names));
84
85 // Don't use vctrs dispatch utils because we match argument positionally
86 SEXP call = PROTECT(Rf_lang2(syms_repair, syms_names));
87 SEXP mask = PROTECT(r_new_environment(R_GlobalEnv));
88 Rf_defineVar(syms_repair, opts->fn, mask);
89 Rf_defineVar(syms_names, names, mask);
90 SEXP out = PROTECT(Rf_eval(call, mask));
91
92 vec_validate_minimal_names(out, Rf_length(names));
93
94 UNPROTECT(4);
95 return out;
96 }
97
98 static
vec_names_impl(SEXP x,bool proxy)99 SEXP vec_names_impl(SEXP x, bool proxy) {
100 bool has_class = OBJECT(x);
101
102 if (has_class && Rf_inherits(x, "data.frame")) {
103 // Only return row names if they are character. Data frames with
104 // automatic row names are treated as unnamed.
105 SEXP rn = df_rownames(x);
106 if (rownames_type(rn) == ROWNAMES_IDENTIFIERS) {
107 return rn;
108 } else {
109 return R_NilValue;
110 }
111 }
112
113 if (vec_bare_dim(x) == R_NilValue) {
114 if (!proxy && has_class) {
115 return vctrs_dispatch1(syms_names, fns_names, syms_x, x);
116 } else {
117 return r_names(x);
118 }
119 }
120
121 SEXP dimnames = PROTECT(r_attrib_get(x, R_DimNamesSymbol));
122 if (dimnames == R_NilValue || Rf_length(dimnames) < 1) {
123 UNPROTECT(1);
124 return R_NilValue;
125 }
126
127 SEXP out = VECTOR_ELT(dimnames, 0);
128 UNPROTECT(1);
129 return out;
130 }
131
132 // [[ register(); include("vctrs.h") ]]
vec_names(SEXP x)133 SEXP vec_names(SEXP x) {
134 return vec_names_impl(x, false);
135 }
136 // [[ include("vctrs.h") ]]
vec_proxy_names(SEXP x)137 SEXP vec_proxy_names(SEXP x) {
138 return vec_names_impl(x, true);
139 }
140
141 // [[ register() ]]
vctrs_as_minimal_names(SEXP names)142 SEXP vctrs_as_minimal_names(SEXP names) {
143 if (TYPEOF(names) != STRSXP) {
144 Rf_errorcall(R_NilValue, "`names` must be a character vector");
145 }
146
147 R_len_t i = 0;
148 R_len_t n = Rf_length(names);
149 const SEXP* ptr = STRING_PTR_RO(names);
150
151 for (; i < n; ++i, ++ptr) {
152 SEXP elt = *ptr;
153 if (elt == NA_STRING) {
154 break;
155 }
156 }
157 if (i == n) {
158 return names;
159 }
160
161 names = PROTECT(Rf_shallow_duplicate(names));
162
163 for (; i < n; ++i, ++ptr) {
164 SEXP elt = *ptr;
165 if (elt == NA_STRING) {
166 SET_STRING_ELT(names, i, strings_empty);
167 }
168 }
169
170 UNPROTECT(1);
171 return names;
172 }
173
174 // [[ register() ]]
vctrs_minimal_names(SEXP x)175 SEXP vctrs_minimal_names(SEXP x) {
176 SEXP names = PROTECT(vec_names(x));
177
178 if (names == R_NilValue) {
179 names = Rf_allocVector(STRSXP, vec_size(x));
180 } else {
181 names = vctrs_as_minimal_names(names);
182 }
183
184 UNPROTECT(1);
185 return names;
186 }
187
188
189 // From dictionary.c
190 SEXP vctrs_duplicated(SEXP x);
191
192 static bool any_has_suffix(SEXP names);
193 static SEXP as_unique_names_impl(SEXP names, bool quiet);
194 static void stop_large_name();
195 static bool is_dotdotint(const char* name);
196 static ptrdiff_t suffix_pos(const char* name);
197 static bool needs_suffix(SEXP str);
198
199 // [[ include("vctrs.h") ]]
vec_as_unique_names(SEXP names,bool quiet)200 SEXP vec_as_unique_names(SEXP names, bool quiet) {
201 if (is_unique_names(names) && !any_has_suffix(names)) {
202 return names;
203 } else {
204 return(as_unique_names_impl(names, quiet));
205 }
206 }
207
208 // [[ include("vctrs.h") ]]
is_unique_names(SEXP names)209 bool is_unique_names(SEXP names) {
210 if (TYPEOF(names) != STRSXP) {
211 Rf_errorcall(R_NilValue, "`names` must be a character vector");
212 }
213
214 R_len_t n = Rf_length(names);
215 const SEXP* names_ptr = STRING_PTR_RO(names);
216
217 if (duplicated_any(names)) {
218 return false;
219 }
220
221 for (R_len_t i = 0; i < n; ++i) {
222 SEXP elt = names_ptr[i];
223
224 if (needs_suffix(elt)) {
225 return false;
226 }
227 }
228
229 return true;
230 }
231
any_has_suffix(SEXP names)232 bool any_has_suffix(SEXP names) {
233 R_len_t n = Rf_length(names);
234 const SEXP* names_ptr = STRING_PTR_RO(names);
235
236 for (R_len_t i = 0; i < n; ++i) {
237 SEXP elt = names_ptr[i];
238
239 if (suffix_pos(CHAR(elt)) >= 0) {
240 return true;
241 }
242 }
243
244 return false;
245 }
246
as_unique_names_impl(SEXP names,bool quiet)247 SEXP as_unique_names_impl(SEXP names, bool quiet) {
248 R_len_t n = Rf_length(names);
249
250 SEXP new_names = PROTECT(Rf_shallow_duplicate(names));
251 const SEXP* new_names_ptr = STRING_PTR_RO(new_names);
252
253 for (R_len_t i = 0; i < n; ++i) {
254 SEXP elt = new_names_ptr[i];
255
256 // Set `NA` and dots values to "" so they get replaced by `...n`
257 // later on
258 if (needs_suffix(elt)) {
259 elt = strings_empty;
260 SET_STRING_ELT(new_names, i, elt);
261 continue;
262 }
263
264 // Strip `...n` suffixes
265 const char* nm = CHAR(elt);
266 int pos = suffix_pos(nm);
267 if (pos >= 0) {
268 elt = Rf_mkCharLenCE(nm, pos, Rf_getCharCE(elt));
269 SET_STRING_ELT(new_names, i, elt);
270 continue;
271 }
272 }
273
274 // Append all duplicates with a suffix
275
276 SEXP dups = PROTECT(vctrs_duplicated(new_names));
277 const int* dups_ptr = LOGICAL_RO(dups);
278
279 for (R_len_t i = 0; i < n; ++i) {
280 SEXP elt = new_names_ptr[i];
281
282 if (elt != strings_empty && !dups_ptr[i]) {
283 continue;
284 }
285
286 const char* name = CHAR(elt);
287
288 int size = strlen(name);
289 int buf_size = size + MAX_IOTA_SIZE;
290
291 R_CheckStack2(buf_size);
292 char buf[buf_size];
293 buf[0] = '\0';
294
295 memcpy(buf, name, size);
296 int remaining = buf_size - size;
297
298 int needed = snprintf(buf + size, remaining, "...%d", i + 1);
299 if (needed >= remaining) {
300 stop_large_name();
301 }
302
303 SET_STRING_ELT(new_names, i, Rf_mkCharLenCE(buf, size + needed, Rf_getCharCE(elt)));
304 }
305
306 if (!quiet) {
307 describe_repair(names, new_names);
308 }
309
310 UNPROTECT(2);
311 return new_names;
312 }
313
vctrs_as_unique_names(SEXP names,SEXP quiet)314 SEXP vctrs_as_unique_names(SEXP names, SEXP quiet) {
315 SEXP out = PROTECT(vec_as_unique_names(names, LOGICAL(quiet)[0]));
316 UNPROTECT(1);
317 return out;
318 }
319
vctrs_is_unique_names(SEXP names)320 SEXP vctrs_is_unique_names(SEXP names) {
321 bool out = is_unique_names(names);
322 return Rf_ScalarLogical(out);
323 }
324
is_dotdotint(const char * name)325 static bool is_dotdotint(const char* name) {
326 int n = strlen(name);
327
328 if (n < 3) {
329 return false;
330 }
331 if (name[0] != '.' || name[1] != '.') {
332 return false;
333 }
334
335 if (name[2] == '.') {
336 name += 3;
337 } else {
338 name += 2;
339 }
340
341 return (bool) strtol(name, NULL, 10);
342 }
343
suffix_pos(const char * name)344 static ptrdiff_t suffix_pos(const char* name) {
345 int n = strlen(name);
346
347 const char* suffix_end = NULL;
348 int in_dots = 0;
349 bool in_digits = false;
350
351 for (const char* ptr = name + n - 1; ptr >= name; --ptr) {
352 char c = *ptr;
353
354 if (in_digits) {
355 if (c == '.') {
356 in_digits = false;
357 in_dots = 1;
358 continue;
359 }
360
361 if (isdigit(c)) {
362 continue;
363 }
364
365 goto done;
366 }
367
368 switch (in_dots) {
369 case 0:
370 if (isdigit(c)) {
371 in_digits = true;
372 continue;
373 }
374 goto done;
375 case 1:
376 case 2:
377 if (c == '.') {
378 ++in_dots;
379 continue;
380 }
381 goto done;
382 case 3:
383 suffix_end = ptr + 1;
384 if (isdigit(c)) {
385 in_dots = 0;
386 in_digits = true;
387 continue;
388 }
389 goto done;
390
391 default:
392 stop_internal("suffix_pos", "Unexpected state.");
393 }}
394
395 done:
396 if (suffix_end) {
397 return suffix_end - name;
398 } else {
399 return -1;
400 }
401 }
402
stop_large_name()403 static void stop_large_name() {
404 Rf_errorcall(R_NilValue, "Can't tidy up name because it is too large");
405 }
406
needs_suffix(SEXP str)407 static bool needs_suffix(SEXP str) {
408 return
409 str == NA_STRING ||
410 str == strings_dots ||
411 str == strings_empty ||
412 is_dotdotint(CHAR(str));
413 }
414
415
416 static SEXP names_iota(R_len_t n);
417 static SEXP vec_unique_names_impl(SEXP names, R_len_t n, bool quiet);
418
419 // [[ register() ]]
vctrs_unique_names(SEXP x,SEXP quiet)420 SEXP vctrs_unique_names(SEXP x, SEXP quiet) {
421 return vec_unique_names(x, LOGICAL(quiet)[0]);
422 }
423
424 // [[ include("utils.h") ]]
vec_unique_names(SEXP x,bool quiet)425 SEXP vec_unique_names(SEXP x, bool quiet) {
426 SEXP names = PROTECT(vec_names(x));
427 SEXP out = vec_unique_names_impl(names, vec_size(x), quiet);
428 UNPROTECT(1);
429 return out;
430 }
431 // [[ include("utils.h") ]]
vec_unique_colnames(SEXP x,bool quiet)432 SEXP vec_unique_colnames(SEXP x, bool quiet) {
433 SEXP names = PROTECT(colnames(x));
434 SEXP out = vec_unique_names_impl(names, Rf_ncols(x), quiet);
435 UNPROTECT(1);
436 return out;
437 }
438
vec_unique_names_impl(SEXP names,R_len_t n,bool quiet)439 static SEXP vec_unique_names_impl(SEXP names, R_len_t n, bool quiet) {
440 SEXP out;
441 if (names == R_NilValue) {
442 out = PROTECT(names_iota(n));
443 if (!quiet) {
444 describe_repair(names, out);
445 }
446 } else {
447 out = PROTECT(vec_as_unique_names(names, quiet));
448 }
449
450 UNPROTECT(1);
451 return(out);
452 }
453
names_iota(R_len_t n)454 static SEXP names_iota(R_len_t n) {
455 char buf[MAX_IOTA_SIZE];
456 SEXP nms = r_chr_iota(n, buf, MAX_IOTA_SIZE, "...");
457
458 if (nms == R_NilValue) {
459 Rf_errorcall(R_NilValue, "Too many names to repair.");
460 }
461
462 return nms;
463 }
464
465
466
describe_repair(SEXP old_names,SEXP new_names)467 static void describe_repair(SEXP old_names, SEXP new_names) {
468 SEXP call = PROTECT(Rf_lang3(Rf_install("describe_repair"),
469 old_names, new_names));
470 Rf_eval(call, vctrs_ns_env);
471
472 // To reset visibility when called from a `.External2()`
473 Rf_eval(R_NilValue, R_EmptyEnv);
474
475 UNPROTECT(1);
476 }
477
478
479 // [[ register() ]]
vctrs_outer_names(SEXP names,SEXP outer,SEXP n)480 SEXP vctrs_outer_names(SEXP names, SEXP outer, SEXP n) {
481 if (names != R_NilValue && TYPEOF(names) != STRSXP) {
482 stop_internal("vctrs_outer_names", "`names` must be `NULL` or a string.");
483 }
484 if (!r_is_number(n)) {
485 stop_internal("vctrs_outer_names", "`n` must be a single integer.");
486 }
487
488 if (outer != R_NilValue) {
489 outer = r_chr_get(outer, 0);
490 }
491
492 return outer_names(names, outer, r_int_get(n, 0));
493 }
494
495 // [[ include("utils.h") ]]
outer_names(SEXP names,SEXP outer,R_len_t n)496 SEXP outer_names(SEXP names, SEXP outer, R_len_t n) {
497 if (outer == R_NilValue) {
498 return names;
499 }
500 if (TYPEOF(outer) != CHARSXP) {
501 stop_internal("outer_names", "`outer` must be a scalar string.");
502 }
503
504 if (outer == strings_empty || outer == NA_STRING) {
505 return names;
506 }
507
508 if (r_is_empty_names(names)) {
509 if (n == 1) {
510 return r_str_as_character(outer);
511 } else {
512 return r_seq_chr(CHAR(outer), n);
513 }
514 } else {
515 return r_chr_paste_prefix(names, CHAR(outer), "..");
516 }
517 }
518
519 // [[ register() ]]
vctrs_apply_name_spec(SEXP name_spec,SEXP outer,SEXP inner,SEXP n)520 SEXP vctrs_apply_name_spec(SEXP name_spec, SEXP outer, SEXP inner, SEXP n) {
521 return apply_name_spec(name_spec, r_chr_get(outer, 0), inner, r_int_get(n, 0));
522 }
523
524 static SEXP glue_as_name_spec(SEXP spec);
525
526 // [[ include("utils.h") ]]
apply_name_spec(SEXP name_spec,SEXP outer,SEXP inner,R_len_t n)527 SEXP apply_name_spec(SEXP name_spec, SEXP outer, SEXP inner, R_len_t n) {
528 if (Rf_inherits(name_spec, "rlang_zap")) {
529 return R_NilValue;
530 }
531
532 if (outer == R_NilValue) {
533 return inner;
534 }
535 if (TYPEOF(outer) != CHARSXP) {
536 stop_internal("apply_name_spec", "`outer` must be a scalar string.");
537 }
538
539 if (outer == strings_empty || outer == NA_STRING) {
540 if (inner == R_NilValue) {
541 return chrs_empty;
542 } else {
543 return inner;
544 }
545 }
546
547 if (r_is_empty_names(inner)) {
548 if (n == 0) {
549 return vctrs_shared_empty_chr;
550 }
551 if (n == 1) {
552 return r_str_as_character(outer);
553 }
554 inner = PROTECT(r_seq(1, n + 1));
555 } else {
556 inner = PROTECT(inner);
557 }
558
559 switch (TYPEOF(name_spec)) {
560 case CLOSXP:
561 break;
562 case STRSXP:
563 name_spec = glue_as_name_spec(name_spec);
564 break;
565 default:
566 name_spec = r_as_function(name_spec, ".name_spec");
567 break;
568 case NILSXP:
569 Rf_errorcall(R_NilValue,
570 "Can't merge the outer name `%s` with a vector of length > 1.\n"
571 "Please supply a `.name_spec` specification.",
572 CHAR(outer));
573 }
574 PROTECT(name_spec);
575
576 SEXP outer_chr = PROTECT(r_str_as_character(outer));
577
578 SEXP out = PROTECT(vctrs_dispatch2(syms_dot_name_spec, name_spec,
579 syms_outer, outer_chr,
580 syms_inner, inner));
581 out = vec_recycle(out, n, NULL);
582
583 if (out != R_NilValue) {
584 if (TYPEOF(out) != STRSXP) {
585 Rf_errorcall(R_NilValue, "`.name_spec` must return a character vector.");
586 }
587 if (Rf_length(out) != n) {
588 Rf_errorcall(R_NilValue, "`.name_spec` must return a character vector as long as `inner`.");
589 }
590 }
591
592 UNPROTECT(4);
593 return out;
594 }
595
596
597 static SEXP syms_glue_as_name_spec = NULL;
598 static SEXP fns_glue_as_name_spec = NULL;
599 static SEXP syms_internal_spec = NULL;
600
glue_as_name_spec(SEXP spec)601 static SEXP glue_as_name_spec(SEXP spec) {
602 if (!r_is_string(spec)) {
603 Rf_errorcall(R_NilValue, "Glue specification in `.name_spec` must be a single string.");
604 }
605 return vctrs_dispatch1(syms_glue_as_name_spec, fns_glue_as_name_spec,
606 syms_internal_spec, spec);
607 }
608
609 #define VCTRS_PASTE_BUFFER_MAX_SIZE 4096
610 char vctrs_paste_buffer[VCTRS_PASTE_BUFFER_MAX_SIZE];
611
612 // [[ include("names.h") ]]
r_chr_paste_prefix(SEXP names,const char * prefix,const char * sep)613 SEXP r_chr_paste_prefix(SEXP names, const char* prefix, const char* sep) {
614 int n_protect = 0;
615
616 names = PROTECT_N(Rf_shallow_duplicate(names), &n_protect);
617 R_len_t n = Rf_length(names);
618
619 int outer_len = strlen(prefix);
620 int names_len = r_chr_max_len(names);
621
622 int sep_len = strlen(sep);
623 int total_len = outer_len + names_len + sep_len + 1;
624
625 char* buf = vctrs_paste_buffer;
626 if (total_len > VCTRS_PASTE_BUFFER_MAX_SIZE) {
627 SEXP buf_box = PROTECT_N(
628 Rf_allocVector(RAWSXP, total_len * sizeof(char)),
629 &n_protect
630 );
631 buf = (char*) RAW(buf_box);
632 }
633
634 buf[total_len - 1] = '\0';
635 char* bufp = buf;
636
637 memcpy(bufp, prefix, outer_len); bufp += outer_len;
638
639 for (int i = 0; i < sep_len; ++i) {
640 *bufp++ = sep[i];
641 }
642
643 SEXP const* p_names = STRING_PTR_RO(names);
644
645 for (R_len_t i = 0; i < n; ++i) {
646 const char* inner = CHAR(p_names[i]);
647 int inner_n = strlen(inner);
648
649 memcpy(bufp, inner, inner_n);
650 bufp[inner_n] = '\0';
651
652 SET_STRING_ELT(names, i, r_str(buf));
653 }
654
655 UNPROTECT(n_protect);
656 return names;
657 }
658
659 // [[ register() ]]
vctrs_chr_paste_prefix(SEXP names,SEXP prefix,SEXP sep)660 SEXP vctrs_chr_paste_prefix(SEXP names, SEXP prefix, SEXP sep) {
661 return r_chr_paste_prefix(names,
662 r_chr_get_c_string(prefix, 0),
663 r_chr_get_c_string(sep, 0));
664 }
665
666 // [[ include("names.h") ]]
r_seq_chr(const char * prefix,R_len_t n)667 SEXP r_seq_chr(const char* prefix, R_len_t n) {
668 int total_len = 24 + strlen(prefix) + 1;
669
670 R_CheckStack2(total_len);
671 char buf[total_len];
672
673 return r_chr_iota(n, buf, total_len, prefix);
674 }
675
676
677 // Initialised at load time
678 SEXP syms_set_rownames_fallback = NULL;
679 SEXP fns_set_rownames_fallback = NULL;
680
set_rownames_fallback(SEXP x,SEXP names)681 static SEXP set_rownames_fallback(SEXP x, SEXP names) {
682 return vctrs_dispatch2(syms_set_rownames_fallback, fns_set_rownames_fallback,
683 syms_x, x,
684 syms_names, names);
685 }
686
687 // Initialised at load time
688 SEXP syms_set_names_fallback = NULL;
689 SEXP fns_set_names_fallback = NULL;
690
set_names_fallback(SEXP x,SEXP names)691 static SEXP set_names_fallback(SEXP x, SEXP names) {
692 return vctrs_dispatch2(syms_set_names_fallback, fns_set_names_fallback,
693 syms_x, x,
694 syms_names, names);
695 }
696
check_names(SEXP x,SEXP names)697 static void check_names(SEXP x, SEXP names) {
698 if (names == R_NilValue) {
699 return;
700 }
701
702 if (TYPEOF(names) != STRSXP) {
703 Rf_errorcall(
704 R_NilValue,
705 "`names` must be a character vector, not a %s.",
706 Rf_type2char(TYPEOF(names))
707 );
708 }
709
710 R_len_t x_size = vec_size(x);
711 R_len_t names_size = vec_size(names);
712
713 if (x_size != names_size) {
714 Rf_errorcall(
715 R_NilValue,
716 "The size of `names`, %i, must be the same as the size of `x`, %i.",
717 names_size,
718 x_size
719 );
720 }
721 }
722
vec_set_rownames(SEXP x,SEXP names,bool proxy,const enum vctrs_owned owned)723 SEXP vec_set_rownames(SEXP x, SEXP names, bool proxy, const enum vctrs_owned owned) {
724 if (!proxy && OBJECT(x)) {
725 return set_rownames_fallback(x, names);
726 }
727
728 int nprot = 0;
729
730 SEXP dim_names = Rf_getAttrib(x, R_DimNamesSymbol);
731
732 // Early exit when no new row names and no existing row names
733 if (names == R_NilValue) {
734 if (dim_names == R_NilValue || VECTOR_ELT(dim_names, 0) == R_NilValue) {
735 return x;
736 }
737 }
738
739 x = PROTECT_N(vec_clone_referenced(x, owned), &nprot);
740
741 if (dim_names == R_NilValue) {
742 dim_names = PROTECT_N(Rf_allocVector(VECSXP, vec_dim_n(x)), &nprot);
743 } else {
744 // Also clone attribute
745 dim_names = PROTECT_N(Rf_shallow_duplicate(dim_names), &nprot);
746 }
747
748 SET_VECTOR_ELT(dim_names, 0, names);
749
750 Rf_setAttrib(x, R_DimNamesSymbol, dim_names);
751
752 UNPROTECT(nprot);
753 return x;
754 }
755
vec_set_df_rownames(SEXP x,SEXP names,bool proxy,const enum vctrs_owned owned)756 SEXP vec_set_df_rownames(SEXP x, SEXP names, bool proxy, const enum vctrs_owned owned) {
757 if (names == R_NilValue) {
758 if (rownames_type(df_rownames(x)) != ROWNAMES_IDENTIFIERS) {
759 return(x);
760 }
761
762 x = PROTECT(vec_clone_referenced(x, owned));
763 init_compact_rownames(x, vec_size(x));
764
765 UNPROTECT(1);
766 return x;
767 }
768
769 // Repair row names silently
770 if (!proxy) {
771 names = vec_as_names(names, p_unique_repair_silent_opts);
772 }
773 PROTECT(names);
774
775 x = PROTECT(vec_clone_referenced(x, owned));
776 Rf_setAttrib(x, R_RowNamesSymbol, names);
777
778 UNPROTECT(2);
779 return x;
780 }
781
782 // FIXME: Do we need to get the vec_proxy() and only fall back if it doesn't
783 // exist? See #526 and #531 for discussion and the related issue.
vec_set_names_impl(SEXP x,SEXP names,bool proxy,const enum vctrs_owned owned)784 SEXP vec_set_names_impl(SEXP x, SEXP names, bool proxy, const enum vctrs_owned owned) {
785 check_names(x, names);
786
787 if (is_data_frame(x)) {
788 return vec_set_df_rownames(x, names, proxy, owned);
789 }
790
791 if (has_dim(x)) {
792 return vec_set_rownames(x, names, proxy, owned);
793 }
794
795 if (!proxy && OBJECT(x)) {
796 return set_names_fallback(x, names);
797 }
798
799 // Early exit if no new names and no existing names
800 if (names == R_NilValue && Rf_getAttrib(x, R_NamesSymbol) == R_NilValue) {
801 return x;
802 }
803
804 x = PROTECT(vec_clone_referenced(x, owned));
805 Rf_setAttrib(x, R_NamesSymbol, names);
806
807 UNPROTECT(1);
808 return x;
809 }
810 // [[ include("utils.h"); register() ]]
vec_set_names(SEXP x,SEXP names)811 SEXP vec_set_names(SEXP x, SEXP names) {
812 return vec_set_names_impl(x, names, false, VCTRS_OWNED_false);
813 }
814 // [[ include("utils.h") ]]
vec_proxy_set_names(SEXP x,SEXP names,const enum vctrs_owned owned)815 SEXP vec_proxy_set_names(SEXP x, SEXP names, const enum vctrs_owned owned) {
816 return vec_set_names_impl(x, names, true, owned);
817 }
818
819
vctrs_validate_name_repair_arg(SEXP arg)820 SEXP vctrs_validate_name_repair_arg(SEXP arg) {
821 struct name_repair_opts opts = new_name_repair_opts(arg, args_empty, true);
822 if (opts.type == name_repair_custom) {
823 return opts.fn;
824 } else if (Rf_length(arg) != 1) {
825 return r_str_as_character(r_str(name_repair_arg_as_c_string(opts.type)));
826 } else {
827 return arg;
828 }
829 }
830
stop_name_repair()831 void stop_name_repair() {
832 Rf_errorcall(R_NilValue, "`.name_repair` must be a string or a function. See `?vctrs::vec_as_names`.");
833 }
834
new_name_repair_opts(SEXP name_repair,struct vctrs_arg * arg,bool quiet)835 struct name_repair_opts new_name_repair_opts(SEXP name_repair, struct vctrs_arg* arg, bool quiet) {
836 struct name_repair_opts opts = {
837 .type = 0,
838 .fn = R_NilValue,
839 .arg = arg,
840 .quiet = quiet
841 };
842
843 switch (TYPEOF(name_repair)) {
844 case STRSXP: {
845 if (!Rf_length(name_repair)) {
846 stop_name_repair();
847 }
848
849 SEXP c = r_chr_get(name_repair, 0);
850
851 if (c == strings_none) {
852 opts.type = name_repair_none;
853 } else if (c == strings_minimal) {
854 opts.type = name_repair_minimal;
855 } else if (c == strings_unique) {
856 opts.type = name_repair_unique;
857 } else if (c == strings_universal) {
858 opts.type = name_repair_universal;
859 } else if (c == strings_check_unique) {
860 opts.type = name_repair_check_unique;
861 } else {
862 Rf_errorcall(R_NilValue, "`.name_repair` can't be \"%s\". See `?vctrs::vec_as_names`.", CHAR(c));
863 }
864
865 return opts;
866 }
867
868 case LANGSXP:
869 opts.fn = r_as_function(name_repair, ".name_repair");
870 opts.type = name_repair_custom;
871 return opts;
872
873 case CLOSXP:
874 opts.fn = name_repair;
875 opts.type = name_repair_custom;
876 return opts;
877
878 default:
879 stop_name_repair();
880 }
881
882 never_reached("new_name_repair_opts");
883 }
884
885 // [[ include("vctrs.h") ]]
name_repair_arg_as_c_string(enum name_repair_type type)886 const char* name_repair_arg_as_c_string(enum name_repair_type type) {
887 switch (type) {
888 case name_repair_none: return "none";
889 case name_repair_minimal: return "minimal";
890 case name_repair_unique: return "unique";
891 case name_repair_universal: return "universal";
892 case name_repair_check_unique: return "check_unique";
893 case name_repair_custom: return "custom";
894 }
895 never_reached("name_repair_arg_as_c_string");
896 }
897
vec_validate_minimal_names(SEXP names,R_len_t n)898 static void vec_validate_minimal_names(SEXP names, R_len_t n) {
899 if (names == R_NilValue) {
900 Rf_errorcall(R_NilValue, "Names repair functions can't return `NULL`.");
901 }
902
903 if (TYPEOF(names) != STRSXP) {
904 Rf_errorcall(R_NilValue, "Names repair functions must return a character vector.");
905 }
906
907 if (n >= 0 && Rf_length(names) != n) {
908 Rf_errorcall(R_NilValue,
909 "Repaired names have length %d instead of length %d.",
910 Rf_length(names),
911 n);
912 }
913
914 if (r_chr_has_string(names, NA_STRING)) {
915 Rf_errorcall(R_NilValue, "Names repair functions can't return `NA` values.");
916 }
917 }
vctrs_validate_minimal_names(SEXP names,SEXP n_)918 SEXP vctrs_validate_minimal_names(SEXP names, SEXP n_) {
919 R_len_t n = -1;
920
921 if (TYPEOF(n_) == INTSXP) {
922 if (Rf_length(n_) != 1) {
923 stop_internal("vctrs_validate_minimal_names", "`n` must be a single number.");
924 }
925 n = INTEGER(n_)[0];
926 }
927
928 vec_validate_minimal_names(names, n);
929 return names;
930 }
931
932
933 struct name_repair_opts unique_repair_default_opts;
934 struct name_repair_opts unique_repair_silent_opts;
935
vctrs_init_names(SEXP ns)936 void vctrs_init_names(SEXP ns) {
937 syms_set_rownames_fallback = Rf_install("set_rownames_fallback");
938 syms_set_names_fallback = Rf_install("set_names_fallback");
939 syms_as_universal_names = Rf_install("as_universal_names");
940 syms_validate_unique_names = Rf_install("validate_unique");
941
942 fns_set_rownames_fallback = r_env_get(ns, syms_set_rownames_fallback);
943 fns_set_names_fallback = r_env_get(ns, syms_set_names_fallback);
944 fns_as_universal_names = r_env_get(ns, syms_as_universal_names);
945 fns_validate_unique_names = r_env_get(ns, syms_validate_unique_names);
946
947 syms_glue_as_name_spec = Rf_install("glue_as_name_spec");
948 fns_glue_as_name_spec = r_env_get(ns, syms_glue_as_name_spec);
949 syms_internal_spec = Rf_install("_spec");
950
951 unique_repair_default_opts.type = name_repair_unique;
952 unique_repair_default_opts.fn = R_NilValue;
953 unique_repair_default_opts.quiet = false;
954
955 unique_repair_silent_opts.type = name_repair_unique;
956 unique_repair_silent_opts.fn = R_NilValue;
957 unique_repair_silent_opts.quiet = true;
958 }
959