1 #ifndef RLANG_VECTOR_CHR_H
2 #define RLANG_VECTOR_CHR_H
3
4 #include <string.h>
5
6
7 #define r_missing_str R_NaString
8
9 extern sexp* r_shared_empty_chr;
10 extern sexp* r_empty_str;
11
12
r_chr_get(sexp * chr,r_ssize i)13 static inline sexp* r_chr_get(sexp* chr, r_ssize i) {
14 return STRING_ELT(chr, i);
15 }
r_chr_poke(sexp * chr,r_ssize i,sexp * elt)16 static inline void r_chr_poke(sexp* chr, r_ssize i, sexp* elt) {
17 SET_STRING_ELT(chr, i, elt);
18 }
19
r_str_deref(sexp * str)20 static inline const char* r_str_deref(sexp* str) {
21 return CHAR(str);
22 }
23
r_chr_get_c_string(sexp * chr,r_ssize i)24 static inline const char* r_chr_get_c_string(sexp* chr, r_ssize i) {
25 return CHAR(r_chr_get(chr, i));
26 }
27
r_nms_get(sexp * nms,r_ssize i)28 static inline sexp* r_nms_get(sexp* nms, r_ssize i) {
29 if (nms == r_null) {
30 return r_empty_str;
31 } else {
32 return r_chr_get(nms, i);
33 }
34 }
35
36 bool r_chr_has(sexp* chr, const char* c_string);
37 bool r_chr_has_any(sexp* chr, const char** c_strings);
38 r_ssize r_chr_detect_index(sexp* chr, const char* c_string);
39
40 void r_chr_fill(sexp* chr, sexp* value, r_ssize n);
41
42 sexp* r_new_character(const char** strings);
43
r_string(const char * c_string)44 static inline sexp* r_string(const char* c_string) {
45 return Rf_mkChar(c_string);
46 }
47
r_chr(const char * c_string)48 static inline sexp* r_chr(const char* c_string) {
49 return Rf_mkString(c_string);
50 }
51
52
53 sexp* chr_prepend(sexp* chr, sexp* r_string);
54 sexp* chr_append(sexp* chr, sexp* r_string);
55
56 sexp* r_nms_are_duplicated(sexp* nms, bool from_last);
57
58 sexp* r_str_unserialise_unicode(sexp* r_string);
59
r_is_string(sexp * x,const char * string)60 static inline bool r_is_string(sexp* x, const char* string) {
61 if (r_typeof(x) != r_type_character || r_length(x) != 1) {
62 return false;
63 }
64 if (string && strcmp(r_chr_get_c_string(x, 0), string) != 0) {
65 return false;
66 }
67 return true;
68 }
69
r_str_as_character(sexp * x)70 static inline sexp* r_str_as_character(sexp* x) {
71 return Rf_ScalarString(x);
72 }
73
74 /*
75 * A symbol is always in the native encoding. This means that UTF-8
76 * data frame names undergo a lossy translation when they are
77 * transformed to symbols to create a data mask. To deal with this, we
78 * translate all serialised unicode tags back to UTF-8. This way the
79 * UTF-8 -> native -> UTF-8 translation that occurs during the
80 * character -> symbol -> character conversion fundamental for data
81 * masking is transparent and lossless for the end user.
82 *
83 * Starting from R 4.0, `installChar()` warns when translation to
84 * native encoding is lossy. This warning is disruptive for us since
85 * we correctly translate strings behind the scene. To work around
86 * this, we call `translateChar()` which doesn't warn (at least
87 * currently). If the pointers are the same, no translation is
88 * needed and we can call `installChar()`, which preserves the
89 * current encoding of the string. Otherwise we intern the symbol
90 * with `install()` without encoding.
91 */
r_str_as_symbol(sexp * str)92 static inline sexp* r_str_as_symbol(sexp* str) {
93 const char* str_native = Rf_translateChar(str);
94
95 if (str_native == CHAR(str)) {
96 return Rf_installChar(str);
97 } else {
98 return Rf_install(str_native);
99 }
100 }
101
r_chr_as_symbol(sexp * str)102 static inline sexp* r_chr_as_symbol(sexp* str) {
103 return r_sym(Rf_translateChar(r_chr_get(str, 0)));
104 }
105
r_str_is_name(sexp * str)106 static inline bool r_str_is_name(sexp* str) {
107 if (str == r_missing_str) {
108 return false;
109 }
110 if (str == r_empty_str) {
111 return false;
112 }
113 return true;
114 }
115
116
117 #endif
118