1 #ifndef RLANG_VECTOR_CHR_H
2 #define RLANG_VECTOR_CHR_H
3 
4 #include <string.h>
5 
6 
7 #define r_missing_str R_NaString
8 
9 extern sexp* r_shared_empty_chr;
10 extern sexp* r_empty_str;
11 
12 
r_chr_get(sexp * chr,r_ssize i)13 static inline sexp* r_chr_get(sexp* chr, r_ssize i) {
14   return STRING_ELT(chr, i);
15 }
r_chr_poke(sexp * chr,r_ssize i,sexp * elt)16 static inline void r_chr_poke(sexp* chr, r_ssize i, sexp* elt) {
17   SET_STRING_ELT(chr, i, elt);
18 }
19 
r_str_deref(sexp * str)20 static inline const char* r_str_deref(sexp* str) {
21   return CHAR(str);
22 }
23 
r_chr_get_c_string(sexp * chr,r_ssize i)24 static inline const char* r_chr_get_c_string(sexp* chr, r_ssize i) {
25   return CHAR(r_chr_get(chr, i));
26 }
27 
r_nms_get(sexp * nms,r_ssize i)28 static inline sexp* r_nms_get(sexp* nms, r_ssize i) {
29   if (nms == r_null) {
30     return r_empty_str;
31   } else {
32     return r_chr_get(nms, i);
33   }
34 }
35 
36 bool r_chr_has(sexp* chr, const char* c_string);
37 bool r_chr_has_any(sexp* chr, const char** c_strings);
38 r_ssize r_chr_detect_index(sexp* chr, const char* c_string);
39 
40 void r_chr_fill(sexp* chr, sexp* value, r_ssize n);
41 
42 sexp* r_new_character(const char** strings);
43 
r_string(const char * c_string)44 static inline sexp* r_string(const char* c_string) {
45   return Rf_mkChar(c_string);
46 }
47 
r_chr(const char * c_string)48 static inline sexp* r_chr(const char* c_string) {
49   return Rf_mkString(c_string);
50 }
51 
52 
53 sexp* chr_prepend(sexp* chr, sexp* r_string);
54 sexp* chr_append(sexp* chr, sexp* r_string);
55 
56 sexp* r_nms_are_duplicated(sexp* nms, bool from_last);
57 
58 sexp* r_str_unserialise_unicode(sexp* r_string);
59 
r_is_string(sexp * x,const char * string)60 static inline bool r_is_string(sexp* x, const char* string) {
61   if (r_typeof(x) != r_type_character || r_length(x) != 1) {
62     return false;
63   }
64   if (string && strcmp(r_chr_get_c_string(x, 0), string) != 0) {
65     return false;
66   }
67   return true;
68 }
69 
r_str_as_character(sexp * x)70 static inline sexp* r_str_as_character(sexp* x) {
71   return Rf_ScalarString(x);
72 }
73 
74 /*
75  * A symbol is always in the native encoding. This means that UTF-8
76  * data frame names undergo a lossy translation when they are
77  * transformed to symbols to create a data mask. To deal with this, we
78  * translate all serialised unicode tags back to UTF-8. This way the
79  * UTF-8 -> native -> UTF-8 translation that occurs during the
80  * character -> symbol -> character conversion fundamental for data
81  * masking is transparent and lossless for the end user.
82  *
83  * Starting from R 4.0, `installChar()` warns when translation to
84  * native encoding is lossy. This warning is disruptive for us since
85  * we correctly translate strings behind the scene. To work around
86  * this, we call `translateChar()` which doesn't warn (at least
87  * currently). If the pointers are the same, no translation is
88  * needed and we can call `installChar()`, which preserves the
89  * current encoding of the string. Otherwise we intern the symbol
90  * with `install()` without encoding.
91  */
r_str_as_symbol(sexp * str)92 static inline sexp* r_str_as_symbol(sexp* str) {
93   const char* str_native = Rf_translateChar(str);
94 
95   if (str_native == CHAR(str)) {
96     return Rf_installChar(str);
97   } else {
98     return Rf_install(str_native);
99   }
100 }
101 
r_chr_as_symbol(sexp * str)102 static inline sexp* r_chr_as_symbol(sexp* str) {
103   return r_sym(Rf_translateChar(r_chr_get(str, 0)));
104 }
105 
r_str_is_name(sexp * str)106 static inline bool r_str_is_name(sexp* str) {
107   if (str == r_missing_str) {
108     return false;
109   }
110   if (str == r_empty_str) {
111     return false;
112   }
113   return true;
114 }
115 
116 
117 #endif
118