1 #define STRICT_R_HEADERS
2 #define R_NO_REMAP
3 #include "Rinternals.h"
4 #include <stdlib.h>
5 #include <string.h>
6
set(SEXP x,int i,SEXP val)7 SEXP set(SEXP x, int i, SEXP val) {
8 R_xlen_t len = Rf_xlength(x);
9 if (i >= len) {
10 len *= 2;
11 x = Rf_lengthgets(x, len);
12 }
13 SET_VECTOR_ELT(x, i, val);
14 return x;
15 }
16
resize(SEXP out,R_xlen_t n)17 SEXP resize(SEXP out, R_xlen_t n) {
18 if (n == Rf_xlength(out)) {
19 return out;
20 }
21 return Rf_xlengthgets(out, n);
22 }
23
glue_(SEXP x,SEXP f,SEXP open_arg,SEXP close_arg,SEXP comment_arg,SEXP literal_arg)24 SEXP glue_(
25 SEXP x,
26 SEXP f,
27 SEXP open_arg,
28 SEXP close_arg,
29 SEXP comment_arg,
30 SEXP literal_arg) {
31 typedef enum {
32 text,
33 escape,
34 single_quote,
35 double_quote,
36 backtick,
37 delim,
38 comment
39 } states;
40
41 const char* xx = Rf_translateCharUTF8(STRING_ELT(x, 0));
42 size_t str_len = strlen(xx);
43
44 char* str = (char*)malloc(str_len + 1);
45
46 const char* open = CHAR(STRING_ELT(open_arg, 0));
47 size_t open_len = strlen(open);
48
49 const char* close = CHAR(STRING_ELT(close_arg, 0));
50 size_t close_len = strlen(close);
51
52 char comment_char = '\0';
53 if (Rf_xlength(comment_arg) > 0) {
54 comment_char = CHAR(STRING_ELT(comment_arg, 0))[0];
55 }
56
57 Rboolean literal = LOGICAL(literal_arg)[0];
58
59 int delim_equal = strncmp(open, close, open_len) == 0;
60
61 SEXP out = Rf_allocVector(VECSXP, 1);
62 PROTECT_INDEX out_idx;
63 PROTECT_WITH_INDEX(out, &out_idx);
64
65 size_t j = 0;
66 size_t k = 0;
67 int delim_level = 0;
68 size_t start = 0;
69 states state = text;
70 states prev_state = text;
71 size_t i = 0;
72 for (i = 0; i < str_len; ++i) {
73 switch (state) {
74 case text: {
75 if (strncmp(&xx[i], open, open_len) == 0) {
76 /* check for open delim doubled */
77 if (strncmp(&xx[i + open_len], open, open_len) == 0) {
78 i += open_len;
79 } else {
80 state = delim;
81 delim_level = 1;
82 start = i + open_len;
83 break;
84 }
85 }
86 if (strncmp(&xx[i], close, close_len) == 0 &&
87 strncmp(&xx[i + close_len], close, close_len) == 0) {
88 i += close_len;
89 }
90
91 str[j++] = xx[i];
92 break;
93 }
94 case escape: {
95 state = prev_state;
96 break;
97 }
98 case single_quote: {
99 if (xx[i] == '\\') {
100 prev_state = single_quote;
101 state = escape;
102 } else if (xx[i] == '\'') {
103 state = delim;
104 }
105 break;
106 }
107 case double_quote: {
108 if (xx[i] == '\\') {
109 prev_state = double_quote;
110 state = escape;
111 } else if (xx[i] == '\"') {
112 state = delim;
113 }
114 break;
115 }
116 case backtick: {
117 if (xx[i] == '\\') {
118 prev_state = backtick;
119 state = escape;
120 } else if (xx[i] == '`') {
121 state = delim;
122 }
123 break;
124 }
125 case comment: {
126 if (xx[i] == '\n') {
127 state = delim;
128 }
129 break;
130 }
131 case delim: {
132 if (!delim_equal && strncmp(&xx[i], open, open_len) == 0) {
133 ++delim_level;
134 i += open_len - 1;
135 } else if (strncmp(&xx[i], close, close_len) == 0) {
136 --delim_level;
137 i += close_len - 1;
138 } else {
139 if (!literal && xx[i] == comment_char) {
140 state = comment;
141 } else {
142 switch (xx[i]) {
143 case '\'':
144 if (!literal) {
145 state = single_quote;
146 }
147 break;
148 case '"':
149 if (!literal) {
150 state = double_quote;
151 }
152 break;
153 case '`':
154 if (!literal) {
155 state = backtick;
156 }
157 break;
158 };
159 }
160 }
161 if (delim_level == 0) {
162 /* Result of the current glue statement */
163 SEXP expr = PROTECT(Rf_ScalarString(
164 Rf_mkCharLenCE(&xx[start], (i - close_len) + 1 - start, CE_UTF8)));
165 SEXP call = PROTECT(Rf_lang2(f, expr));
166 SEXP result = PROTECT(Rf_eval(call, R_EmptyEnv));
167
168 /* text in between last glue statement */
169 if (j > 0) {
170 str[j] = '\0';
171 SEXP str_ = PROTECT(Rf_ScalarString(Rf_mkCharLenCE(str, j, CE_UTF8)));
172 REPROTECT(out = set(out, k++, str_), out_idx);
173 UNPROTECT(1);
174 }
175
176 REPROTECT(out = set(out, k++, result), out_idx);
177
178 /* Clear the string buffer */
179 memset(str, 0, j);
180 j = 0;
181 UNPROTECT(3);
182 state = text;
183 }
184 break;
185 }
186 };
187 }
188
189 if (k == 0 || j > 0) {
190 str[j] = '\0';
191 SEXP str_ = PROTECT(Rf_ScalarString(Rf_mkCharLenCE(str, j, CE_UTF8)));
192 REPROTECT(out = set(out, k++, str_), out_idx);
193 UNPROTECT(1);
194 }
195
196 if (state == delim) {
197 free(str);
198 Rf_error("Expecting '%s'", close);
199 } else if (state == single_quote) {
200 free(str);
201 Rf_error("Unterminated quote (')");
202 } else if (state == double_quote) {
203 free(str);
204 Rf_error("Unterminated quote (\")");
205 } else if (state == backtick) {
206 free(str);
207 Rf_error("Unterminated quote (`)");
208 } else if (state == comment) {
209 free(str);
210 Rf_error("Unterminated comment");
211 }
212
213 free(str);
214
215 out = resize(out, k);
216
217 UNPROTECT(1);
218
219 return out;
220 }
221