1 #define STRICT_R_HEADERS
2 #define R_NO_REMAP
3 #include "Rinternals.h"
4 #include <stdlib.h>
5 #include <string.h>
6 
set(SEXP x,int i,SEXP val)7 SEXP set(SEXP x, int i, SEXP val) {
8   R_xlen_t len = Rf_xlength(x);
9   if (i >= len) {
10     len *= 2;
11     x = Rf_lengthgets(x, len);
12   }
13   SET_VECTOR_ELT(x, i, val);
14   return x;
15 }
16 
resize(SEXP out,R_xlen_t n)17 SEXP resize(SEXP out, R_xlen_t n) {
18   if (n == Rf_xlength(out)) {
19     return out;
20   }
21   return Rf_xlengthgets(out, n);
22 }
23 
glue_(SEXP x,SEXP f,SEXP open_arg,SEXP close_arg,SEXP comment_arg,SEXP literal_arg)24 SEXP glue_(
25     SEXP x,
26     SEXP f,
27     SEXP open_arg,
28     SEXP close_arg,
29     SEXP comment_arg,
30     SEXP literal_arg) {
31   typedef enum {
32     text,
33     escape,
34     single_quote,
35     double_quote,
36     backtick,
37     delim,
38     comment
39   } states;
40 
41   const char* xx = Rf_translateCharUTF8(STRING_ELT(x, 0));
42   size_t str_len = strlen(xx);
43 
44   char* str = (char*)malloc(str_len + 1);
45 
46   const char* open = CHAR(STRING_ELT(open_arg, 0));
47   size_t open_len = strlen(open);
48 
49   const char* close = CHAR(STRING_ELT(close_arg, 0));
50   size_t close_len = strlen(close);
51 
52   char comment_char = '\0';
53   if (Rf_xlength(comment_arg) > 0) {
54     comment_char = CHAR(STRING_ELT(comment_arg, 0))[0];
55   }
56 
57   Rboolean literal = LOGICAL(literal_arg)[0];
58 
59   int delim_equal = strncmp(open, close, open_len) == 0;
60 
61   SEXP out = Rf_allocVector(VECSXP, 1);
62   PROTECT_INDEX out_idx;
63   PROTECT_WITH_INDEX(out, &out_idx);
64 
65   size_t j = 0;
66   size_t k = 0;
67   int delim_level = 0;
68   size_t start = 0;
69   states state = text;
70   states prev_state = text;
71   size_t i = 0;
72   for (i = 0; i < str_len; ++i) {
73     switch (state) {
74     case text: {
75       if (strncmp(&xx[i], open, open_len) == 0) {
76         /* check for open delim doubled */
77         if (strncmp(&xx[i + open_len], open, open_len) == 0) {
78           i += open_len;
79         } else {
80           state = delim;
81           delim_level = 1;
82           start = i + open_len;
83           break;
84         }
85       }
86       if (strncmp(&xx[i], close, close_len) == 0 &&
87           strncmp(&xx[i + close_len], close, close_len) == 0) {
88         i += close_len;
89       }
90 
91       str[j++] = xx[i];
92       break;
93     }
94     case escape: {
95       state = prev_state;
96       break;
97     }
98     case single_quote: {
99       if (xx[i] == '\\') {
100         prev_state = single_quote;
101         state = escape;
102       } else if (xx[i] == '\'') {
103         state = delim;
104       }
105       break;
106     }
107     case double_quote: {
108       if (xx[i] == '\\') {
109         prev_state = double_quote;
110         state = escape;
111       } else if (xx[i] == '\"') {
112         state = delim;
113       }
114       break;
115     }
116     case backtick: {
117       if (xx[i] == '\\') {
118         prev_state = backtick;
119         state = escape;
120       } else if (xx[i] == '`') {
121         state = delim;
122       }
123       break;
124     }
125     case comment: {
126       if (xx[i] == '\n') {
127         state = delim;
128       }
129       break;
130     }
131     case delim: {
132       if (!delim_equal && strncmp(&xx[i], open, open_len) == 0) {
133         ++delim_level;
134         i += open_len - 1;
135       } else if (strncmp(&xx[i], close, close_len) == 0) {
136         --delim_level;
137         i += close_len - 1;
138       } else {
139         if (!literal && xx[i] == comment_char) {
140           state = comment;
141         } else {
142           switch (xx[i]) {
143           case '\'':
144             if (!literal) {
145               state = single_quote;
146             }
147             break;
148           case '"':
149             if (!literal) {
150               state = double_quote;
151             }
152             break;
153           case '`':
154             if (!literal) {
155               state = backtick;
156             }
157             break;
158           };
159         }
160       }
161       if (delim_level == 0) {
162         /* Result of the current glue statement */
163         SEXP expr = PROTECT(Rf_ScalarString(
164             Rf_mkCharLenCE(&xx[start], (i - close_len) + 1 - start, CE_UTF8)));
165         SEXP call = PROTECT(Rf_lang2(f, expr));
166         SEXP result = PROTECT(Rf_eval(call, R_EmptyEnv));
167 
168         /* text in between last glue statement */
169         if (j > 0) {
170           str[j] = '\0';
171           SEXP str_ = PROTECT(Rf_ScalarString(Rf_mkCharLenCE(str, j, CE_UTF8)));
172           REPROTECT(out = set(out, k++, str_), out_idx);
173           UNPROTECT(1);
174         }
175 
176         REPROTECT(out = set(out, k++, result), out_idx);
177 
178         /* Clear the string buffer */
179         memset(str, 0, j);
180         j = 0;
181         UNPROTECT(3);
182         state = text;
183       }
184       break;
185     }
186     };
187   }
188 
189   if (k == 0 || j > 0) {
190     str[j] = '\0';
191     SEXP str_ = PROTECT(Rf_ScalarString(Rf_mkCharLenCE(str, j, CE_UTF8)));
192     REPROTECT(out = set(out, k++, str_), out_idx);
193     UNPROTECT(1);
194   }
195 
196   if (state == delim) {
197     free(str);
198     Rf_error("Expecting '%s'", close);
199   } else if (state == single_quote) {
200     free(str);
201     Rf_error("Unterminated quote (')");
202   } else if (state == double_quote) {
203     free(str);
204     Rf_error("Unterminated quote (\")");
205   } else if (state == backtick) {
206     free(str);
207     Rf_error("Unterminated quote (`)");
208   } else if (state == comment) {
209     free(str);
210     Rf_error("Unterminated comment");
211   }
212 
213   free(str);
214 
215   out = resize(out, k);
216 
217   UNPROTECT(1);
218 
219   return out;
220 }
221