1 /***********************************************************************
2 * *
3 * This software is part of the ast package *
4 * Copyright (c) 1985-2013 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Eclipse Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
8 * *
9 * A copy of the License is available at *
10 * http://www.eclipse.org/org/documents/epl-v10.html *
11 * (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12 * *
13 * Information and Software Systems Research *
14 * AT&T Research *
15 * Florham Park NJ *
16 * *
17 * Glenn Fowler <glenn.s.fowler@gmail.com> *
18 * David Korn <dgkorn@gmail.com> *
19 * Phong Vo <phongvo@gmail.com> *
20 * *
21 ***********************************************************************/
22 /*
23 * Glenn Fowler
24 * AT&T Research
25 *
26 * return the next character in the string s
27 * \ character constants are expanded
28 * *p is updated to point to the next character in s
29 * *m is 1 if return value is wide
30 */
31 #include "config_ast.h" // IWYU pragma: keep
32
33 #include <ctype.h>
34 #include <limits.h>
35 #include <stdbool.h>
36 #include <stdint.h>
37 #include <wchar.h>
38
39 #include "ast.h"
40
chrexp(const char * s,char ** p,int * m,int flags)41 int chrexp(const char *s, char **p, int *m, int flags) {
42 const char *t;
43 int c;
44 const char *e;
45 const char *b;
46 char *r;
47 int n;
48 int x;
49 wchar_t d;
50 Mbstate_t q;
51 bool u;
52 bool w;
53
54 u = w = 0;
55 mbinit(&q);
56 for (;;) {
57 b = s;
58 c = mbchar(&d, (char **)&s, MB_LEN_MAX, &q);
59 switch (c) {
60 case 0:
61 s = b;
62 break;
63 case '\\':
64 b = s;
65 switch (c = *s++) {
66 case '0':
67 case '1':
68 case '2':
69 case '3':
70 case '4':
71 case '5':
72 case '6':
73 case '7':
74 if (!(flags & FMT_EXP_CHAR)) goto noexpand;
75 c -= '0';
76 t = s + 2;
77 while (s < t) {
78 switch (*s) {
79 case '0':
80 case '1':
81 case '2':
82 case '3':
83 case '4':
84 case '5':
85 case '6':
86 case '7':
87 c = (c << 3) + *s++ - '0';
88 break;
89 default:
90 t = s;
91 break;
92 }
93 }
94 break;
95 case 'a':
96 if (!(flags & FMT_EXP_CHAR)) goto noexpand;
97 c = CC_bel;
98 break;
99 case 'b':
100 if (!(flags & FMT_EXP_CHAR)) goto noexpand;
101 c = '\b';
102 break;
103 case 'c': /*DEPRECATED*/
104 case 'C':
105 if (!(flags & FMT_EXP_CHAR)) goto noexpand;
106 c = *s;
107 if (c) {
108 s++;
109 if (c == '\\') {
110 c = chrexp(s - 1, &r, 0, flags);
111 s = (const char *)r;
112 }
113 if (islower(c)) c = toupper(c);
114 c ^= 0x40;
115 }
116 break;
117 case 'e': /*DEPRECATED*/
118 case 'E':
119 if (!(flags & FMT_EXP_CHAR)) goto noexpand;
120 c = CC_esc;
121 break;
122 case 'f':
123 if (!(flags & FMT_EXP_CHAR)) goto noexpand;
124 c = '\f';
125 break;
126 case 'M':
127 if (!(flags & FMT_EXP_CHAR)) goto noexpand;
128 if (*s == '-') {
129 s++;
130 c = CC_esc;
131 }
132 break;
133 case 'n':
134 if (flags & FMT_EXP_NONL) continue;
135 if (!(flags & FMT_EXP_LINE)) goto noexpand;
136 c = '\n';
137 break;
138 case 'r':
139 if (flags & FMT_EXP_NOCR) continue;
140 if (!(flags & FMT_EXP_LINE)) goto noexpand;
141 c = '\r';
142 break;
143 case 't':
144 if (!(flags & FMT_EXP_CHAR)) goto noexpand;
145 c = '\t';
146 break;
147 case 'v':
148 if (!(flags & FMT_EXP_CHAR)) goto noexpand;
149 c = CC_vt;
150 break;
151 case 'u':
152 u = 1;
153 // FALLTHRU
154 case 'w':
155 t = s + 4;
156 goto wex;
157 case 'U':
158 u = 1;
159 // FALLTHRU
160 case 'W':
161 t = s + 8;
162 wex:
163 if (!(flags & FMT_EXP_WIDE)) goto noexpand;
164 w = 1;
165 goto hex;
166 case 'x':
167 t = s + 2;
168 hex:
169 e = s;
170 n = 0;
171 c = 0;
172 x = 0;
173 while (!e || !t || s < t) {
174 switch (*s) {
175 case 'a':
176 case 'b':
177 case 'c':
178 case 'd':
179 case 'e':
180 case 'f':
181 c = (c << 4) + *s++ - 'a' + 10;
182 n++;
183 continue;
184 case 'A':
185 case 'B':
186 case 'C':
187 case 'D':
188 case 'E':
189 case 'F':
190 c = (c << 4) + *s++ - 'A' + 10;
191 n++;
192 continue;
193 case '0':
194 case '1':
195 case '2':
196 case '3':
197 case '4':
198 case '5':
199 case '6':
200 case '7':
201 case '8':
202 case '9':
203 c = (c << 4) + *s++ - '0';
204 n++;
205 continue;
206 case '{':
207 case '[':
208 if (s != e) break;
209 e = 0;
210 s++;
211 if (w && (*s == 'U' || *s == 'W') && *(s + 1) == '+') s += 2;
212 continue;
213 case '-':
214 if (e) break;
215 if (*(s + 1) != '}' && *(s + 1) != ']') {
216 if (!*(s + 1) || (*(s + 2) != '}' && *(s + 2) != ']')) {
217 break;
218 }
219 x = *(unsigned char *)(s + 1);
220 s += 2;
221 } else {
222 x = -1;
223 s++;
224 }
225 /*FALLTHROUGH*/
226 case '}':
227 case ']':
228 if (!e) e = ++s;
229 break;
230 default:
231 break;
232 }
233 break;
234 }
235 if (e) {
236 if (n < 8 || (n == 8 && c >= 0)) {
237 if (!w) {
238 if (n > 2) {
239 if (!(flags & FMT_EXP_WIDE)) goto noexpand;
240 w = 1;
241 } else if (!(flags & FMT_EXP_CHAR)) {
242 goto noexpand;
243 } else {
244 break;
245 }
246 }
247 if (!mbwide()) w = 0;
248 if (c <= 0x7f) break;
249 if (u) {
250 uint32_t i = c;
251 wchar_t o;
252
253 if (!utf32invalid(i) && utf32stowcs(&o, &i, 1) > 0) {
254 c = o;
255 break;
256 }
257 } else if (w || c <= ast.byte_max) {
258 break;
259 }
260 }
261 if (x) {
262 c = x;
263 w = 0;
264 break;
265 }
266 }
267 /*FALLTHROUGH*/
268 case 0:
269 goto noexpand;
270 }
271 break;
272 default:
273 if ((s - b) > 1) w = 1;
274 break;
275 noexpand:
276 s = b;
277 w = 0;
278 c = '\\';
279 break;
280 }
281 break;
282 }
283 if (m) *m = w;
284 if (p) *p = (char *)s;
285 return c;
286 }
287
chresc(const char * s,char ** p)288 int chresc(const char *s, char **p) {
289 return chrexp(s, p, NULL, FMT_EXP_CHAR | FMT_EXP_LINE | FMT_EXP_WIDE);
290 }
291