1 /*
2 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
3 *
4 * This Source Code Form is subject to the terms of the Mozilla Public
5 * License, v. 2.0. If a copy of the MPL was not distributed with this
6 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
7 *
8 * See the COPYRIGHT file distributed with this work for additional
9 * information regarding copyright ownership.
10 */
11
12 #include <config.h>
13
14 #include <stdbool.h>
15
16 #include <isc/file.h>
17 #include <isc/print.h>
18 #include <isc/regex.h>
19 #include <isc/string.h>
20
/*
 * FAIL(reason) abandons validation by jumping to the 'error' label of
 * isc_regex_validate().  When VALREGEX_REPORT_REASON is non-zero the
 * reason string is recorded first so that it can be printed to stderr.
 */
#if VALREGEX_REPORT_REASON
#define FAIL(x) do { reason = (x); goto error; } while (0)
#else
#define FAIL(x) goto error
#endif

/*
 * Validate the regular expression 'c', assuming the 'C' locale.
 *
 * The expression is scanned once by a small state machine:
 *	none		- top level (atoms, groups, repetition operators)
 *	parse_bound	- inside "{low,high}"
 *	parse_bracket	- inside "[...]"
 *	parse_ce	- inside "[. .]" (collating element)
 *	parse_ec	- inside "[= =]" (equivalence class)
 *	parse_cc	- inside "[: :]" (character class)
 *
 * Returns:
 *	-1 if 'c' is NULL, empty, or is rejected by any of the checks;
 *	otherwise the number of '(' characters counted at top level
 *	(a value >= 0), which is also the highest back reference accepted.
 */
int
isc_regex_validate(const char *c) {
	enum {
		none, parse_bracket, parse_bound,
		parse_ce, parse_ec, parse_cc
	} state = none;
	/* Well known character classes. */
	static const char *const cc[] = {
		":alnum:", ":digit:", ":punct:", ":alpha:", ":graph:",
		":space:", ":blank:", ":lower:", ":upper:", ":cntrl:",
		":print:", ":xdigit:"
	};
	bool seen_comma = false;	/* ',' seen in the current bound */
	bool seen_high = false;		/* digit seen after the ',' */
	bool seen_char = false;		/* bracket has at least one member */
	bool seen_ec = false;		/* equivalence class is non-empty */
	bool seen_ce = false;		/* collating element is non-empty */
	bool have_atom = false;		/* something a repetition can apply to */
	int group = 0;			/* currently open '(' count */
	int range = 0;			/* 2: just saw '-', 1: just ended a range */
	int sub = 0;			/* total '(' count (return value) */
	bool empty_ok = false;		/* ')' may directly follow '(' */
	bool neg = false;		/* bracket started with '^' */
	bool was_multiple = false;	/* previous token was a repetition */
	unsigned int low = 0;
	unsigned int high = 0;
	const char *ccname = NULL;	/* first ':' of the class name */
	int range_start = 0;		/* 0..255, or 256 for a multi-char ce */
#if VALREGEX_REPORT_REASON
	const char *reason = "";
#endif

	if (c == NULL || *c == 0) {
		FAIL("empty string");
	}

	while (*c != 0) {
		switch (state) {
		case none:
			switch (*c) {
			case '\\':	/* make literal */
				++c;
				switch (*c) {
				case '1': case '2': case '3':
				case '4': case '5': case '6':
				case '7': case '8': case '9':
					/* Back reference to a group not yet seen. */
					if ((*c - '0') > sub) {
						FAIL("bad back reference");
					}
					have_atom = true;
					was_multiple = false;
					break;
				case 0:
					FAIL("escaped end-of-string");
				default:
					goto literal;
				}
				++c;
				break;
			case '[':	/* bracket start */
				++c;
				neg = false;
				was_multiple = false;
				seen_char = false;
				state = parse_bracket;
				break;
			case '{':	/* bound start */
				/* Only "{<digit>" opens a bound; else literal. */
				switch (c[1]) {
				case '0': case '1': case '2': case '3':
				case '4': case '5': case '6': case '7':
				case '8': case '9':
					if (!have_atom) {
						FAIL("no atom");
					}
					if (was_multiple) {
						FAIL("was multiple");
					}
					seen_comma = false;
					seen_high = false;
					low = high = 0;
					state = parse_bound;
					break;
				default:
					goto literal;
				}
				++c;
				have_atom = true;
				was_multiple = true;
				break;
			case '}':
				goto literal;
			case '(':	/* group start */
				have_atom = false;
				was_multiple = false;
				empty_ok = true;
				++group;
				++sub;
				++c;
				break;
			case ')':	/* group end */
				if (group && !have_atom && !empty_ok) {
					FAIL("empty alternative");
				}
				have_atom = true;
				was_multiple = false;
				/* An unmatched ')' is tolerated. */
				if (group != 0) {
					--group;
				}
				++c;
				break;
			case '|':	/* alternative separator */
				if (!have_atom) {
					FAIL("no atom");
				}
				have_atom = false;
				empty_ok = false;
				was_multiple = false;
				++c;
				break;
			case '^':
			case '$':
				/*
				 * Anchors count as an atom but, like a
				 * repetition, may not be followed by one.
				 */
				have_atom = true;
				was_multiple = true;
				++c;
				break;
			case '+':
			case '*':
			case '?':
				if (was_multiple) {
					FAIL("was multiple");
				}
				if (!have_atom) {
					FAIL("no atom");
				}
				have_atom = true;
				was_multiple = true;
				++c;
				break;
			case '.':
			default:
			literal:
				have_atom = true;
				was_multiple = false;
				++c;
				break;
			}
			break;
		case parse_bound:
			switch (*c) {
			case '0': case '1': case '2': case '3': case '4':
			case '5': case '6': case '7': case '8': case '9':
				if (!seen_comma) {
					low = low * 10 + *c - '0';
					if (low > 255) {
						FAIL("lower bound too big");
					}
				} else {
					seen_high = true;
					high = high * 10 + *c - '0';
					if (high > 255) {
						FAIL("upper bound too big");
					}
				}
				++c;
				break;
			case ',':
				if (seen_comma) {
					FAIL("multiple commas");
				}
				seen_comma = true;
				++c;
				break;
			case '}':
				if (seen_high && low > high) {
					FAIL("bad parse bound");
				}
				seen_comma = false;
				state = none;
				++c;
				break;
			case '{':
			default:
				FAIL("non digit/comma");
			}
			break;
		case parse_bracket:
			switch (*c) {
			case '^':
				/* Only a leading '^' negates; else ordinary. */
				if (seen_char || neg) {
					goto inside;
				}
				neg = true;
				++c;
				break;
			case '-':
				/* '-' as range end point or as first member. */
				if (range == 2) {
					goto inside;
				}
				if (!seen_char) {
					goto inside;
				}
				/* "a-b-c": a range cannot start a range. */
				if (range == 1) {
					FAIL("bad range");
				}
				range = 2;
				++c;
				break;
			case '[':
				/*
				 * NOTE(review): when the '[' is not followed
				 * by '.', '=' or ':' it is consumed here
				 * without updating 'range' or 'range_start',
				 * so it takes no part in the range order
				 * check below.  Confirm this is intended.
				 */
				++c;
				switch (*c) {
				case '.':	/* collating element */
					if (range != 0) {
						--range;
					}
					++c;
					state = parse_ce;
					seen_ce = false;
					break;
				case '=':	/* equivalence class */
					if (range == 2) {
						FAIL("equivalence class in range");
					}
					++c;
					state = parse_ec;
					seen_ec = false;
					break;
				case ':':	/* character class */
					if (range == 2) {
						FAIL("character class in range");
					}
					ccname = c;
					++c;
					state = parse_cc;
					break;
				}
				seen_char = true;
				break;
			case ']':
				if (!c[1] && !seen_char) {
					FAIL("unfinished brace");
				}
				/* A leading ']' is an ordinary member. */
				if (!seen_char) {
					goto inside;
				}
				++c;
				range = 0;
				have_atom = true;
				state = none;
				break;
			default:
			inside:
				seen_char = true;
				if (range == 2 && (*c & 0xff) < range_start) {
					FAIL("out of order range");
				}
				if (range != 0) {
					--range;
				}
				range_start = *c & 0xff;
				++c;
				break;
			}
			break;
		case parse_ce:
			switch (*c) {
			case '.':
				++c;
				switch (*c) {
				case ']':
					if (!seen_ce) {
						FAIL("empty ce");
					}
					++c;
					state = parse_bracket;
					break;
				default:
					/*
					 * The '.' was part of the element.
					 * 256 marks a multi-character
					 * element: above any single byte.
					 */
					if (seen_ce) {
						range_start = 256;
					} else {
						range_start = '.';
					}
					seen_ce = true;
					break;
				}
				break;
			default:
				if (seen_ce) {
					range_start = 256;
				} else {
					/*
					 * Mask to 0..255 so that bytes with
					 * the high bit set compare the same
					 * way as in the ordinary-character
					 * path, whatever the signedness of
					 * plain char.
					 */
					range_start = *c & 0xff;
				}
				seen_ce = true;
				++c;
				break;
			}
			break;
		case parse_ec:
			switch (*c) {
			case '=':
				++c;
				switch (*c) {
				case ']':
					if (!seen_ec) {
						FAIL("no ec");
					}
					++c;
					state = parse_bracket;
					break;
				default:
					seen_ec = true;
					break;
				}
				break;
			default:
				seen_ec = true;
				++c;
				break;
			}
			break;
		case parse_cc:
			switch (*c) {
			case ':':
				++c;
				switch (*c) {
				case ']': {
					/*
					 * 'ccname' points at the opening ':'
					 * and 'c' at the closing ']', so the
					 * span between them is ":name:".
					 */
					size_t i;
					size_t len;
					bool found = false;
					for (i = 0;
					     i < sizeof(cc) / sizeof(cc[0]);
					     i++)
					{
						len = strlen(cc[i]);
						if (len != (size_t)(c - ccname)) {
							continue;
						}
						if (strncmp(cc[i], ccname, len) != 0) {
							continue;
						}
						found = true;
						break;
					}
					if (!found) {
						FAIL("unknown cc");
					}
					++c;
					state = parse_bracket;
					break;
				}
				default:
					break;
				}
				break;
			default:
				++c;
				break;
			}
			break;
		}
	}
	if (group != 0) {
		FAIL("group open");
	}
	if (state != none) {
		FAIL("incomplete");
	}
	if (!have_atom) {
		FAIL("no atom");
	}
	return (sub);

 error:
#if VALREGEX_REPORT_REASON
	fprintf(stderr, "%s\n", reason);
#endif
	return (-1);
}
369