1 /*
2 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
3 *
4 * Permission to use, copy, modify, and/or distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
9 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
10 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
11 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
12 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
13 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
14 * PERFORMANCE OF THIS SOFTWARE.
15 */
16
17 #include <isc/regex.h>
18 #include <isc/types.h>
19 #include <string.h>
20
/*
 * Validate 'c' as a regular expression in the 'C' locale without
 * compiling it.
 *
 * The string is scanned once by a small state machine:
 *
 *	none		outside any bracket expression or bound
 *	parse_bound	inside a "{m,n}" repetition bound
 *	parse_bracket	inside a "[...]" bracket expression
 *	parse_ce	inside a "[.x.]" collating element
 *	parse_ec	inside a "[=x=]" equivalence class
 *	parse_cc	inside a "[:name:]" character class
 *
 * Returns the number of '(' groups seen if the expression is accepted,
 * or -1 if 'c' is NULL, empty, or rejected.
 *
 * NOTE: a collating element, equivalence class or character class that
 * appears as a range end point is not order-checked against the range
 * start; only single characters are.
 */
int
isc_regex_validate(const char *c) {
	enum {
		none, parse_bracket, parse_bound,
		parse_ce, parse_ec, parse_cc
	} state = none;
	/* Well known character classes. */
	static const char *const cc[] = {
		":alnum:", ":digit:", ":punct:", ":alpha:", ":graph:",
		":space:", ":blank:", ":lower:", ":upper:", ":cntrl:",
		":print:", ":xdigit:"
	};
	int seen_comma = 0;	/* ',' seen in the current bound */
	int seen_high = 0;	/* upper limit digit seen in the bound */
	int seen_char = 0;	/* bracket expression has an element */
	int seen_ec = 0;	/* equivalence class is non-empty */
	int seen_ce = 0;	/* collating element is non-empty */
	int have_atom = 0;	/* something a repetition could apply to */
	int group = 0;		/* '(' groups currently open */
	/*
	 * Range tracking inside a bracket expression:
	 *	0 - not in a range
	 *	2 - '-' just seen, the next element ends the range
	 *	1 - a range end was just consumed; another '-' is an error
	 */
	int range = 0;
	int sub = 0;		/* total '(' groups seen; the result */
	int empty_ok = 0;	/* group may close without an atom */
	int neg = 0;		/* leading '^' seen in the bracket */
	int was_multiple = 0;	/* previous item cannot be repeated */
	unsigned int low = 0;
	unsigned int high = 0;
	const char *ccname = NULL;	/* leading ':' of a class name */
	/*
	 * Value (0..255) of the most recent single bracket element;
	 * 256 marks a multi-character collating element, which can
	 * never start a valid range.
	 */
	int range_start = 0;

	if (c == NULL || *c == 0)
		return (-1);

	while (*c != 0) {
		switch (state) {
		case none:
			switch (*c) {
			case '\\':	/* make literal */
				++c;
				switch (*c) {
				case '1': case '2': case '3':
				case '4': case '5': case '6':
				case '7': case '8': case '9':
					/* Back reference to an unseen group. */
					if ((*c - '0') > sub)
						return (-1);
					have_atom = 1;
					was_multiple = 0;
					break;
				case 0:
					/* Escaped end of string. */
					return (-1);
				default:
					goto literal;
				}
				++c;
				break;
			case '[':	/* bracket start */
				++c;
				neg = 0;
				was_multiple = 0;
				seen_char = 0;
				state = parse_bracket;
				break;
			case '{':	/* bound start */
				switch (c[1]) {
				case '0': case '1': case '2': case '3':
				case '4': case '5': case '6': case '7':
				case '8': case '9':
					if (!have_atom)
						return (-1);
					if (was_multiple)
						return (-1);
					seen_comma = 0;
					seen_high = 0;
					low = high = 0;
					state = parse_bound;
					break;
				default:
					/* '{' not followed by a digit. */
					goto literal;
				}
				++c;
				have_atom = 1;
				was_multiple = 1;
				break;
			case '}':
				goto literal;
			case '(':	/* group start */
				have_atom = 0;
				was_multiple = 0;
				empty_ok = 1;
				++group;
				++sub;
				++c;
				break;
			case ')':	/* group end */
				/* Empty alternative, e.g. "(a|)". */
				if (group && !have_atom && !empty_ok)
					return (-1);
				have_atom = 1;
				was_multiple = 0;
				if (group != 0)
					--group;
				++c;
				break;
			case '|':	/* alternative separator */
				if (!have_atom)
					return (-1);
				have_atom = 0;
				empty_ok = 0;
				was_multiple = 0;
				++c;
				break;
			case '^':
			case '$':
				/* Anchors cannot be repeated. */
				have_atom = 1;
				was_multiple = 1;
				++c;
				break;
			case '+':
			case '*':
			case '?':
				if (was_multiple)
					return (-1);
				if (!have_atom)
					return (-1);
				was_multiple = 1;
				++c;
				break;
			default:
			literal:
				have_atom = 1;
				was_multiple = 0;
				++c;
				break;
			}
			break;

		case parse_bound:
			switch (*c) {
			case '0': case '1': case '2': case '3': case '4':
			case '5': case '6': case '7': case '8': case '9':
				/*
				 * Limits are capped at 255, which also keeps
				 * the accumulators from overflowing.
				 */
				if (!seen_comma) {
					low = low * 10 + *c - '0';
					if (low > 255)
						return (-1);
				} else {
					seen_high = 1;
					high = high * 10 + *c - '0';
					if (high > 255)
						return (-1);
				}
				++c;
				break;
			case ',':
				if (seen_comma)
					return (-1);
				seen_comma = 1;
				++c;
				break;
			case '}':
				if (seen_high && low > high)
					return (-1);
				seen_comma = 0;
				state = none;
				++c;
				break;
			default:
				/* Anything else, including '{'. */
				return (-1);
			}
			break;

		case parse_bracket:
			switch (*c) {
			case '^':
				/* Only a leading '^' negates. */
				if (seen_char || neg)
					goto inside;
				neg = 1;
				++c;
				break;
			case '-':
				/* '-' as a range end or first element. */
				if (range == 2)
					goto inside;
				if (!seen_char)
					goto inside;
				if (range == 1)
					return (-1);
				range = 2;
				++c;
				break;
			case '[':
				switch (c[1]) {
				case '.':	/* collating element */
					if (range != 0)
						--range;
					c += 2;
					state = parse_ce;
					seen_ce = 0;
					break;
				case '=':	/* equivalence class */
					if (range == 2)
						return (-1);
					c += 2;
					state = parse_ec;
					seen_ec = 0;
					break;
				case ':':	/* character class */
					if (range == 2)
						return (-1);
					ccname = c + 1;
					c += 2;
					state = parse_cc;
					break;
				default:
					/*
					 * A literal '[' is an ordinary
					 * element and must take part in
					 * range tracking like any other.
					 */
					goto inside;
				}
				seen_char = 1;
				break;
			case ']':
				if (!c[1] && !seen_char)
					return (-1);
				/* A leading ']' is a literal. */
				if (!seen_char)
					goto inside;
				++c;
				range = 0;
				have_atom = 1;
				state = none;
				break;
			default:
			inside:
				seen_char = 1;
				/* Range end must not sort before its start. */
				if (range == 2 && (*c & 0xff) < range_start)
					return (-1);
				if (range != 0)
					--range;
				range_start = *c & 0xff;
				++c;
				break;
			}
			break;

		case parse_ce:
			switch (*c) {
			case '.':
				++c;
				switch (*c) {
				case ']':
					if (!seen_ce)
						return (-1);
					++c;
					state = parse_bracket;
					break;
				default:
					/* The '.' was part of the element. */
					if (seen_ce)
						range_start = 256;
					else
						range_start = '.';
					seen_ce = 1;
					break;
				}
				break;
			default:
				if (seen_ce)
					range_start = 256;
				else
					/* Mask as the comparison does. */
					range_start = *c & 0xff;
				seen_ce = 1;
				++c;
				break;
			}
			break;

		case parse_ec:
			switch (*c) {
			case '=':
				++c;
				switch (*c) {
				case ']':
					if (!seen_ec)
						return (-1);
					++c;
					state = parse_bracket;
					break;
				default:
					/* The '=' was part of the class. */
					seen_ec = 1;
					break;
				}
				break;
			default:
				seen_ec = 1;
				++c;
				break;
			}
			break;

		case parse_cc:
			switch (*c) {
			case ':':
				++c;
				switch (*c) {
				case ']': {
					/*
					 * ccname .. c spans ":name:"; it
					 * must match a known class exactly.
					 */
					size_t namelen = (size_t)(c - ccname);
					size_t i;
					int found = 0;

					for (i = 0;
					     i < sizeof(cc) / sizeof(*cc);
					     i++)
					{
						if (strlen(cc[i]) != namelen)
							continue;
						if (strncmp(cc[i], ccname,
							    namelen) != 0)
							continue;
						found = 1;
						break;
					}
					if (!found)
						return (-1);
					++c;
					state = parse_bracket;
					break;
				}
				default:
					break;
				}
				break;
			default:
				++c;
				break;
			}
			break;
		}
	}

	if (group != 0)
		return (-1);	/* unclosed '(' */
	if (state != none)
		return (-1);	/* unterminated bracket or bound */
	if (!have_atom)
		return (-1);	/* e.g. trailing '|' */
	return (sub);
}
354