/*
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * The validator below only uses NULL, strlen() and strncmp() from
 * <string.h>.  The ISC headers are still pulled in whenever they are
 * reachable (presumably <isc/regex.h> carries the public prototype --
 * confirm against the header); the guard merely lets this translation
 * unit be compiled stand-alone, e.g. in an isolated test harness.
 * Compilers without __has_include include them unconditionally, exactly
 * as before.
 */
#ifdef __has_include
#if __has_include(<isc/regex.h>)
#include <isc/regex.h>
#endif
#if __has_include(<isc/types.h>)
#include <isc/types.h>
#endif
#else
#include <isc/regex.h>
#include <isc/types.h>
#endif
#include <string.h>

/*
 * Return 1 if the 'len' bytes starting at 'name' spell one of the well
 * known character class names (including both surrounding colons, e.g.
 * ":alpha:"), otherwise 0.
 */
static int
regex_known_class(const char *name, size_t len) {
	static const char *const classes[] = {
		":alnum:", ":digit:", ":punct:", ":alpha:", ":graph:",
		":space:", ":blank:", ":lower:", ":upper:", ":cntrl:",
		":print:", ":xdigit:"
	};
	size_t i;

	for (i = 0; i < sizeof(classes) / sizeof(classes[0]); i++) {
		if (strlen(classes[i]) == len &&
		    strncmp(classes[i], name, len) == 0)
		{
			return (1);
		}
	}
	return (0);
}

/*
 * Validate the regular expression 'c' ('C' locale).
 *
 * The expression is scanned once by a small state machine; nothing is
 * compiled.
 *
 * Returns:
 *	-1	if 'c' is NULL, empty, or is rejected by the scanner;
 *	>= 0	otherwise: the number of '(' group openers seen, which is
 *		also the highest back-reference ("\1".."\9") accepted.
 */
int
isc_regex_validate(const char *c) {
	enum {
		none,		/* top level of the expression */
		parse_bracket,	/* inside "[...]" */
		parse_bound,	/* inside "{m,n}" */
		parse_ce,	/* inside "[. .]" (collating element) */
		parse_ec,	/* inside "[= =]" (equivalence class) */
		parse_cc	/* inside "[: :]" (character class) */
	} state = none;
	int seen_comma = 0;	/* bound: ',' already consumed */
	int seen_high = 0;	/* bound: an upper limit was given */
	int seen_char = 0;	/* bracket: at least one member seen */
	int seen_ec = 0;	/* equivalence class is non-empty */
	int seen_ce = 0;	/* collating element is non-empty */
	int have_atom = 0;	/* something a repetition can apply to */
	int group = 0;		/* currently open '(' groups */
	int range = 0;		/* bracket: 2 after '-', 1 after range end */
	int sub = 0;		/* total '(' seen so far */
	int empty_ok = 0;	/* ')' may close a group with no atom */
	int neg = 0;		/* bracket: leading '^' consumed */
	int was_multiple = 0;	/* previous token forbids a repetition */
	unsigned int low = 0;	/* bound: lower limit */
	unsigned int high = 0;	/* bound: upper limit */
	const char *ccname = NULL; /* first ':' of the current "[:name:]" */
	int range_start = 0;	/* bracket: value a range end must reach */

	if (c == NULL || *c == 0) {
		return (-1);
	}

	while (*c != 0) {
		switch (state) {
		case none:
			switch (*c) {
			case '\\': /* make literal */
				++c;
				switch (*c) {
				case '1': case '2': case '3':
				case '4': case '5': case '6':
				case '7': case '8': case '9':
					/* Back-reference to an unseen group. */
					if ((*c - '0') > sub) {
						return (-1);
					}
					have_atom = 1;
					was_multiple = 0;
					break;
				case 0:
					/* Trailing backslash. */
					return (-1);
				default:
					goto literal;
				}
				++c;
				break;
			case '[': /* bracket start */
				++c;
				neg = 0;
				was_multiple = 0;
				seen_char = 0;
				state = parse_bracket;
				break;
			case '{': /* bound start */
				/* '{' only opens a bound when a digit follows. */
				switch (c[1]) {
				case '0': case '1': case '2': case '3':
				case '4': case '5': case '6': case '7':
				case '8': case '9':
					if (!have_atom) {
						return (-1);
					}
					if (was_multiple) {
						return (-1);
					}
					seen_comma = 0;
					seen_high = 0;
					low = high = 0;
					state = parse_bound;
					break;
				default:
					goto literal;
				}
				++c;
				have_atom = 1;
				was_multiple = 1;
				break;
			case '}':
				goto literal;
			case '(': /* group start */
				have_atom = 0;
				was_multiple = 0;
				empty_ok = 1;
				++group;
				++sub;
				++c;
				break;
			case ')': /* group end */
				if (group && !have_atom && !empty_ok) {
					return (-1);
				}
				have_atom = 1;
				was_multiple = 0;
				/* An unmatched ')' does not underflow 'group'. */
				if (group != 0) {
					--group;
				}
				++c;
				break;
			case '|': /* alternative separator */
				if (!have_atom) {
					return (-1);
				}
				have_atom = 0;
				empty_ok = 0;
				was_multiple = 0;
				++c;
				break;
			case '^':
			case '$':
				/*
				 * Anchors set was_multiple so that a repetition
				 * operator directly after them is rejected.
				 */
				have_atom = 1;
				was_multiple = 1;
				++c;
				break;
			case '+':
			case '*':
			case '?':
				if (was_multiple) {
					return (-1);
				}
				if (!have_atom) {
					return (-1);
				}
				have_atom = 1;
				was_multiple = 1;
				++c;
				break;
			case '.':
			default:
			literal:
				have_atom = 1;
				was_multiple = 0;
				++c;
				break;
			}
			break;
		case parse_bound:
			switch (*c) {
			case '0': case '1': case '2': case '3': case '4':
			case '5': case '6': case '7': case '8': case '9':
				/* Either limit is capped at 255. */
				if (!seen_comma) {
					low = low * 10 + *c - '0';
					if (low > 255) {
						return (-1);
					}
				} else {
					seen_high = 1;
					high = high * 10 + *c - '0';
					if (high > 255) {
						return (-1);
					}
				}
				++c;
				break;
			case ',':
				if (seen_comma) {
					return (-1);
				}
				seen_comma = 1;
				++c;
				break;
			case '}':
				if (seen_high && low > high) {
					return (-1);
				}
				seen_comma = 0;
				state = none;
				++c;
				break;
			case '{':
			default:
				return (-1);
			}
			break;
		case parse_bracket:
			switch (*c) {
			case '^':
				/* Only the first '^' negates; later ones are members. */
				if (seen_char || neg) {
					goto inside;
				}
				neg = 1;
				++c;
				break;
			case '-':
				if (range == 2) {
					goto inside;
				}
				if (!seen_char) {
					goto inside;
				}
				/* A range end cannot start another range. */
				if (range == 1) {
					return (-1);
				}
				range = 2;
				++c;
				break;
			case '[':
				++c;
				switch (*c) {
				case '.': /* collating element */
					if (range != 0) {
						--range;
					}
					++c;
					state = parse_ce;
					seen_ce = 0;
					break;
				case '=': /* equivalence class */
					if (range == 2) {
						return (-1);
					}
					++c;
					state = parse_ec;
					seen_ec = 0;
					break;
				case ':': /* character class */
					if (range == 2) {
						return (-1);
					}
					ccname = c;
					++c;
					state = parse_cc;
					break;
				default:
					break;
				}
				seen_char = 1;
				break;
			case ']':
				if (!c[1] && !seen_char) {
					return (-1);
				}
				/* A ']' in first position is an ordinary member. */
				if (!seen_char) {
					goto inside;
				}
				++c;
				range = 0;
				have_atom = 1;
				state = none;
				break;
			default:
			inside:
				seen_char = 1;
				if (range == 2 && (*c & 0xff) < range_start) {
					return (-1);
				}
				if (range != 0) {
					--range;
				}
				range_start = *c & 0xff;
				++c;
				break;
			}
			break;
		case parse_ce:
			/*
			 * A single-character element can start a range; for a
			 * longer one range_start becomes 256, which no range
			 * end (masked to 0..255) can reach.
			 */
			switch (*c) {
			case '.':
				++c;
				switch (*c) {
				case ']':
					if (!seen_ce) {
						return (-1);
					}
					++c;
					state = parse_bracket;
					break;
				default:
					if (seen_ce) {
						range_start = 256;
					} else {
						range_start = '.';
					}
					seen_ce = 1;
					break;
				}
				break;
			default:
				if (seen_ce) {
					range_start = 256;
				} else {
					/*
					 * Mask to 0..255 like the 'inside' path
					 * does; a plain (possibly signed) char
					 * would yield a negative start for
					 * bytes >= 0x80 and disable the range
					 * order check.
					 */
					range_start = *c & 0xff;
				}
				seen_ce = 1;
				++c;
				break;
			}
			break;
		case parse_ec:
			switch (*c) {
			case '=':
				++c;
				switch (*c) {
				case ']':
					if (!seen_ec) {
						return (-1);
					}
					++c;
					state = parse_bracket;
					break;
				default:
					seen_ec = 1;
					break;
				}
				break;
			default:
				seen_ec = 1;
				++c;
				break;
			}
			break;
		case parse_cc:
			switch (*c) {
			case ':':
				++c;
				switch (*c) {
				case ']':
					/*
					 * 'ccname'..'c' spans ":name:"; it must
					 * be one of the known class names.
					 */
					if (!regex_known_class(
						    ccname,
						    (size_t)(c - ccname)))
					{
						return (-1);
					}
					++c;
					state = parse_bracket;
					break;
				default:
					break;
				}
				break;
			default:
				++c;
				break;
			}
			break;
		}
	}

	/* Unclosed group, unterminated bracket/bound, or dangling '|'. */
	if (group != 0) {
		return (-1);
	}
	if (state != none) {
		return (-1);
	}
	if (!have_atom) {
		return (-1);
	}
	return (sub);
}