1 /* $NetBSD: parse_rx.c,v 1.1.1.1 2008/12/22 00:18:37 haad Exp $ */ 2 3 /* 4 * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. 5 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. 6 * 7 * This file is part of the device-mapper userspace tools. 8 * 9 * This copyrighted material is made available to anyone wishing to use, 10 * modify, copy, or redistribute it subject to the terms and conditions 11 * of the GNU Lesser General Public License v.2.1. 12 * 13 * You should have received a copy of the GNU Lesser General Public License 14 * along with this program; if not, write to the Free Software Foundation, 15 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 */ 17 18 #include "dmlib.h" 19 #include "parse_rx.h" 20 21 struct parse_sp { /* scratch pad for the parsing process */ 22 struct dm_pool *mem; 23 int type; /* token type, 0 indicates a charset */ 24 dm_bitset_t charset; /* The current charset */ 25 const char *cursor; /* where we are in the regex */ 26 const char *rx_end; /* 1pte for the expression being parsed */ 27 }; 28 29 static struct rx_node *_or_term(struct parse_sp *ps); 30 31 static void _single_char(struct parse_sp *ps, unsigned int c, const char *ptr) 32 { 33 ps->type = 0; 34 ps->cursor = ptr + 1; 35 dm_bit_clear_all(ps->charset); 36 dm_bit_set(ps->charset, c); 37 } 38 39 /* 40 * Get the next token from the regular expression. 41 * Returns: 1 success, 0 end of input, -1 error. 42 */ 43 static int _rx_get_token(struct parse_sp *ps) 44 { 45 int neg = 0, range = 0; 46 char c, lc = 0; 47 const char *ptr = ps->cursor; 48 if (ptr == ps->rx_end) { /* end of input ? */ 49 ps->type = -1; 50 return 0; 51 } 52 53 switch (*ptr) { 54 /* charsets and ncharsets */ 55 case '[': 56 ptr++; 57 if (*ptr == '^') { 58 dm_bit_set_all(ps->charset); 59 60 /* never transition on zero */ 61 dm_bit_clear(ps->charset, 0); 62 neg = 1; 63 ptr++; 64 65 } else 66 dm_bit_clear_all(ps->charset); 67 68 while ((ptr < ps->rx_end) && (*ptr != ']')) { 69 if (*ptr == '\\') { 70 /* an escaped character */ 71 ptr++; 72 switch (*ptr) { 73 case 'n': 74 c = '\n'; 75 break; 76 case 'r': 77 c = '\r'; 78 break; 79 case 't': 80 c = '\t'; 81 break; 82 default: 83 c = *ptr; 84 } 85 } else if (*ptr == '-' && lc) { 86 /* we've got a range on our hands */ 87 range = 1; 88 ptr++; 89 if (ptr == ps->rx_end) { 90 log_error("Incomplete range" 91 "specification"); 92 return -1; 93 } 94 c = *ptr; 95 } else 96 c = *ptr; 97 98 if (range) { 99 /* add lc - c into the bitset */ 100 if (lc > c) { 101 char tmp = c; 102 c = lc; 103 lc = tmp; 104 } 105 106 for (; lc <= c; lc++) { 107 if (neg) 108 dm_bit_clear(ps->charset, lc); 109 else 110 dm_bit_set(ps->charset, lc); 111 } 112 range = 0; 113 } else { 114 /* add c into the bitset */ 115 if (neg) 116 dm_bit_clear(ps->charset, c); 117 else 118 dm_bit_set(ps->charset, c); 119 } 120 ptr++; 121 lc = c; 122 } 123 124 if (ptr >= ps->rx_end) { 125 ps->type = -1; 126 return -1; 127 } 128 129 ps->type = 0; 130 ps->cursor = ptr + 1; 131 break; 132 133 /* These characters are special, we just return their ASCII 134 codes as the type. Sorted into ascending order to help the 135 compiler */ 136 case '(': 137 case ')': 138 case '*': 139 case '+': 140 case '?': 141 case '|': 142 ps->type = (int) *ptr; 143 ps->cursor = ptr + 1; 144 break; 145 146 case '^': 147 _single_char(ps, HAT_CHAR, ptr); 148 break; 149 150 case '$': 151 _single_char(ps, DOLLAR_CHAR, ptr); 152 break; 153 154 case '.': 155 /* The 'all but newline' character set */ 156 ps->type = 0; 157 ps->cursor = ptr + 1; 158 dm_bit_set_all(ps->charset); 159 dm_bit_clear(ps->charset, (int) '\n'); 160 dm_bit_clear(ps->charset, (int) '\r'); 161 dm_bit_clear(ps->charset, 0); 162 break; 163 164 case '\\': 165 /* escaped character */ 166 ptr++; 167 if (ptr >= ps->rx_end) { 168 log_error("Badly quoted character at end " 169 "of expression"); 170 ps->type = -1; 171 return -1; 172 } 173 174 ps->type = 0; 175 ps->cursor = ptr + 1; 176 dm_bit_clear_all(ps->charset); 177 switch (*ptr) { 178 case 'n': 179 dm_bit_set(ps->charset, (int) '\n'); 180 break; 181 case 'r': 182 dm_bit_set(ps->charset, (int) '\r'); 183 break; 184 case 't': 185 dm_bit_set(ps->charset, (int) '\t'); 186 break; 187 default: 188 dm_bit_set(ps->charset, (int) *ptr); 189 } 190 break; 191 192 default: 193 /* add a single character to the bitset */ 194 ps->type = 0; 195 ps->cursor = ptr + 1; 196 dm_bit_clear_all(ps->charset); 197 dm_bit_set(ps->charset, (int) *ptr); 198 break; 199 } 200 201 return 1; 202 } 203 204 static struct rx_node *_node(struct dm_pool *mem, int type, 205 struct rx_node *l, struct rx_node *r) 206 { 207 struct rx_node *n = dm_pool_zalloc(mem, sizeof(*n)); 208 209 if (n) { 210 if (!(n->charset = dm_bitset_create(mem, 256))) { 211 dm_pool_free(mem, n); 212 return NULL; 213 } 214 215 n->type = type; 216 n->left = l; 217 n->right = r; 218 } 219 220 return n; 221 } 222 223 static struct rx_node *_term(struct parse_sp *ps) 224 { 225 struct rx_node *n; 226 227 switch (ps->type) { 228 case 0: 229 if (!(n = _node(ps->mem, CHARSET, NULL, NULL))) { 230 stack; 231 return NULL; 232 } 233 234 dm_bit_copy(n->charset, ps->charset); 235 _rx_get_token(ps); /* match charset */ 236 break; 237 238 case '(': 239 _rx_get_token(ps); /* match '(' */ 240 n = _or_term(ps); 241 if (ps->type != ')') { 242 log_error("missing ')' in regular expression"); 243 return 0; 244 } 245 _rx_get_token(ps); /* match ')' */ 246 break; 247 248 default: 249 n = 0; 250 } 251 252 return n; 253 } 254 255 static struct rx_node *_closure_term(struct parse_sp *ps) 256 { 257 struct rx_node *l, *n; 258 259 if (!(l = _term(ps))) 260 return NULL; 261 262 for (;;) { 263 switch (ps->type) { 264 case '*': 265 n = _node(ps->mem, STAR, l, NULL); 266 break; 267 268 case '+': 269 n = _node(ps->mem, PLUS, l, NULL); 270 break; 271 272 case '?': 273 n = _node(ps->mem, QUEST, l, NULL); 274 break; 275 276 default: 277 return l; 278 } 279 280 if (!n) { 281 stack; 282 return NULL; 283 } 284 285 _rx_get_token(ps); 286 l = n; 287 } 288 289 return n; 290 } 291 292 static struct rx_node *_cat_term(struct parse_sp *ps) 293 { 294 struct rx_node *l, *r, *n; 295 296 if (!(l = _closure_term(ps))) 297 return NULL; 298 299 if (ps->type == '|') 300 return l; 301 302 if (!(r = _cat_term(ps))) 303 return l; 304 305 if (!(n = _node(ps->mem, CAT, l, r))) 306 stack; 307 308 return n; 309 } 310 311 static struct rx_node *_or_term(struct parse_sp *ps) 312 { 313 struct rx_node *l, *r, *n; 314 315 if (!(l = _cat_term(ps))) 316 return NULL; 317 318 if (ps->type != '|') 319 return l; 320 321 _rx_get_token(ps); /* match '|' */ 322 323 if (!(r = _or_term(ps))) { 324 log_error("Badly formed 'or' expression"); 325 return NULL; 326 } 327 328 if (!(n = _node(ps->mem, OR, l, r))) 329 stack; 330 331 return n; 332 } 333 334 struct rx_node *rx_parse_tok(struct dm_pool *mem, 335 const char *begin, const char *end) 336 { 337 struct rx_node *r; 338 struct parse_sp *ps = dm_pool_zalloc(mem, sizeof(*ps)); 339 340 if (!ps) { 341 stack; 342 return NULL; 343 } 344 345 ps->mem = mem; 346 ps->charset = dm_bitset_create(mem, 256); 347 ps->cursor = begin; 348 ps->rx_end = end; 349 _rx_get_token(ps); /* load the first token */ 350 351 if (!(r = _or_term(ps))) { 352 log_error("Parse error in regex"); 353 dm_pool_free(mem, ps); 354 } 355 356 return r; 357 } 358 359 struct rx_node *rx_parse_str(struct dm_pool *mem, const char *str) 360 { 361 return rx_parse_tok(mem, str, str + strlen(str)); 362 } 363