1 /* $NetBSD: matcher.c,v 1.1.1.1 2008/12/22 00:18:36 haad Exp $ */ 2 3 /* 4 * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. 5 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. 6 * 7 * This file is part of the device-mapper userspace tools. 8 * 9 * This copyrighted material is made available to anyone wishing to use, 10 * modify, copy, or redistribute it subject to the terms and conditions 11 * of the GNU Lesser General Public License v.2.1. 12 * 13 * You should have received a copy of the GNU Lesser General Public License 14 * along with this program; if not, write to the Free Software Foundation, 15 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 */ 17 18 #include "dmlib.h" 19 #include "parse_rx.h" 20 #include "ttree.h" 21 #include "assert.h" 22 23 struct dfa_state { 24 int final; 25 struct dfa_state *lookup[256]; 26 }; 27 28 struct state_queue { 29 struct dfa_state *s; 30 dm_bitset_t bits; 31 struct state_queue *next; 32 }; 33 34 struct dm_regex { /* Instance variables for the lexer */ 35 struct dfa_state *start; 36 unsigned num_nodes; 37 int nodes_entered; 38 struct rx_node **nodes; 39 struct dm_pool *scratch, *mem; 40 }; 41 42 #define TARGET_TRANS '\0' 43 44 static int _count_nodes(struct rx_node *rx) 45 { 46 int r = 1; 47 48 if (rx->left) 49 r += _count_nodes(rx->left); 50 51 if (rx->right) 52 r += _count_nodes(rx->right); 53 54 return r; 55 } 56 57 static void _fill_table(struct dm_regex *m, struct rx_node *rx) 58 { 59 assert((rx->type != OR) || (rx->left && rx->right)); 60 61 if (rx->left) 62 _fill_table(m, rx->left); 63 64 if (rx->right) 65 _fill_table(m, rx->right); 66 67 m->nodes[m->nodes_entered++] = rx; 68 } 69 70 static void _create_bitsets(struct dm_regex *m) 71 { 72 int i; 73 74 for (i = 0; i < m->num_nodes; i++) { 75 struct rx_node *n = m->nodes[i]; 76 n->firstpos = dm_bitset_create(m->scratch, m->num_nodes); 77 n->lastpos = dm_bitset_create(m->scratch, m->num_nodes); 78 n->followpos = dm_bitset_create(m->scratch, m->num_nodes); 79 } 80 } 81 82 static void _calc_functions(struct dm_regex *m) 83 { 84 int i, j, final = 1; 85 struct rx_node *rx, *c1, *c2; 86 87 for (i = 0; i < m->num_nodes; i++) { 88 rx = m->nodes[i]; 89 c1 = rx->left; 90 c2 = rx->right; 91 92 if (dm_bit(rx->charset, TARGET_TRANS)) 93 rx->final = final++; 94 95 switch (rx->type) { 96 case CAT: 97 if (c1->nullable) 98 dm_bit_union(rx->firstpos, 99 c1->firstpos, c2->firstpos); 100 else 101 dm_bit_copy(rx->firstpos, c1->firstpos); 102 103 if (c2->nullable) 104 dm_bit_union(rx->lastpos, 105 c1->lastpos, c2->lastpos); 106 else 107 dm_bit_copy(rx->lastpos, c2->lastpos); 108 109 rx->nullable = c1->nullable && c2->nullable; 110 break; 111 112 case PLUS: 113 dm_bit_copy(rx->firstpos, c1->firstpos); 114 dm_bit_copy(rx->lastpos, c1->lastpos); 115 rx->nullable = c1->nullable; 116 break; 117 118 case OR: 119 dm_bit_union(rx->firstpos, c1->firstpos, c2->firstpos); 120 dm_bit_union(rx->lastpos, c1->lastpos, c2->lastpos); 121 rx->nullable = c1->nullable || c2->nullable; 122 break; 123 124 case QUEST: 125 case STAR: 126 dm_bit_copy(rx->firstpos, c1->firstpos); 127 dm_bit_copy(rx->lastpos, c1->lastpos); 128 rx->nullable = 1; 129 break; 130 131 case CHARSET: 132 dm_bit_set(rx->firstpos, i); 133 dm_bit_set(rx->lastpos, i); 134 rx->nullable = 0; 135 break; 136 137 default: 138 log_error("Internal error: Unknown calc node type"); 139 } 140 141 /* 142 * followpos has it's own switch 143 * because PLUS and STAR do the 144 * same thing. 145 */ 146 switch (rx->type) { 147 case CAT: 148 for (j = 0; j < m->num_nodes; j++) { 149 if (dm_bit(c1->lastpos, j)) { 150 struct rx_node *n = m->nodes[j]; 151 dm_bit_union(n->followpos, 152 n->followpos, c2->firstpos); 153 } 154 } 155 break; 156 157 case PLUS: 158 case STAR: 159 for (j = 0; j < m->num_nodes; j++) { 160 if (dm_bit(rx->lastpos, j)) { 161 struct rx_node *n = m->nodes[j]; 162 dm_bit_union(n->followpos, 163 n->followpos, rx->firstpos); 164 } 165 } 166 break; 167 } 168 } 169 } 170 171 static struct dfa_state *_create_dfa_state(struct dm_pool *mem) 172 { 173 return dm_pool_zalloc(mem, sizeof(struct dfa_state)); 174 } 175 176 static struct state_queue *_create_state_queue(struct dm_pool *mem, 177 struct dfa_state *dfa, 178 dm_bitset_t bits) 179 { 180 struct state_queue *r = dm_pool_alloc(mem, sizeof(*r)); 181 182 if (!r) { 183 stack; 184 return NULL; 185 } 186 187 r->s = dfa; 188 r->bits = dm_bitset_create(mem, bits[0]); /* first element is the size */ 189 dm_bit_copy(r->bits, bits); 190 r->next = 0; 191 return r; 192 } 193 194 static int _calc_states(struct dm_regex *m, struct rx_node *rx) 195 { 196 unsigned iwidth = (m->num_nodes / DM_BITS_PER_INT) + 1; 197 struct ttree *tt = ttree_create(m->scratch, iwidth); 198 struct state_queue *h, *t, *tmp; 199 struct dfa_state *dfa, *ldfa; 200 int i, a, set_bits = 0, count = 0; 201 dm_bitset_t bs, dfa_bits; 202 203 if (!tt) 204 return_0; 205 206 if (!(bs = dm_bitset_create(m->scratch, m->num_nodes))) 207 return_0; 208 209 /* create first state */ 210 dfa = _create_dfa_state(m->mem); 211 m->start = dfa; 212 ttree_insert(tt, rx->firstpos + 1, dfa); 213 214 /* prime the queue */ 215 h = t = _create_state_queue(m->scratch, dfa, rx->firstpos); 216 while (h) { 217 /* pop state off front of the queue */ 218 dfa = h->s; 219 dfa_bits = h->bits; 220 h = h->next; 221 222 /* iterate through all the inputs for this state */ 223 dm_bit_clear_all(bs); 224 for (a = 0; a < 256; a++) { 225 /* iterate through all the states in firstpos */ 226 for (i = dm_bit_get_first(dfa_bits); 227 i >= 0; i = dm_bit_get_next(dfa_bits, i)) { 228 if (dm_bit(m->nodes[i]->charset, a)) { 229 if (a == TARGET_TRANS) 230 dfa->final = m->nodes[i]->final; 231 232 dm_bit_union(bs, bs, 233 m->nodes[i]->followpos); 234 set_bits = 1; 235 } 236 } 237 238 if (set_bits) { 239 ldfa = ttree_lookup(tt, bs + 1); 240 if (!ldfa) { 241 /* push */ 242 ldfa = _create_dfa_state(m->mem); 243 ttree_insert(tt, bs + 1, ldfa); 244 tmp = 245 _create_state_queue(m->scratch, 246 ldfa, bs); 247 if (!h) 248 h = t = tmp; 249 else { 250 t->next = tmp; 251 t = tmp; 252 } 253 254 count++; 255 } 256 257 dfa->lookup[a] = ldfa; 258 set_bits = 0; 259 dm_bit_clear_all(bs); 260 } 261 } 262 } 263 264 log_debug("Matcher built with %d dfa states", count); 265 return 1; 266 } 267 268 struct dm_regex *dm_regex_create(struct dm_pool *mem, const char **patterns, 269 unsigned num_patterns) 270 { 271 char *all, *ptr; 272 int i; 273 size_t len = 0; 274 struct rx_node *rx; 275 struct dm_pool *scratch = dm_pool_create("regex matcher", 10 * 1024); 276 struct dm_regex *m; 277 278 if (!scratch) 279 return_NULL; 280 281 if (!(m = dm_pool_alloc(mem, sizeof(*m)))) { 282 dm_pool_destroy(scratch); 283 return_NULL; 284 } 285 286 memset(m, 0, sizeof(*m)); 287 288 /* join the regexps together, delimiting with zero */ 289 for (i = 0; i < num_patterns; i++) 290 len += strlen(patterns[i]) + 8; 291 292 ptr = all = dm_pool_alloc(scratch, len + 1); 293 294 if (!all) 295 goto_bad; 296 297 for (i = 0; i < num_patterns; i++) { 298 ptr += sprintf(ptr, "(.*(%s)%c)", patterns[i], TARGET_TRANS); 299 if (i < (num_patterns - 1)) 300 *ptr++ = '|'; 301 } 302 303 /* parse this expression */ 304 if (!(rx = rx_parse_tok(scratch, all, ptr))) { 305 log_error("Couldn't parse regex"); 306 goto bad; 307 } 308 309 m->mem = mem; 310 m->scratch = scratch; 311 m->num_nodes = _count_nodes(rx); 312 m->nodes = dm_pool_alloc(scratch, sizeof(*m->nodes) * m->num_nodes); 313 314 if (!m->nodes) 315 goto_bad; 316 317 _fill_table(m, rx); 318 _create_bitsets(m); 319 _calc_functions(m); 320 _calc_states(m, rx); 321 dm_pool_destroy(scratch); 322 m->scratch = NULL; 323 324 return m; 325 326 bad: 327 dm_pool_destroy(scratch); 328 dm_pool_free(mem, m); 329 return NULL; 330 } 331 332 static struct dfa_state *_step_matcher(int c, struct dfa_state *cs, int *r) 333 { 334 if (!(cs = cs->lookup[(unsigned char) c])) 335 return NULL; 336 337 if (cs->final && (cs->final > *r)) 338 *r = cs->final; 339 340 return cs; 341 } 342 343 int dm_regex_match(struct dm_regex *regex, const char *s) 344 { 345 struct dfa_state *cs = regex->start; 346 int r = 0; 347 348 if (!(cs = _step_matcher(HAT_CHAR, cs, &r))) 349 goto out; 350 351 for (; *s; s++) 352 if (!(cs = _step_matcher(*s, cs, &r))) 353 goto out; 354 355 _step_matcher(DOLLAR_CHAR, cs, &r); 356 357 out: 358 /* subtract 1 to get back to zero index */ 359 return r - 1; 360 } 361