1 /*- 2 * Copyright (c) 1992 Henry Spencer. 3 * Copyright (c) 1992 The Regents of the University of California. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Henry Spencer of the University of Toronto. 8 * 9 * %sccs.include.redist.c% 10 * 11 * @(#)engine.c 5.4 (Berkeley) 09/30/92 12 */ 13 14 /* 15 * The matching engine and friends. This file is #included by regexec.c 16 * after suitable #defines of a variety of macros used herein, so that 17 * different state representations can be used without duplicating masses 18 * of code. 19 */ 20 21 #ifdef SNAMES 22 #define matcher smatcher 23 #define fast sfast 24 #define slow sslow 25 #define dissect sdissect 26 #define backref sbackref 27 #define expand sexpand 28 #define step sstep 29 #define print sprint 30 #define at sat 31 #define match smat 32 #endif 33 #ifdef LNAMES 34 #define matcher lmatcher 35 #define fast lfast 36 #define slow lslow 37 #define dissect ldissect 38 #define backref lbackref 39 #define expand lexpand 40 #define step lstep 41 #define print lprint 42 #define at lat 43 #define match lmat 44 #endif 45 46 /* another structure passed up and down to avoid zillions of parameters */ 47 struct match { 48 struct re_guts *g; 49 int eflags; 50 regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ 51 uchar *offp; /* offsets work from here */ 52 uchar *beginp; /* start of string -- virtual NUL precedes */ 53 uchar *endp; /* end of string -- virtual NUL here */ 54 uchar *coldp; /* can be no match starting before here */ 55 uchar **lastpos; /* [nplus+1] */ 56 STATEVARS; 57 states st; /* current states */ 58 states fresh; /* states for a fresh start */ 59 states tmp; /* temporary */ 60 states empty; /* empty set of states */ 61 }; 62 63 #ifdef REDEBUG 64 #define SP(t, s, c) print(m, t, s, c, stdout) 65 #define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) 66 #define NOTE(str) { if (m->eflags®_TRACE) printf("=%s\n", (str)); } 67 #else 68 #define SP(t, s, c) /* nothing */ 69 #define AT(t, p1, p2, s1, s2) /* nothing */ 70 #define NOTE(s) /* nothing */ 71 #endif 72 73 static uchar *backref 74 __P((struct match *, uchar *, uchar *, sopno, sopno, sopno)); 75 static uchar *dissect __P((struct match *, uchar *, uchar *, sopno, sopno)); 76 static states expand __P((struct re_guts *, int, int, states, int, int)); 77 static uchar *fast __P((struct match *, uchar *, uchar *, sopno, sopno)); 78 static int matcher 79 __P((struct re_guts *, uchar *, size_t, regmatch_t[], int)); 80 static uchar *slow __P((struct match *, uchar *, uchar *, sopno, sopno)); 81 static states step __P((struct re_guts *, int, int, states, u_int, states)); 82 83 #ifdef REDEBUG 84 static void at __P((struct match *, char *, uchar *, uchar *, sopno, stopno)); 85 static char *pchar __P((int)); 86 static void print __P((struct match *, char *, states, u_int, FILE *)); 87 #endif 88 89 /* 90 - matcher - the actual matching engine 91 */ 92 static int /* 0 success, REG_NOMATCH failure */ 93 matcher(g, string, nmatch, pmatch, eflags) 94 register struct re_guts *g; 95 uchar *string; 96 size_t nmatch; 97 regmatch_t pmatch[]; 98 int eflags; 99 { 100 register uchar *endp; 101 register int i; 102 struct match mv; 103 register struct match *m = &mv; 104 register uchar *dp; 105 const register sopno gf = g->firststate+1; /* +1 for OEND */ 106 const register sopno gl = g->laststate; 107 uchar *start; 108 uchar *stop; 109 110 /* simplify the situation where possible */ 111 if (g->cflags®_NOSUB) 112 nmatch = 0; 113 if (eflags®_STARTEND) { 114 start = string + pmatch[0].rm_so; 115 stop = string + pmatch[0].rm_eo; 116 } else { 117 start = string; 118 stop = start + strlen((char *)start); 119 } 120 121 /* prescreening; this does wonders for this rather slow code */ 122 if (g->must != NULL) { 123 for (dp = start; dp < stop; dp++) 124 if (*dp == (uchar)g->must[0] && stop - dp >= g->mlen && 125 strncmp((char *)dp, g->must, (size_t)g->mlen) == 0) 126 break; 127 if (dp == stop) /* we didn't find g->must */ 128 return(REG_NOMATCH); 129 } 130 131 /* match struct setup */ 132 m->g = g; 133 m->eflags = eflags; 134 m->pmatch = NULL; 135 m->lastpos = NULL; 136 m->offp = string; 137 m->beginp = start; 138 m->endp = stop; 139 STATESETUP(m, 4); 140 SETUP(m->st); 141 SETUP(m->fresh); 142 SETUP(m->tmp); 143 SETUP(m->empty); 144 CLEAR(m->empty); 145 146 /* this loop does only one repetition except for backrefs */ 147 for (;;) { 148 endp = fast(m, start, stop, gf, gl); 149 if (endp == NULL) { /* a miss */ 150 STATETEARDOWN(m); 151 return(REG_NOMATCH); 152 } 153 if (nmatch == 0 && !g->backrefs) 154 break; /* no further info needed */ 155 156 /* where? */ 157 assert(m->coldp != NULL); 158 for (;;) { 159 NOTE("finding start"); 160 endp = slow(m, m->coldp, stop, gf, gl); 161 if (endp != NULL) 162 break; 163 assert(m->coldp < m->endp); 164 m->coldp++; 165 } 166 if (nmatch == 1 && !g->backrefs) 167 break; /* no further info needed */ 168 169 /* oh my, he wants the subexpressions... */ 170 if (m->pmatch == NULL) 171 m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * 172 sizeof(regmatch_t)); 173 if (m->pmatch == NULL) { 174 STATETEARDOWN(m); 175 return(REG_ESPACE); 176 } 177 for (i = 1; i <= m->g->nsub; i++) 178 m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; 179 if (!g->backrefs && !(m->eflags®_BACKR)) { 180 NOTE("dissecting"); 181 dp = dissect(m, m->coldp, endp, gf, gl); 182 } else { 183 if (g->nplus > 0 && m->lastpos == NULL) 184 m->lastpos = (uchar **)malloc((g->nplus+1) * 185 sizeof(uchar *)); 186 if (g->nplus > 0 && m->lastpos == NULL) { 187 free(m->pmatch); 188 STATETEARDOWN(m); 189 return(REG_ESPACE); 190 } 191 NOTE("backref dissect"); 192 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 193 } 194 if (dp != NULL) 195 break; 196 197 /* uh-oh... we couldn't find a subexpression-level match */ 198 assert(g->backrefs); /* must be back references doing it */ 199 assert(g->nplus == 0 || m->lastpos != NULL); 200 for (;;) { 201 if (dp != NULL || endp <= m->coldp) 202 break; /* defeat */ 203 NOTE("backoff"); 204 endp = slow(m, m->coldp, endp-1, gf, gl); 205 if (endp == NULL) 206 break; /* defeat */ 207 /* try it on a shorter possibility */ 208 #ifndef NDEBUG 209 for (i = 1; i <= m->g->nsub; i++) { 210 assert(m->pmatch[i].rm_so == -1); 211 assert(m->pmatch[i].rm_eo == -1); 212 } 213 #endif 214 NOTE("backoff dissect"); 215 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 216 } 217 assert(dp == NULL || dp == endp); 218 if (dp != NULL) /* found a shorter one */ 219 break; 220 221 /* despite initial appearances, there is no match here */ 222 NOTE("false alarm"); 223 start = m->coldp + 1; /* recycle starting later */ 224 assert(start <= stop); 225 } 226 227 /* fill in the details if requested */ 228 if (nmatch > 0) { 229 pmatch[0].rm_so = m->coldp - m->offp; 230 pmatch[0].rm_eo = endp - m->offp; 231 } 232 if (nmatch > 1) { 233 assert(m->pmatch != NULL); 234 for (i = 1; i < nmatch; i++) 235 if (i <= m->g->nsub) 236 pmatch[i] = m->pmatch[i]; 237 else { 238 pmatch[i].rm_so = -1; 239 pmatch[i].rm_eo = -1; 240 } 241 } 242 243 if (m->pmatch != NULL) 244 free((char *)m->pmatch); 245 if (m->lastpos != NULL) 246 free((char *)m->lastpos); 247 STATETEARDOWN(m); 248 return(0); 249 } 250 251 /* 252 - dissect - figure out what matched what, no back references 253 */ 254 static uchar * /* == stop (success) always */ 255 dissect(m, start, stop, startst, stopst) 256 register struct match *m; 257 uchar *start; 258 uchar *stop; 259 sopno startst; 260 sopno stopst; 261 { 262 register int i; 263 register sopno ss; /* start sop of current subRE */ 264 register sopno es; /* end sop of current subRE */ 265 register uchar *sp; /* start of string matched by it */ 266 register uchar *stp; /* string matched by it cannot pass here */ 267 register uchar *rest; /* start of rest of string */ 268 register uchar *tail; /* string unmatched by rest of RE */ 269 register sopno ssub; /* start sop of subsubRE */ 270 register sopno esub; /* end sop of subsubRE */ 271 register uchar *ssp; /* start of string matched by subsubRE */ 272 register uchar *sep; /* end of string matched by subsubRE */ 273 register uchar *oldssp; /* previous ssp */ 274 register uchar *dp; 275 276 AT("diss", start, stop, startst, stopst); 277 sp = start; 278 for (ss = startst; ss < stopst; ss = es) { 279 /* identify end of subRE */ 280 es = ss; 281 switch (OP(m->g->strip[es])) { 282 case OPLUS_: 283 case OQUEST_: 284 es += OPND(m->g->strip[es]); 285 break; 286 case OCH_: 287 while (OP(m->g->strip[es]) != O_CH) 288 es += OPND(m->g->strip[es]); 289 break; 290 } 291 es++; 292 293 /* figure out what it matched */ 294 switch (OP(m->g->strip[ss])) { 295 case OEND: 296 assert(0); 297 break; 298 case OCHAR: 299 sp++; 300 break; 301 case OBOL: 302 case OEOL: 303 break; 304 case OANY: 305 case OANYOF: 306 sp++; 307 break; 308 case OBACK_: 309 case O_BACK: 310 assert(0); 311 break; 312 /* cases where length of match is hard to find */ 313 case OQUEST_: 314 stp = stop; 315 for (;;) { 316 /* how long could this one be? */ 317 rest = slow(m, sp, stp, ss, es); 318 assert(rest != NULL); /* it did match */ 319 /* could the rest match the rest? */ 320 tail = slow(m, rest, stop, es, stopst); 321 if (tail == stop) 322 break; /* yes! */ 323 /* no -- try a shorter match for this one */ 324 stp = rest - 1; 325 assert(stp >= sp); /* it did work */ 326 } 327 ssub = ss + 1; 328 esub = es - 1; 329 /* did innards match? */ 330 if (slow(m, sp, rest, ssub, esub) != NULL) { 331 dp = dissect(m, sp, rest, ssub, esub); 332 assert(dp == rest); 333 } else /* no */ 334 assert(sp == rest); 335 sp = rest; 336 break; 337 case OPLUS_: 338 stp = stop; 339 for (;;) { 340 /* how long could this one be? */ 341 rest = slow(m, sp, stp, ss, es); 342 assert(rest != NULL); /* it did match */ 343 /* could the rest match the rest? */ 344 tail = slow(m, rest, stop, es, stopst); 345 if (tail == stop) 346 break; /* yes! */ 347 /* no -- try a shorter match for this one */ 348 stp = rest - 1; 349 assert(stp >= sp); /* it did work */ 350 } 351 ssub = ss + 1; 352 esub = es - 1; 353 ssp = sp; 354 oldssp = ssp; 355 for (;;) { /* find last match of innards */ 356 sep = slow(m, ssp, rest, ssub, esub); 357 if (sep == NULL || sep == ssp) 358 break; /* failed or matched null */ 359 oldssp = ssp; /* on to next try */ 360 ssp = sep; 361 } 362 if (sep == NULL) { 363 /* last successful match */ 364 sep = ssp; 365 ssp = oldssp; 366 } 367 assert(sep == rest); /* must exhaust substring */ 368 assert(slow(m, ssp, sep, ssub, esub) == rest); 369 dp = dissect(m, ssp, sep, ssub, esub); 370 assert(dp == sep); 371 sp = rest; 372 break; 373 case OCH_: 374 stp = stop; 375 for (;;) { 376 /* how long could this one be? */ 377 rest = slow(m, sp, stp, ss, es); 378 assert(rest != NULL); /* it did match */ 379 /* could the rest match the rest? */ 380 tail = slow(m, rest, stop, es, stopst); 381 if (tail == stop) 382 break; /* yes! */ 383 /* no -- try a shorter match for this one */ 384 stp = rest - 1; 385 assert(stp >= sp); /* it did work */ 386 } 387 ssub = ss + 1; 388 esub = ss + OPND(m->g->strip[ss]) - 1; 389 assert(OP(m->g->strip[esub]) == OOR1); 390 for (;;) { /* find first matching branch */ 391 if (slow(m, sp, rest, ssub, esub) == rest) 392 break; /* it matched all of it */ 393 /* that one missed, try next one */ 394 assert(OP(m->g->strip[esub]) == OOR1); 395 esub++; 396 assert(OP(m->g->strip[esub]) == OOR2); 397 ssub = esub + 1; 398 esub += OPND(m->g->strip[esub]); 399 if (OP(m->g->strip[esub]) == OOR2) 400 esub--; 401 else 402 assert(OP(m->g->strip[esub]) == O_CH); 403 } 404 dp = dissect(m, sp, rest, ssub, esub); 405 assert(dp == rest); 406 sp = rest; 407 break; 408 case O_PLUS: 409 case O_QUEST: 410 case OOR1: 411 case OOR2: 412 case O_CH: 413 assert(0); 414 break; 415 case OLPAREN: 416 i = OPND(m->g->strip[ss]); 417 m->pmatch[i].rm_so = sp - m->offp; 418 break; 419 case ORPAREN: 420 i = OPND(m->g->strip[ss]); 421 m->pmatch[i].rm_eo = sp - m->offp; 422 break; 423 default: /* uh oh */ 424 assert(0); 425 break; 426 } 427 } 428 429 assert(sp == stop); 430 return(sp); 431 } 432 433 /* 434 - backref - figure out what matched what, figuring in back references 435 */ 436 static uchar * /* == stop (success) or NULL (failure) */ 437 backref(m, start, stop, startst, stopst, lev) 438 register struct match *m; 439 uchar *start; 440 uchar *stop; 441 sopno startst; 442 sopno stopst; 443 sopno lev; /* PLUS nesting level */ 444 { 445 register int i; 446 register sopno ss; /* start sop of current subRE */ 447 register uchar *sp; /* start of string matched by it */ 448 register sopno ssub; /* start sop of subsubRE */ 449 register sopno esub; /* end sop of subsubRE */ 450 register uchar *ssp; /* start of string matched by subsubRE */ 451 register uchar *dp; 452 register size_t len; 453 register int hard; 454 register sop s; 455 register regoff_t offsave; 456 register cset *cs; 457 458 AT("back", start, stop, startst, stopst); 459 sp = start; 460 461 /* get as far as we can with easy stuff */ 462 hard = 0; 463 for (ss = startst; !hard && ss < stopst; ss++) 464 switch (OP(s = m->g->strip[ss])) { 465 case OCHAR: 466 if (sp == stop || *sp++ != OPND(s)) 467 return(NULL); 468 break; 469 case OANY: 470 if (sp == stop) 471 return(NULL); 472 sp++; 473 break; 474 case OANYOF: 475 cs = &m->g->sets[OPND(s)]; 476 if (sp == stop || !CHIN(cs, *sp++)) 477 return(NULL); 478 break; 479 case OBOL: 480 if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 481 (sp < m->endp && *(sp-1) == '\n' && 482 (m->g->cflags®_NEWLINE)) ) 483 { /* yes */ } 484 else 485 return(NULL); 486 break; 487 case OEOL: 488 if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || 489 (sp < m->endp && *sp == '\n' && 490 (m->g->cflags®_NEWLINE)) ) 491 { /* yes */ } 492 else 493 return(NULL); 494 break; 495 case O_QUEST: 496 break; 497 case OOR1: /* matches null but needs to skip */ 498 ss++; 499 s = m->g->strip[ss]; 500 do { 501 assert(OP(s) == OOR2); 502 ss += OPND(s); 503 } while (OP(s = m->g->strip[ss]) != O_CH); 504 /* note that the ss++ gets us past the O_CH */ 505 break; 506 default: /* have to make a choice */ 507 hard = 1; 508 break; 509 } 510 if (!hard) { /* that was it! */ 511 if (sp != stop) 512 return(NULL); 513 return(sp); 514 } 515 ss--; /* adjust for the for's final increment */ 516 517 /* the hard stuff */ 518 AT("hard", sp, stop, ss, stopst); 519 s = m->g->strip[ss]; 520 switch (OP(s)) { 521 case OBACK_: /* the vilest depths */ 522 i = OPND(s); 523 if (m->pmatch[i].rm_eo == -1) 524 return(NULL); 525 assert(m->pmatch[i].rm_so != -1); 526 len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; 527 assert(stop - m->beginp >= len); 528 if (sp > stop - len) 529 return(NULL); /* not enough left to match */ 530 ssp = m->offp + m->pmatch[i].rm_so; 531 if (strncmp((char *)sp, (char *)ssp, len) != 0) 532 return(NULL); 533 while (m->g->strip[ss] != SOP(O_BACK, i)) 534 ss++; 535 return(backref(m, sp+len, stop, ss+1, stopst, lev)); 536 break; 537 case OQUEST_: /* to null or not */ 538 dp = backref(m, sp, stop, ss+1, stopst, lev); 539 if (dp != NULL) 540 return(dp); /* not */ 541 return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev)); 542 break; 543 case OPLUS_: 544 assert(m->lastpos != NULL); 545 assert(lev+1 <= m->g->nplus); 546 m->lastpos[lev+1] = sp; 547 return(backref(m, sp, stop, ss+1, stopst, lev+1)); 548 break; 549 case O_PLUS: 550 if (sp == m->lastpos[lev]) /* last pass matched null */ 551 return(backref(m, sp, stop, ss+1, stopst, lev-1)); 552 /* try another pass */ 553 m->lastpos[lev] = sp; 554 dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev); 555 if (dp == NULL) 556 return(backref(m, sp, stop, ss+1, stopst, lev-1)); 557 else 558 return(dp); 559 break; 560 case OCH_: /* find the right one, if any */ 561 ssub = ss + 1; 562 esub = ss + OPND(s) - 1; 563 assert(OP(m->g->strip[esub]) == OOR1); 564 for (;;) { /* find first matching branch */ 565 dp = backref(m, sp, stop, ssub, esub, lev); 566 if (dp != NULL) 567 return(dp); 568 /* that one missed, try next one */ 569 if (OP(m->g->strip[esub]) == O_CH) 570 return(NULL); /* there is none */ 571 esub++; 572 assert(OP(m->g->strip[esub]) == OOR2); 573 ssub = esub + 1; 574 esub += OPND(m->g->strip[esub]); 575 if (OP(m->g->strip[esub]) == OOR2) 576 esub--; 577 else 578 assert(OP(m->g->strip[esub]) == O_CH); 579 } 580 break; 581 case OLPAREN: /* must undo assignment if rest fails */ 582 i = OPND(s); 583 offsave = m->pmatch[i].rm_so; 584 m->pmatch[i].rm_so = sp - m->offp; 585 dp = backref(m, sp, stop, ss+1, stopst, lev); 586 if (dp != NULL) 587 return(dp); 588 m->pmatch[i].rm_so = offsave; 589 return(NULL); 590 break; 591 case ORPAREN: /* must undo assignment if rest fails */ 592 i = OPND(s); 593 offsave = m->pmatch[i].rm_eo; 594 m->pmatch[i].rm_eo = sp - m->offp; 595 dp = backref(m, sp, stop, ss+1, stopst, lev); 596 if (dp != NULL) 597 return(dp); 598 m->pmatch[i].rm_eo = offsave; 599 return(NULL); 600 break; 601 default: /* uh oh */ 602 assert(0); 603 break; 604 } 605 606 /* "can't happen" */ 607 assert(0); 608 /* NOTREACHED */ 609 } 610 611 /* 612 - fast - step through the string at top speed 613 */ 614 static uchar * /* where tentative match ended, or NULL */ 615 fast(m, start, stop, startst, stopst) 616 register struct match *m; 617 uchar *start; 618 uchar *stop; 619 sopno startst; 620 sopno stopst; 621 { 622 register states st = m->st; 623 register states fresh = m->fresh; 624 register states tmp = m->tmp; 625 register uchar *p = start; 626 register uchar c = (start == m->beginp) ? '\0' : *(start-1); 627 register uchar lastc; /* previous c */ 628 register int atbol; 629 register int ateol; 630 register uchar *coldp; /* last p after which no match was underway */ 631 632 CLEAR(st); 633 SET1(st, startst); 634 st = expand(m->g, startst, stopst, st, 0, 0); 635 ASSIGN(fresh, st); 636 SP("start", st, *p); 637 coldp = NULL; 638 for (;;) { 639 /* next character */ 640 lastc = c; 641 c = (p == m->endp) ? '\0' : *p; 642 if (EQ(st, fresh)) 643 coldp = p; 644 645 /* is there an EOL and/or BOL between lastc and c? */ 646 atbol = ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 647 (lastc == '\0' && !(m->eflags®_NOTBOL)) ); 648 ateol = ( (c == '\n' && m->g->cflags®_NEWLINE) || 649 (c == '\0' && !(m->eflags®_NOTEOL)) ); 650 if (atbol || ateol) { 651 st = expand(m->g, startst, stopst, st, atbol, ateol); 652 SP("bef", st, c); 653 } 654 655 /* are we done? */ 656 if (ISSET(st, stopst) || p == stop) 657 break; /* NOTE BREAK OUT */ 658 659 /* no, we must deal with this character */ 660 ASSIGN(tmp, st); 661 ASSIGN(st, fresh); 662 st = step(m->g, startst, stopst, tmp, c, st); 663 SP("aft", st, c); 664 assert(EQ(expand(m->g, startst, stopst, st, 0, 0), st)); 665 p++; 666 } 667 668 assert(coldp != NULL); 669 m->coldp = coldp; 670 if (ISSET(st, stopst)) 671 return(p+1); 672 else 673 return(NULL); 674 } 675 676 /* 677 - slow - step through the string more deliberately 678 */ 679 static uchar * /* where it ended */ 680 slow(m, start, stop, startst, stopst) 681 register struct match *m; 682 uchar *start; 683 uchar *stop; 684 sopno startst; 685 sopno stopst; 686 { 687 register states st = m->st; 688 register states empty = m->empty; 689 register states tmp = m->tmp; 690 register uchar *p = start; 691 register uchar c = (start == m->beginp) ? '\0' : *(start-1); 692 register uchar lastc; /* previous c */ 693 register int atbol; 694 register int ateol; 695 register uchar *matchp; /* last p at which a match ended */ 696 697 AT("slow", start, stop, startst, stopst); 698 CLEAR(st); 699 SET1(st, startst); 700 SP("sstart", st, *p); 701 matchp = NULL; 702 for (;;) { 703 /* next character */ 704 lastc = c; 705 c = (p == m->endp) ? '\0' : *p; 706 707 /* is there an EOL and/or BOL between lastc and c? */ 708 atbol = ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 709 (lastc == '\0' && !(m->eflags®_NOTBOL)) ); 710 ateol = ( (c == '\n' && m->g->cflags®_NEWLINE) || 711 (c == '\0' && !(m->eflags®_NOTEOL)) ); 712 713 /* do we need an expansion before looking at the char? */ 714 if (p == start || atbol || ateol) { 715 st = expand(m->g, startst, stopst, st, atbol, ateol); 716 SP("sbef", st, c); 717 } 718 719 /* are we done? */ 720 if (ISSET(st, stopst)) 721 matchp = p; 722 if (EQ(st, empty) || p == stop) 723 break; /* NOTE BREAK OUT */ 724 725 /* no, we must deal with this character */ 726 ASSIGN(tmp, st); 727 ASSIGN(st, empty); 728 st = step(m->g, startst, stopst, tmp, c, st); 729 SP("saft", st, c); 730 assert(EQ(expand(m->g, startst, stopst, st, 0, 0), st)); 731 p++; 732 } 733 734 return(matchp); 735 } 736 737 738 /* 739 - expand - return set of states reachable from an initial set 740 */ 741 static states 742 expand(g, start, stop, st, atbol, ateol) 743 register struct re_guts *g; 744 int start; /* start state within strip */ 745 int stop; /* state after stop state within strip */ 746 register states st; 747 int atbol; /* at start or just after \n? (for BOL) */ 748 int ateol; /* just before \n or \0? (for EOL) */ 749 { 750 register sop s; 751 register sopno pc; 752 register onestate here; /* note, macros know this name */ 753 register sopno look; 754 register int i; 755 756 for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { 757 s = g->strip[pc]; 758 switch (OP(s)) { 759 case OEND: 760 assert(pc == stop-1); 761 break; 762 case OCHAR: /* can't get past this */ 763 break; 764 case OBOL: 765 if (atbol) 766 FWD(st, st, 1); 767 break; 768 case OEOL: 769 if (ateol) 770 FWD(st, st, 1); 771 break; 772 case OANY: /* can't get past this either */ 773 break; 774 case OANYOF: /* or this */ 775 break; 776 case OBACK_: /* not significant here */ 777 case O_BACK: 778 FWD(st, st, 1); 779 break; 780 case OPLUS_: /* forward, this is just an empty */ 781 FWD(st, st, 1); 782 break; 783 case O_PLUS: /* both forward and back */ 784 FWD(st, st, 1); 785 i = ISSETBACK(st, OPND(s)); 786 BACK(st, st, OPND(s)); 787 if (!i && ISSETBACK(st, OPND(s))) { 788 /* oho, must reconsider loop body */ 789 pc -= OPND(s) + 1; 790 INIT(here, pc); 791 } 792 break; 793 case OQUEST_: /* two branches, both forward */ 794 FWD(st, st, 1); 795 FWD(st, st, OPND(s)); 796 break; 797 case O_QUEST: /* just an empty */ 798 FWD(st, st, 1); 799 break; 800 case OLPAREN: /* not significant here */ 801 case ORPAREN: 802 FWD(st, st, 1); 803 break; 804 case OCH_: /* mark the first two branches */ 805 FWD(st, st, 1); 806 assert(OP(g->strip[pc+OPND(s)]) == OOR2); 807 FWD(st, st, OPND(s)); 808 break; 809 case OOR1: /* done a branch, find the O_CH */ 810 if (ISSTATEIN(st, here)) { 811 for (look = 1; 812 OP(s = g->strip[pc+look]) != O_CH; 813 look += OPND(s)) 814 assert(OP(s) == OOR2); 815 FWD(st, st, look); 816 } 817 break; 818 case OOR2: /* propagate OCH_'s marking onward */ 819 FWD(st, st, 1); 820 if (OP(g->strip[pc+OPND(s)]) != O_CH) { 821 assert(OP(g->strip[pc+OPND(s)]) == OOR2); 822 FWD(st, st, OPND(s)); 823 } 824 break; 825 case O_CH: /* just an empty */ 826 FWD(st, st, 1); 827 break; 828 default: /* ooooops... */ 829 assert(0); 830 break; 831 } 832 } 833 834 return(st); 835 } 836 837 /* 838 - step - map set of states reachable before char to set reachable after 839 */ 840 static states 841 step(g, start, stop, bef, ch, aft) 842 register struct re_guts *g; 843 int start; /* start state within strip */ 844 int stop; /* state after stop state within strip */ 845 register states bef; /* states reachable before */ 846 u_int ch; 847 register states aft; /* states already known reachable after */ 848 { 849 register cset *cs; 850 register sop s; 851 register sopno pc; 852 register onestate here; /* note, macros know this name */ 853 register sopno look; 854 register int i; 855 856 for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { 857 s = g->strip[pc]; 858 switch (OP(s)) { 859 case OEND: 860 assert(pc == stop-1); 861 break; 862 case OCHAR: 863 if (ch == OPND(s)) 864 FWD(aft, bef, 1); 865 break; 866 case OBOL: /* these are looked after by expand() */ 867 case OEOL: 868 break; 869 case OANY: 870 FWD(aft, bef, 1); 871 break; 872 case OANYOF: 873 cs = &g->sets[OPND(s)]; 874 if (CHIN(cs, ch)) 875 FWD(aft, bef, 1); 876 break; 877 case OBACK_: /* ignored here */ 878 case O_BACK: 879 FWD(aft, aft, 1); 880 break; 881 case OPLUS_: /* forward, this is just an empty */ 882 FWD(aft, aft, 1); 883 break; 884 case O_PLUS: /* both forward and back */ 885 FWD(aft, aft, 1); 886 i = ISSETBACK(aft, OPND(s)); 887 BACK(aft, aft, OPND(s)); 888 if (!i && ISSETBACK(aft, OPND(s))) { 889 /* oho, must reconsider loop body */ 890 pc -= OPND(s) + 1; 891 INIT(here, pc); 892 } 893 break; 894 case OQUEST_: /* two branches, both forward */ 895 FWD(aft, aft, 1); 896 FWD(aft, aft, OPND(s)); 897 break; 898 case O_QUEST: /* just an empty */ 899 FWD(aft, aft, 1); 900 break; 901 case OLPAREN: /* not significant here */ 902 case ORPAREN: 903 FWD(aft, aft, 1); 904 break; 905 case OCH_: /* mark the first two branches */ 906 FWD(aft, aft, 1); 907 assert(OP(g->strip[pc+OPND(s)]) == OOR2); 908 FWD(aft, aft, OPND(s)); 909 break; 910 case OOR1: /* done a branch, find the O_CH */ 911 if (ISSTATEIN(aft, here)) { 912 for (look = 1; 913 OP(s = g->strip[pc+look]) != O_CH; 914 look += OPND(s)) 915 assert(OP(s) == OOR2); 916 FWD(aft, aft, look); 917 } 918 break; 919 case OOR2: /* propagate OCH_'s marking */ 920 FWD(aft, aft, 1); 921 if (OP(g->strip[pc+OPND(s)]) != O_CH) { 922 assert(OP(g->strip[pc+OPND(s)]) == OOR2); 923 FWD(aft, aft, OPND(s)); 924 } 925 break; 926 case O_CH: /* just empty */ 927 FWD(aft, aft, 1); 928 break; 929 default: /* ooooops... */ 930 assert(0); 931 break; 932 } 933 } 934 935 return(aft); 936 } 937 938 #ifdef REDEBUG 939 /* 940 - print - print a set of states 941 */ 942 static void 943 print(m, caption, st, ch, d) 944 struct match *m; 945 char *caption; 946 states st; 947 u_int ch; 948 FILE *d; 949 { 950 register struct re_guts *g = m->g; 951 register int i; 952 register int first = 1; 953 954 if (!(m->eflags®_TRACE)) 955 return; 956 957 fprintf(d, "%s", caption); 958 if (ch != '\0') 959 fprintf(d, " %s", pchar(ch)); 960 for (i = 0; i < g->nstates; i++) 961 if (ISSET(st, i)) { 962 fprintf(d, "%s%d", (first) ? "\t" : ", ", i); 963 first = 0; 964 } 965 fprintf(d, "\n"); 966 } 967 968 /* 969 - at - print current situation 970 */ 971 static void 972 at(m, title, start, stop, startst, stopst) 973 struct match *m; 974 char *title; 975 uchar *start; 976 uchar *stop; 977 sopno startst; 978 sopno stopst; 979 { 980 if (!(m->eflags®_TRACE)) 981 return; 982 983 printf("%s %s-", title, pchar(*start)); 984 printf("%s ", pchar(*stop)); 985 printf("%ld-%ld\n", (long)startst, (long)stopst); 986 } 987 988 #ifndef PCHARDONE 989 #define PCHARDONE /* never again */ 990 /* 991 - pchar - make a character printable 992 * 993 * Is this identical to regchar() over in debug.c? Well, yes. But a 994 * duplicate here avoids having a debugging-capable regexec.o tied to 995 * a matching debug.o, and this is convenient. It all disappears in 996 * the non-debug compilation anyway, so it doesn't matter much. 997 */ 998 static char * /* -> representation */ 999 pchar(ch) 1000 int ch; 1001 { 1002 static char pbuf[10]; 1003 1004 if (isprint(ch) || ch == ' ') 1005 sprintf(pbuf, "%c", ch); 1006 else 1007 sprintf(pbuf, "\\%o", ch); 1008 return(pbuf); 1009 } 1010 #endif 1011 #endif 1012 1013 #undef matcher 1014 #undef fast 1015 #undef slow 1016 #undef dissect 1017 #undef backref 1018 #undef expand 1019 #undef step 1020 #undef print 1021 #undef at 1022 #undef match 1023