1 /*- 2 * Copyright (c) 1992 Henry Spencer. 3 * Copyright (c) 1992 The Regents of the University of California. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Henry Spencer of the University of Toronto. 8 * 9 * %sccs.include.redist.c% 10 * 11 * @(#)engine.c 5.2 (Berkeley) 08/10/92 12 */ 13 14 /* 15 * The matching engine and friends. This file is #included by regexec.c 16 * after suitable #defines of a variety of macros used herein, so that 17 * different state representations can be used without duplicating masses 18 * of code. 19 */ 20 21 #ifdef SNAMES 22 #define matcher smatcher 23 #define fast sfast 24 #define slow sslow 25 #define dissect sdissect 26 #define backref sbackref 27 #define expand sexpand 28 #define step sstep 29 #define print sprint 30 #define match smat 31 #endif 32 #ifdef LNAMES 33 #define matcher lmatcher 34 #define fast lfast 35 #define slow lslow 36 #define dissect ldissect 37 #define backref lbackref 38 #define expand lexpand 39 #define step lstep 40 #define print lprint 41 #define match lmat 42 #endif 43 44 /* another structure passed up and down to avoid zillions of parameters */ 45 struct match { 46 struct re_guts *g; 47 int eflags; 48 regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ 49 uchar *offp; /* offsets work from here */ 50 uchar *beginp; /* start of string -- virtual NUL precedes */ 51 uchar *endp; /* end of string -- virtual NUL here */ 52 uchar *coldp; /* can be no match starting before here */ 53 uchar **lastpos; /* [nplus+1] */ 54 STATEVARS; 55 states st; /* current states */ 56 states fresh; /* states for a fresh start */ 57 states tmp; /* temporary */ 58 states empty; /* empty set of states */ 59 }; 60 61 #ifndef NDEBUG 62 STATIC void print(); 63 extern char *regchar(); 64 #define SP(t, s, c) { if (m->eflags®_TRACE) print(m->g, t, s, c, stdout); } 65 #define DO(t, p1, p2, s1, s2) { if (m->eflags®_TRACE) { \ 66 printf("%s %s-", t, regchar(*(p1))); \ 67 printf("%s ", regchar(*(p2))); \ 68 printf("%ld-%ld\n", s1, s2); } } 69 #else 70 #define SP(t, s, c) /* nothing */ 71 #define DO(t, p1, p2, s1, s2) /* nothing */ 72 #endif 73 74 STATIC uchar *fast(); 75 STATIC uchar *slow(); 76 STATIC uchar *dissect(); 77 STATIC uchar *backref(); 78 STATIC states expand(); 79 STATIC states step(); 80 81 /* 82 - matcher - the actual matching engine 83 */ 84 static int /* 0 success, REG_NOMATCH failure */ 85 matcher(g, string, nmatch, pmatch, eflags) 86 register struct re_guts *g; 87 uchar *string; 88 size_t nmatch; 89 regmatch_t pmatch[]; 90 int eflags; 91 { 92 register uchar *endp; 93 register int i; 94 struct match mv; 95 register struct match *m = &mv; 96 register uchar *dp; 97 const register sopno gf = g->firststate+1; /* +1 for OEND */ 98 const register sopno gl = g->laststate; 99 uchar *start; 100 uchar *stop; 101 102 if (g->cflags®_NOSUB) 103 nmatch = 0; /* simplify tests */ 104 if (eflags®_STARTEND) { 105 start = string + pmatch[0].rm_so; 106 stop = string + pmatch[0].rm_eo; 107 } else { 108 start = string; 109 stop = start + strlen((char *)start); 110 } 111 112 /* prescreening; this does wonders for this rather slow code */ 113 if (g->must != NULL) { 114 for (dp = start; dp < stop; dp++) 115 if (*dp == (uchar)g->must[0] && stop - dp >= g->mlen && 116 strncmp((char *)dp, g->must, (size_t)g->mlen) == 0) 117 break; 118 if (dp == stop) /* we didn't find g->must */ 119 return(REG_NOMATCH); 120 } 121 122 /* match struct setup */ 123 m->g = g; 124 m->eflags = eflags; 125 m->pmatch = NULL; 126 m->lastpos = NULL; 127 m->offp = string; 128 m->beginp = start; 129 m->endp = stop; 130 STATESETUP(m, 4); 131 SETUP(m->st); 132 SETUP(m->fresh); 133 SETUP(m->tmp); 134 SETUP(m->empty); 135 CLEAR(m->empty); 136 137 /* this loop does only one repetition except for backrefs */ 138 for (;;) { 139 endp = fast(m, start, stop, gf, gl); 140 if (endp == NULL) { /* a miss */ 141 STATETEARDOWN(m); 142 return(REG_NOMATCH); 143 } 144 if (nmatch == 0 && !g->backrefs) 145 break; /* no further info needed */ 146 147 /* where? */ 148 assert(m->coldp != NULL); 149 for (;;) { 150 endp = slow(m, m->coldp, stop, gf, gl); 151 if (endp != NULL) 152 break; 153 assert(*m->coldp != '\0'); 154 m->coldp++; 155 } 156 if (nmatch == 1 && !g->backrefs) 157 break; /* no further info needed */ 158 159 /* oh my, he wants the subexpressions... */ 160 if (m->pmatch == NULL) 161 m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * 162 sizeof(regmatch_t)); 163 if (m->pmatch == NULL) { 164 STATETEARDOWN(m); 165 return(REG_ESPACE); 166 } 167 for (i = 1; i <= m->g->nsub; i++) { 168 m->pmatch[i].rm_so = -1; 169 m->pmatch[i].rm_eo = -1; 170 } 171 if (!g->backrefs) 172 dp = dissect(m, m->coldp, endp, gf, gl); 173 else { 174 if (g->nplus > 0 && m->lastpos == NULL) 175 m->lastpos = (uchar **)malloc((g->nplus+1) * 176 sizeof(uchar *)); 177 if (g->nplus > 0 && m->lastpos == NULL) { 178 free(m->pmatch); 179 STATETEARDOWN(m); 180 return(REG_ESPACE); 181 } 182 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 183 } 184 if (dp != NULL) 185 break; 186 187 /* uh-oh... we couldn't find a subexpression-level match */ 188 assert(g->backrefs); /* must be back references doing it */ 189 assert(g->nplus == 0 || m->lastpos != NULL); 190 while (dp == NULL && endp > m->coldp && 191 (endp = slow(m, m->coldp, endp-1, gf, gl)) != NULL) { 192 /* try it on a shorter possibility */ 193 for (i = 1; i <= m->g->nsub; i++) { 194 m->pmatch[i].rm_so = -1; 195 m->pmatch[i].rm_eo = -1; 196 } 197 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 198 } 199 assert(dp == NULL || dp == endp); 200 if (dp != NULL) /* found a shorter one */ 201 break; 202 203 /* despite initial appearances, there is no match here */ 204 start = m->coldp + 1; /* recycle starting later */ 205 assert(start <= stop); 206 } 207 208 /* fill in the details if requested */ 209 if (nmatch > 0) { 210 pmatch[0].rm_so = m->coldp - m->offp; 211 pmatch[0].rm_eo = endp - m->offp; 212 } 213 if (nmatch > 1) { 214 assert(m->pmatch != NULL); 215 for (i = 1; i < nmatch; i++) 216 if (i <= m->g->nsub) 217 pmatch[i] = m->pmatch[i]; 218 else { 219 pmatch[i].rm_so = -1; 220 pmatch[i].rm_eo = -1; 221 } 222 } 223 224 if (m->pmatch != NULL) 225 free((char *)m->pmatch); 226 if (m->lastpos != NULL) 227 free((char *)m->lastpos); 228 STATETEARDOWN(m); 229 return(0); 230 } 231 232 /* 233 - dissect - figure out what matched what, no back references 234 */ 235 static uchar * /* == stop (success) always */ 236 dissect(m, start, stop, startst, stopst) 237 register struct match *m; 238 uchar *start; 239 uchar *stop; 240 sopno startst; 241 sopno stopst; 242 { 243 register int i; 244 register sopno ss; /* start sop of current subRE */ 245 register sopno es; /* end sop of current subRE */ 246 register uchar *sp; /* start of string matched by it */ 247 register uchar *stp; /* string matched by it cannot pass here */ 248 register uchar *rest; /* start of rest of string */ 249 register uchar *tail; /* string unmatched by rest of RE */ 250 register sopno ssub; /* start sop of subsubRE */ 251 register sopno esub; /* end sop of subsubRE */ 252 register uchar *ssp; /* start of string matched by subsubRE */ 253 register uchar *sep; /* end of string matched by subsubRE */ 254 register uchar *oldssp; /* previous ssp */ 255 register uchar *dp; 256 register size_t len; 257 258 DO("diss", start, stop, startst, stopst); 259 sp = start; 260 for (ss = startst; ss < stopst; ss = es) { 261 /* identify end of subRE */ 262 es = ss; 263 switch (OP(m->g->strip[es])) { 264 case OPLUS_: 265 case OQUEST_: 266 es += OPND(m->g->strip[es]); 267 break; 268 case OCH_: 269 while (OP(m->g->strip[es]) != O_CH) 270 es += OPND(m->g->strip[es]); 271 break; 272 } 273 es++; 274 275 /* figure out what it matched */ 276 switch (OP(m->g->strip[ss])) { 277 case OEND: 278 assert(0); 279 break; 280 case OCHAR: 281 sp++; 282 break; 283 case OBOL: 284 case OEOL: 285 break; 286 case OANY: 287 case OANYOF: 288 sp++; 289 break; 290 case OBACK_: 291 case O_BACK: 292 assert(0); 293 break; 294 /* cases where length of match is hard to find */ 295 case OQUEST_: 296 stp = stop; 297 for (;;) { 298 /* how long could this one be? */ 299 rest = slow(m, sp, stp, ss, es); 300 assert(rest != NULL); /* it did match */ 301 /* could the rest match the rest? */ 302 tail = slow(m, rest, stop, es, stopst); 303 if (tail == stop) 304 break; /* yes! */ 305 /* no -- try a shorter match for this one */ 306 stp = rest - 1; 307 assert(stp >= sp); /* it did work */ 308 } 309 ssub = ss + 1; 310 esub = es - 1; 311 /* did innards match? */ 312 if (slow(m, sp, rest, ssub, esub) != NULL) { 313 dp = dissect(m, sp, rest, ssub, esub); 314 assert(dp == rest); 315 } else /* no */ 316 assert(sp == rest); 317 sp = rest; 318 break; 319 case OPLUS_: 320 stp = stop; 321 for (;;) { 322 /* how long could this one be? */ 323 rest = slow(m, sp, stp, ss, es); 324 assert(rest != NULL); /* it did match */ 325 /* could the rest match the rest? */ 326 tail = slow(m, rest, stop, es, stopst); 327 if (tail == stop) 328 break; /* yes! */ 329 /* no -- try a shorter match for this one */ 330 stp = rest - 1; 331 assert(stp >= sp); /* it did work */ 332 } 333 ssub = ss + 1; 334 esub = es - 1; 335 ssp = sp; 336 oldssp = ssp; 337 for (;;) { /* find last match of innards */ 338 sep = slow(m, ssp, rest, ssub, esub); 339 if (sep == NULL || sep == ssp) 340 break; /* failed or matched null */ 341 oldssp = ssp; /* on to next try */ 342 ssp = sep; 343 } 344 if (sep == NULL) { 345 /* last successful match */ 346 sep = ssp; 347 ssp = oldssp; 348 } 349 assert(sep == rest); /* must exhaust substring */ 350 assert(slow(m, ssp, sep, ssub, esub) == rest); 351 dp = dissect(m, ssp, sep, ssub, esub); 352 assert(dp == sep); 353 sp = rest; 354 break; 355 case OCH_: 356 stp = stop; 357 for (;;) { 358 /* how long could this one be? */ 359 rest = slow(m, sp, stp, ss, es); 360 assert(rest != NULL); /* it did match */ 361 /* could the rest match the rest? */ 362 tail = slow(m, rest, stop, es, stopst); 363 if (tail == stop) 364 break; /* yes! */ 365 /* no -- try a shorter match for this one */ 366 stp = rest - 1; 367 assert(stp >= sp); /* it did work */ 368 } 369 ssub = ss + 1; 370 esub = ss + OPND(m->g->strip[ss]) - 1; 371 assert(OP(m->g->strip[esub]) == OOR1); 372 for (;;) { /* find first matching branch */ 373 if (slow(m, sp, rest, ssub, esub) == rest) 374 break; /* it matched all of it */ 375 /* that one missed, try next one */ 376 assert(OP(m->g->strip[esub]) == OOR1); 377 esub++; 378 assert(OP(m->g->strip[esub]) == OOR2); 379 ssub = esub + 1; 380 esub += OPND(m->g->strip[esub]); 381 if (OP(m->g->strip[esub]) == OOR2) 382 esub--; 383 else 384 assert(OP(m->g->strip[esub]) == O_CH); 385 } 386 dp = dissect(m, sp, rest, ssub, esub); 387 assert(dp == rest); 388 sp = rest; 389 break; 390 case O_PLUS: 391 case O_QUEST: 392 case OOR1: 393 case OOR2: 394 case O_CH: 395 assert(0); 396 break; 397 case OLPAREN: 398 i = OPND(m->g->strip[ss]); 399 m->pmatch[i].rm_so = sp - m->offp; 400 break; 401 case ORPAREN: 402 i = OPND(m->g->strip[ss]); 403 m->pmatch[i].rm_eo = sp - m->offp; 404 break; 405 default: /* uh oh */ 406 assert(0); 407 break; 408 } 409 } 410 411 assert(sp == stop); 412 return(sp); 413 } 414 415 /* 416 - backref - figure out what matched what, figuring in back references 417 */ 418 static uchar * /* == stop (success) or NULL (failure) */ 419 backref(m, start, stop, startst, stopst, lev) 420 register struct match *m; 421 uchar *start; 422 uchar *stop; 423 sopno startst; 424 sopno stopst; 425 sopno lev; /* PLUS nesting level */ 426 { 427 register int i; 428 register sopno ss; /* start sop of current subRE */ 429 register uchar *sp; /* start of string matched by it */ 430 register sopno ssub; /* start sop of subsubRE */ 431 register sopno esub; /* end sop of subsubRE */ 432 register uchar *ssp; /* start of string matched by subsubRE */ 433 register uchar *dp; 434 register size_t len; 435 register int hard; 436 register sop s; 437 register regoff_t offsave; 438 register cset *cs; 439 440 DO("back", start, stop, startst, stopst); 441 sp = start; 442 443 /* get as far as we can with easy stuff */ 444 hard = 0; 445 for (ss = startst; !hard && ss < stopst; ss++) 446 switch (OP(s = m->g->strip[ss])) { 447 case OCHAR: 448 if (sp == stop || *sp++ != OPND(s)) 449 return(NULL); 450 break; 451 case OANY: 452 if (sp == stop) 453 return(NULL); 454 sp++; 455 break; 456 case OANYOF: 457 cs = &m->g->sets[OPND(s)]; 458 if (sp == stop || !CHIN(cs, *sp++)) 459 return(NULL); 460 break; 461 case OBOL: 462 if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 463 (sp < m->endp && *(sp-1) == '\n' && 464 (m->g->cflags®_NEWLINE)) ) 465 { /* yes */ } 466 else 467 return(NULL); 468 break; 469 case OEOL: 470 if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || 471 (sp < m->endp && *sp == '\n' && 472 (m->g->cflags®_NEWLINE)) ) 473 { /* yes */ } 474 else 475 return(NULL); 476 break; 477 case O_QUEST: 478 break; 479 case OOR1: /* matches null but needs to skip */ 480 ss++; 481 s = m->g->strip[ss]; 482 do { 483 assert(OP(s) == OOR2); 484 ss += OPND(s); 485 } while (OP(s = m->g->strip[ss]) != O_CH); 486 /* note that the ss++ gets us past the O_CH */ 487 break; 488 default: /* have to make a choice */ 489 hard = 1; 490 break; 491 } 492 if (!hard) { /* that was it! */ 493 if (sp != stop) 494 return(NULL); 495 return(sp); 496 } 497 ss--; /* adjust for the for's final increment */ 498 499 /* the hard stuff */ 500 DO("hard", sp, stop, ss, stopst); 501 s = m->g->strip[ss]; 502 switch (OP(s)) { 503 case OBACK_: /* the vilest depths */ 504 i = OPND(s); 505 if (m->pmatch[i].rm_eo == -1) 506 return(NULL); 507 assert(m->pmatch[i].rm_so != -1); 508 len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; 509 assert(stop - m->beginp >= len); 510 if (sp > stop - len) 511 return(NULL); /* not enough left to match */ 512 ssp = m->offp + m->pmatch[i].rm_so; 513 if (strncmp((char *)sp, (char *)ssp, len) != 0) 514 return(NULL); 515 while (m->g->strip[ss] != SOP(O_BACK, i)) 516 ss++; 517 return(backref(m, sp+len, stop, ss+1, stopst, lev)); 518 break; 519 case OQUEST_: /* to null or not */ 520 dp = backref(m, sp, stop, ss+1, stopst, lev); 521 if (dp != NULL) 522 return(dp); /* not */ 523 return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev)); 524 break; 525 case OPLUS_: 526 assert(m->lastpos != NULL); 527 assert(lev+1 <= m->g->nplus); 528 m->lastpos[lev+1] = sp; 529 return(backref(m, sp, stop, ss+1, stopst, lev+1)); 530 break; 531 case O_PLUS: 532 if (sp == m->lastpos[lev]) /* last pass matched null */ 533 return(backref(m, sp, stop, ss+1, stopst, lev-1)); 534 /* try another pass */ 535 m->lastpos[lev] = sp; 536 dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev); 537 if (dp == NULL) 538 return(backref(m, sp, stop, ss+1, stopst, lev-1)); 539 else 540 return(dp); 541 break; 542 case OCH_: /* find the right one, if any */ 543 ssub = ss + 1; 544 esub = ss + OPND(s) - 1; 545 assert(OP(m->g->strip[esub]) == OOR1); 546 for (;;) { /* find first matching branch */ 547 dp = backref(m, sp, stop, ssub, esub, lev); 548 if (dp != NULL) 549 return(dp); 550 /* that one missed, try next one */ 551 if (OP(m->g->strip[esub]) == O_CH) 552 return(NULL); /* there is none */ 553 esub++; 554 assert(OP(m->g->strip[esub]) == OOR2); 555 ssub = esub + 1; 556 esub += OPND(m->g->strip[esub]); 557 if (OP(m->g->strip[esub]) == OOR2) 558 esub--; 559 else 560 assert(OP(m->g->strip[esub]) == O_CH); 561 } 562 break; 563 case OLPAREN: /* must undo assignment if rest fails */ 564 i = OPND(s); 565 offsave = m->pmatch[i].rm_so; 566 m->pmatch[i].rm_so = sp - m->offp; 567 dp = backref(m, sp, stop, ss+1, stopst, lev); 568 if (dp != NULL) 569 return(dp); 570 m->pmatch[i].rm_so = offsave; 571 return(NULL); 572 break; 573 case ORPAREN: /* must undo assignment if rest fails */ 574 i = OPND(s); 575 offsave = m->pmatch[i].rm_eo; 576 m->pmatch[i].rm_eo = sp - m->offp; 577 dp = backref(m, sp, stop, ss+1, stopst, lev); 578 if (dp != NULL) 579 return(dp); 580 m->pmatch[i].rm_eo = offsave; 581 return(NULL); 582 break; 583 default: /* uh oh */ 584 assert(0); 585 break; 586 } 587 588 /* "can't happen" */ 589 assert(0); 590 /* NOTREACHED */ 591 } 592 593 /* 594 - fast - step through the string at top speed 595 */ 596 static uchar * /* where tentative match ended, or NULL */ 597 fast(m, start, stop, startst, stopst) 598 register struct match *m; 599 uchar *start; 600 uchar *stop; 601 sopno startst; 602 sopno stopst; 603 { 604 register states st = m->st; 605 register states fresh = m->fresh; 606 register states tmp = m->tmp; 607 register uchar *p = start; 608 register uchar c; 609 register uchar lastc; /* previous c */ 610 register int atbol; 611 register int ateol; 612 register uchar *coldp; /* last p after which no match was underway */ 613 614 CLEAR(st); 615 SET1(st, startst); 616 st = expand(m->g, startst, stopst, st, 0, 0); 617 ASSIGN(fresh, st); 618 SP("start", st, *p); 619 c = '\0'; 620 coldp = NULL; 621 for (;;) { 622 /* next character */ 623 lastc = c; 624 c = (p == m->endp) ? '\0' : *p; 625 if (EQ(st, fresh)) 626 coldp = p; 627 628 /* is there an EOL and/or BOL between lastc and c? */ 629 atbol = ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 630 (lastc == '\0' && !(m->eflags®_NOTBOL)) ); 631 ateol = ( (c == '\n' && m->g->cflags®_NEWLINE) || 632 (c == '\0' && !(m->eflags®_NOTEOL)) ); 633 if (atbol || ateol) { 634 st = expand(m->g, startst, stopst, st, atbol, ateol); 635 SP("bef", st, c); 636 } 637 638 /* are we done? */ 639 if (ISSET(st, stopst) || p == stop) 640 break; /* NOTE BREAK OUT */ 641 642 /* no, we must deal with this character */ 643 ASSIGN(tmp, st); 644 ASSIGN(st, fresh); 645 st = step(m->g, startst, stopst, tmp, c, st); 646 SP("aft", st, c); 647 assert(EQ(expand(m->g, startst, stopst, st, 0, 0), st)); 648 p++; 649 } 650 651 assert(coldp != NULL); 652 m->coldp = coldp; 653 if (ISSET(st, stopst)) 654 return(p+1); 655 else 656 return(NULL); 657 } 658 659 /* 660 - slow - step through the string more deliberately 661 */ 662 static uchar * /* where it ended */ 663 slow(m, start, stop, startst, stopst) 664 register struct match *m; 665 uchar *start; 666 uchar *stop; 667 sopno startst; 668 sopno stopst; 669 { 670 register states st = m->st; 671 register states empty = m->empty; 672 register states tmp = m->tmp; 673 register uchar *p = start; 674 register uchar c = (start == m->beginp) ? '\0' : *(start-1); 675 register uchar lastc; /* previous c */ 676 register int atbol; 677 register int ateol; 678 register uchar *matchp; /* last p at which a match ended */ 679 680 DO("slow", start, stop, startst, stopst); 681 CLEAR(st); 682 SET1(st, startst); 683 SP("sstart", st, *p); 684 matchp = NULL; 685 for (;;) { 686 /* next character */ 687 lastc = c; 688 c = (p == m->endp) ? '\0' : *p; 689 690 /* is there an EOL and/or BOL between lastc and c? */ 691 atbol = ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 692 (lastc == '\0' && !(m->eflags®_NOTBOL)) ); 693 ateol = ( (c == '\n' && m->g->cflags®_NEWLINE) || 694 (c == '\0' && !(m->eflags®_NOTEOL)) ); 695 696 /* do we need an expansion before looking at the char? */ 697 if (p == start || atbol || ateol) { 698 st = expand(m->g, startst, stopst, st, atbol, ateol); 699 SP("sbef", st, c); 700 } 701 702 /* are we done? */ 703 if (ISSET(st, stopst)) 704 matchp = p; 705 if (EQ(st, empty) || p == stop) 706 break; /* NOTE BREAK OUT */ 707 708 /* no, we must deal with this character */ 709 ASSIGN(tmp, st); 710 ASSIGN(st, empty); 711 st = step(m->g, startst, stopst, tmp, c, st); 712 SP("saft", st, c); 713 assert(EQ(expand(m->g, startst, stopst, st, 0, 0), st)); 714 p++; 715 } 716 717 return(matchp); 718 } 719 720 721 /* 722 - expand - return set of states reachable from an initial set 723 */ 724 static states 725 expand(g, start, stop, st, atbol, ateol) 726 register struct re_guts *g; 727 int start; /* start state within strip */ 728 int stop; /* state after stop state within strip */ 729 register states st; 730 int atbol; /* at start or just after \n? (for BOL) */ 731 int ateol; /* just before \n or \0? (for EOL) */ 732 { 733 register sop s; 734 register sopno pc; 735 register onestate here; /* note, macros know this name */ 736 register sopno look; 737 register int i; 738 739 for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { 740 s = g->strip[pc]; 741 switch (OP(s)) { 742 case OEND: 743 assert(pc == stop-1); 744 break; 745 case OCHAR: /* can't get past this */ 746 break; 747 case OBOL: 748 if (atbol) 749 FWD(st, st, 1); 750 break; 751 case OEOL: 752 if (ateol) 753 FWD(st, st, 1); 754 break; 755 case OANY: /* can't get past this either */ 756 break; 757 case OANYOF: /* or this */ 758 break; 759 case OBACK_: /* not significant here */ 760 case O_BACK: 761 FWD(st, st, 1); 762 break; 763 case OPLUS_: /* forward, this is just an empty */ 764 FWD(st, st, 1); 765 break; 766 case O_PLUS: /* both forward and back */ 767 FWD(st, st, 1); 768 i = ISSETBACK(st, OPND(s)); 769 BACK(st, st, OPND(s)); 770 if (!i && ISSETBACK(st, OPND(s))) { 771 /* oho, must reconsider loop body */ 772 pc -= OPND(s) + 1; 773 INIT(here, pc); 774 } 775 break; 776 case OQUEST_: /* two branches, both forward */ 777 FWD(st, st, 1); 778 FWD(st, st, OPND(s)); 779 break; 780 case O_QUEST: /* just an empty */ 781 FWD(st, st, 1); 782 break; 783 case OLPAREN: /* not significant here */ 784 case ORPAREN: 785 FWD(st, st, 1); 786 break; 787 case OCH_: /* mark the first two branches */ 788 FWD(st, st, 1); 789 assert(OP(g->strip[pc+OPND(s)]) == OOR2); 790 FWD(st, st, OPND(s)); 791 break; 792 case OOR1: /* done a branch, find the O_CH */ 793 if (ISSTATEIN(st, here)) { 794 for (look = 1; 795 OP(s = g->strip[pc+look]) != O_CH; 796 look += OPND(s)) 797 assert(OP(s) == OOR2); 798 FWD(st, st, look); 799 } 800 break; 801 case OOR2: /* propagate OCH_'s marking onward */ 802 FWD(st, st, 1); 803 if (OP(g->strip[pc+OPND(s)]) != O_CH) { 804 assert(OP(g->strip[pc+OPND(s)]) == OOR2); 805 FWD(st, st, OPND(s)); 806 } 807 break; 808 case O_CH: /* just an empty */ 809 FWD(st, st, 1); 810 break; 811 default: /* ooooops... */ 812 assert(0); 813 break; 814 } 815 } 816 817 return(st); 818 } 819 820 /* 821 - step - map set of states reachable before char to set reachable after 822 */ 823 static states 824 step(g, start, stop, bef, ch, aft) 825 register struct re_guts *g; 826 int start; /* start state within strip */ 827 int stop; /* state after stop state within strip */ 828 register states bef; /* states reachable before */ 829 uchar ch; 830 register states aft; /* states already known reachable after */ 831 { 832 register cset *cs; 833 register sop s; 834 register sopno pc; 835 register onestate here; /* note, macros know this name */ 836 register sopno look; 837 register int i; 838 839 for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { 840 s = g->strip[pc]; 841 switch (OP(s)) { 842 case OEND: 843 assert(pc == stop-1); 844 break; 845 case OCHAR: 846 if (ch == OPND(s)) 847 FWD(aft, bef, 1); 848 break; 849 case OBOL: /* these are looked after by expand() */ 850 case OEOL: 851 break; 852 case OANY: 853 FWD(aft, bef, 1); 854 break; 855 case OANYOF: 856 cs = &g->sets[OPND(s)]; 857 if (CHIN(cs, ch)) 858 FWD(aft, bef, 1); 859 break; 860 case OBACK_: /* ignored here */ 861 case O_BACK: 862 FWD(aft, aft, 1); 863 break; 864 case OPLUS_: /* forward, this is just an empty */ 865 FWD(aft, aft, 1); 866 break; 867 case O_PLUS: /* both forward and back */ 868 FWD(aft, aft, 1); 869 i = ISSETBACK(aft, OPND(s)); 870 BACK(aft, aft, OPND(s)); 871 if (!i && ISSETBACK(aft, OPND(s))) { 872 /* oho, must reconsider loop body */ 873 pc -= OPND(s) + 1; 874 INIT(here, pc); 875 } 876 break; 877 case OQUEST_: /* two branches, both forward */ 878 FWD(aft, aft, 1); 879 FWD(aft, aft, OPND(s)); 880 break; 881 case O_QUEST: /* just an empty */ 882 FWD(aft, aft, 1); 883 break; 884 case OLPAREN: /* not significant here */ 885 case ORPAREN: 886 FWD(aft, aft, 1); 887 break; 888 case OCH_: /* mark the first two branches */ 889 FWD(aft, aft, 1); 890 assert(OP(g->strip[pc+OPND(s)]) == OOR2); 891 FWD(aft, aft, OPND(s)); 892 break; 893 case OOR1: /* done a branch, find the O_CH */ 894 if (ISSTATEIN(aft, here)) { 895 for (look = 1; 896 OP(s = g->strip[pc+look]) != O_CH; 897 look += OPND(s)) 898 assert(OP(s) == OOR2); 899 FWD(aft, aft, look); 900 } 901 break; 902 case OOR2: /* propagate OCH_'s marking */ 903 FWD(aft, aft, 1); 904 if (OP(g->strip[pc+OPND(s)]) != O_CH) { 905 assert(OP(g->strip[pc+OPND(s)]) == OOR2); 906 FWD(aft, aft, OPND(s)); 907 } 908 break; 909 case O_CH: /* just empty */ 910 FWD(aft, aft, 1); 911 break; 912 default: /* ooooops... */ 913 assert(0); 914 break; 915 } 916 } 917 918 return(aft); 919 } 920 921 #ifndef NDEBUG 922 /* 923 - print - print a set of states 924 */ 925 static void 926 print(g, caption, st, ch, d) 927 struct re_guts *g; 928 char *caption; 929 states st; 930 uchar ch; 931 FILE *d; 932 { 933 register int i; 934 register int first = 1; 935 936 fprintf(d, "%s", caption); 937 if (ch != '\0') 938 fprintf(d, " %s", regchar(ch)); 939 for (i = 0; i < g->nstates; i++) 940 if (ISSET(st, i)) { 941 fprintf(d, "%s%d", (first) ? "\t" : ", ", i); 942 first = 0; 943 } 944 fprintf(d, "\n"); 945 } 946 #endif 947 948 #undef matcher 949 #undef fast 950 #undef slow 951 #undef dissect 952 #undef backref 953 #undef expand 954 #undef step 955 #undef print 956 #undef match 957