1 /* $NetBSD: engine.c,v 1.14 2002/07/22 12:56:17 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1992, 1993, 1994 Henry Spencer. 5 * Copyright (c) 1992, 1993, 1994 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Henry Spencer. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * @(#)engine.c 8.5 (Berkeley) 3/20/94 40 */ 41 42 /* 43 * The matching engine and friends. This file is #included by regexec.c 44 * after suitable #defines of a variety of macros used herein, so that 45 * different state representations can be used without duplicating masses 46 * of code. 47 */ 48 49 #ifdef SNAMES 50 #define matcher smatcher 51 #define fast sfast 52 #define slow sslow 53 #define dissect sdissect 54 #define backref sbackref 55 #define step sstep 56 #define print sprint 57 #define at sat 58 #define match smat 59 #define nope snope 60 #endif 61 #ifdef LNAMES 62 #define matcher lmatcher 63 #define fast lfast 64 #define slow lslow 65 #define dissect ldissect 66 #define backref lbackref 67 #define step lstep 68 #define print lprint 69 #define at lat 70 #define match lmat 71 #define nope lnope 72 #endif 73 74 /* another structure passed up and down to avoid zillions of parameters */ 75 struct match { 76 struct re_guts *g; 77 int eflags; 78 regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ 79 char *offp; /* offsets work from here */ 80 char *beginp; /* start of string -- virtual NUL precedes */ 81 char *endp; /* end of string -- virtual NUL here */ 82 char *coldp; /* can be no match starting before here */ 83 char **lastpos; /* [nplus+1] */ 84 STATEVARS; 85 states st; /* current states */ 86 states fresh; /* states for a fresh start */ 87 states tmp; /* temporary */ 88 states empty; /* empty set of states */ 89 }; 90 91 /* ========= begin header generated by ./mkh ========= */ 92 #ifdef __cplusplus 93 extern "C" { 94 #endif 95 96 /* === engine.c === */ 97 static int matcher __P((struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags)); 98 static char *dissect __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst)); 99 static char *backref __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev)); 100 static char *fast __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst)); 101 static char *slow __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst)); 102 static states step __P((struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft)); 103 #define BOL (OUT+1) 104 #define EOL (BOL+1) 105 #define BOLEOL (BOL+2) 106 #define NOTHING (BOL+3) 107 #define BOW (BOL+4) 108 #define EOW (BOL+5) 109 #define CODEMAX (BOL+5) /* highest code used */ 110 #define NONCHAR(c) ((c) > CHAR_MAX) 111 #define NNONCHAR (CODEMAX-CHAR_MAX) 112 #ifdef REDEBUG 113 static void print __P((struct match *m, char *caption, states st, int ch, FILE *d)); 114 #endif 115 #ifdef REDEBUG 116 static void at __P((struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst)); 117 #endif 118 #ifdef REDEBUG 119 static char *pchar __P((int ch)); 120 #endif 121 122 #ifdef __cplusplus 123 } 124 #endif 125 /* ========= end header generated by ./mkh ========= */ 126 127 #ifdef REDEBUG 128 #define SP(t, s, c) print(m, t, s, c, stdout) 129 #define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) 130 #define NOTE(str) { if (m->eflags®_TRACE) printf("=%s\n", (str)); } 131 static int nope = 0; 132 #else 133 #define SP(t, s, c) /* nothing */ 134 #define AT(t, p1, p2, s1, s2) /* nothing */ 135 #define NOTE(s) /* nothing */ 136 #endif 137 138 /* 139 - matcher - the actual matching engine 140 == static int matcher(struct re_guts *g, char *string, \ 141 == size_t nmatch, regmatch_t pmatch[], int eflags); 142 */ 143 static int /* 0 success, REG_NOMATCH failure */ 144 matcher(g, string, nmatch, pmatch, eflags) 145 struct re_guts *g; 146 char *string; 147 size_t nmatch; 148 regmatch_t pmatch[]; 149 int eflags; 150 { 151 char *endp; 152 int i; 153 struct match mv; 154 struct match *m = &mv; 155 char *dp; 156 const sopno gf = g->firststate+1; /* +1 for OEND */ 157 const sopno gl = g->laststate; 158 char *start; 159 char *stop; 160 int error = 0; 161 162 _DIAGASSERT(g != NULL); 163 _DIAGASSERT(string != NULL); 164 /* pmatch checked below */ 165 166 /* simplify the situation where possible */ 167 if (g->cflags®_NOSUB) 168 nmatch = 0; 169 if (eflags®_STARTEND) { 170 _DIAGASSERT(pmatch != NULL); 171 start = string + (size_t)pmatch[0].rm_so; 172 stop = string + (size_t)pmatch[0].rm_eo; 173 } else { 174 start = string; 175 stop = start + strlen(start); 176 } 177 if (stop < start) 178 return(REG_INVARG); 179 180 /* prescreening; this does wonders for this rather slow code */ 181 if (g->must != NULL) { 182 for (dp = start; dp < stop; dp++) 183 if (*dp == g->must[0] && stop - dp >= g->mlen && 184 memcmp(dp, g->must, (size_t)g->mlen) == 0) 185 break; 186 if (dp == stop) /* we didn't find g->must */ 187 return(REG_NOMATCH); 188 } 189 190 /* match struct setup */ 191 m->g = g; 192 m->eflags = eflags; 193 m->pmatch = NULL; 194 m->lastpos = NULL; 195 m->offp = string; 196 m->beginp = start; 197 m->endp = stop; 198 STATESETUP(m, 4); 199 SETUP(m->st); 200 SETUP(m->fresh); 201 SETUP(m->tmp); 202 SETUP(m->empty); 203 CLEAR(m->empty); 204 205 /* this loop does only one repetition except for backrefs */ 206 for (;;) { 207 endp = fast(m, start, stop, gf, gl); 208 if (endp == NULL) { /* a miss */ 209 error = REG_NOMATCH; 210 goto done; 211 } 212 if (nmatch == 0 && !g->backrefs) 213 break; /* no further info needed */ 214 215 /* where? */ 216 assert(m->coldp != NULL); 217 for (;;) { 218 NOTE("finding start"); 219 endp = slow(m, m->coldp, stop, gf, gl); 220 if (endp != NULL) 221 break; 222 assert(m->coldp < m->endp); 223 m->coldp++; 224 } 225 if (nmatch == 1 && !g->backrefs) 226 break; /* no further info needed */ 227 228 /* oh my, he wants the subexpressions... */ 229 if (m->pmatch == NULL) 230 m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * 231 sizeof(regmatch_t)); 232 if (m->pmatch == NULL) { 233 error = REG_ESPACE; 234 goto done; 235 } 236 for (i = 1; i <= m->g->nsub; i++) 237 m->pmatch[i].rm_so = m->pmatch[i].rm_eo = (regoff_t)-1; 238 if (!g->backrefs && !(m->eflags®_BACKR)) { 239 NOTE("dissecting"); 240 dp = dissect(m, m->coldp, endp, gf, gl); 241 } else { 242 if (g->nplus > 0 && m->lastpos == NULL) 243 m->lastpos = (char **)malloc((g->nplus+1) * 244 sizeof(char *)); 245 if (g->nplus > 0 && m->lastpos == NULL) { 246 error = REG_ESPACE; 247 goto done; 248 } 249 NOTE("backref dissect"); 250 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 251 } 252 if (dp != NULL) 253 break; 254 255 /* uh-oh... we couldn't find a subexpression-level match */ 256 assert(g->backrefs); /* must be back references doing it */ 257 assert(g->nplus == 0 || m->lastpos != NULL); 258 for (;;) { 259 if (dp != NULL || endp <= m->coldp) 260 break; /* defeat */ 261 NOTE("backoff"); 262 endp = slow(m, m->coldp, endp-1, gf, gl); 263 if (endp == NULL) 264 break; /* defeat */ 265 /* try it on a shorter possibility */ 266 #ifndef NDEBUG 267 for (i = 1; i <= m->g->nsub; i++) { 268 assert(m->pmatch[i].rm_so == (regoff_t)-1); 269 assert(m->pmatch[i].rm_eo == (regoff_t)-1); 270 } 271 #endif 272 NOTE("backoff dissect"); 273 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 274 } 275 assert(dp == NULL || dp == endp); 276 if (dp != NULL) /* found a shorter one */ 277 break; 278 279 /* despite initial appearances, there is no match here */ 280 NOTE("false alarm"); 281 start = m->coldp + 1; /* recycle starting later */ 282 assert(start <= stop); 283 } 284 285 /* fill in the details if requested */ 286 if (nmatch > 0) { 287 _DIAGASSERT(pmatch != NULL); 288 pmatch[0].rm_so = m->coldp - m->offp; 289 pmatch[0].rm_eo = endp - m->offp; 290 } 291 if (nmatch > 1) { 292 assert(m->pmatch != NULL); 293 for (i = 1; i < nmatch; i++) 294 if (i <= m->g->nsub) 295 pmatch[i] = m->pmatch[i]; 296 else { 297 pmatch[i].rm_so = (regoff_t)-1; 298 pmatch[i].rm_eo = (regoff_t)-1; 299 } 300 } 301 302 done: 303 if (m->pmatch != NULL) { 304 free(m->pmatch); 305 m->pmatch = NULL; 306 } 307 if (m->lastpos != NULL) { 308 free(m->lastpos); 309 m->lastpos = NULL; 310 } 311 STATETEARDOWN(m); 312 return error; 313 } 314 315 /* 316 - dissect - figure out what matched what, no back references 317 == static char *dissect(struct match *m, char *start, \ 318 == char *stop, sopno startst, sopno stopst); 319 */ 320 static char * /* == stop (success) always */ 321 dissect(m, start, stop, startst, stopst) 322 struct match *m; 323 char *start; 324 char *stop; 325 sopno startst; 326 sopno stopst; 327 { 328 int i; 329 sopno ss; /* start sop of current subRE */ 330 sopno es; /* end sop of current subRE */ 331 char *sp; /* start of string matched by it */ 332 char *stp; /* string matched by it cannot pass here */ 333 char *rest; /* start of rest of string */ 334 char *tail; /* string unmatched by rest of RE */ 335 sopno ssub; /* start sop of subsubRE */ 336 sopno esub; /* end sop of subsubRE */ 337 char *ssp; /* start of string matched by subsubRE */ 338 char *sep; /* end of string matched by subsubRE */ 339 char *oldssp; /* previous ssp */ 340 #ifndef NDEBUG 341 char *dp; 342 #endif 343 344 _DIAGASSERT(m != NULL); 345 _DIAGASSERT(start != NULL); 346 _DIAGASSERT(stop != NULL); 347 348 AT("diss", start, stop, startst, stopst); 349 sp = start; 350 for (ss = startst; ss < stopst; ss = es) { 351 /* identify end of subRE */ 352 es = ss; 353 switch (OP(m->g->strip[es])) { 354 case OPLUS_: 355 case OQUEST_: 356 es += OPND(m->g->strip[es]); 357 break; 358 case OCH_: 359 while (OP(m->g->strip[es]) != O_CH) 360 es += OPND(m->g->strip[es]); 361 break; 362 } 363 es++; 364 365 /* figure out what it matched */ 366 switch (OP(m->g->strip[ss])) { 367 case OEND: 368 assert(nope); 369 break; 370 case OCHAR: 371 sp++; 372 break; 373 case OBOL: 374 case OEOL: 375 case OBOW: 376 case OEOW: 377 break; 378 case OANY: 379 case OANYOF: 380 sp++; 381 break; 382 case OBACK_: 383 case O_BACK: 384 assert(nope); 385 break; 386 /* cases where length of match is hard to find */ 387 case OQUEST_: 388 stp = stop; 389 for (;;) { 390 /* how long could this one be? */ 391 rest = slow(m, sp, stp, ss, es); 392 assert(rest != NULL); /* it did match */ 393 /* could the rest match the rest? */ 394 tail = slow(m, rest, stop, es, stopst); 395 if (tail == stop) 396 break; /* yes! */ 397 /* no -- try a shorter match for this one */ 398 stp = rest - 1; 399 assert(stp >= sp); /* it did work */ 400 } 401 ssub = ss + 1; 402 esub = es - 1; 403 /* did innards match? */ 404 if (slow(m, sp, rest, ssub, esub) != NULL) { 405 #ifdef NDEBUG 406 (void) 407 #else 408 dp = 409 #endif 410 dissect(m, sp, rest, ssub, esub); 411 assert(dp == rest); 412 } else /* no */ 413 assert(sp == rest); 414 sp = rest; 415 break; 416 case OPLUS_: 417 stp = stop; 418 for (;;) { 419 /* how long could this one be? */ 420 rest = slow(m, sp, stp, ss, es); 421 assert(rest != NULL); /* it did match */ 422 /* could the rest match the rest? */ 423 tail = slow(m, rest, stop, es, stopst); 424 if (tail == stop) 425 break; /* yes! */ 426 /* no -- try a shorter match for this one */ 427 stp = rest - 1; 428 assert(stp >= sp); /* it did work */ 429 } 430 ssub = ss + 1; 431 esub = es - 1; 432 ssp = sp; 433 oldssp = ssp; 434 for (;;) { /* find last match of innards */ 435 sep = slow(m, ssp, rest, ssub, esub); 436 if (sep == NULL || sep == ssp) 437 break; /* failed or matched null */ 438 oldssp = ssp; /* on to next try */ 439 ssp = sep; 440 } 441 if (sep == NULL) { 442 /* last successful match */ 443 sep = ssp; 444 ssp = oldssp; 445 } 446 assert(sep == rest); /* must exhaust substring */ 447 assert(slow(m, ssp, sep, ssub, esub) == rest); 448 #ifdef NDEBUG 449 (void) 450 #else 451 dp = 452 #endif 453 dissect(m, ssp, sep, ssub, esub); 454 assert(dp == sep); 455 sp = rest; 456 break; 457 case OCH_: 458 stp = stop; 459 for (;;) { 460 /* how long could this one be? */ 461 rest = slow(m, sp, stp, ss, es); 462 assert(rest != NULL); /* it did match */ 463 /* could the rest match the rest? */ 464 tail = slow(m, rest, stop, es, stopst); 465 if (tail == stop) 466 break; /* yes! */ 467 /* no -- try a shorter match for this one */ 468 stp = rest - 1; 469 assert(stp >= sp); /* it did work */ 470 } 471 ssub = ss + 1; 472 esub = ss + OPND(m->g->strip[ss]) - 1; 473 assert(OP(m->g->strip[esub]) == OOR1); 474 for (;;) { /* find first matching branch */ 475 if (slow(m, sp, rest, ssub, esub) == rest) 476 break; /* it matched all of it */ 477 /* that one missed, try next one */ 478 assert(OP(m->g->strip[esub]) == OOR1); 479 esub++; 480 assert(OP(m->g->strip[esub]) == OOR2); 481 ssub = esub + 1; 482 esub += OPND(m->g->strip[esub]); 483 if (OP(m->g->strip[esub]) == OOR2) 484 esub--; 485 else 486 assert(OP(m->g->strip[esub]) == O_CH); 487 } 488 #ifdef NDEBUG 489 (void) 490 #else 491 dp = 492 #endif 493 dissect(m, sp, rest, ssub, esub); 494 assert(dp == rest); 495 sp = rest; 496 break; 497 case O_PLUS: 498 case O_QUEST: 499 case OOR1: 500 case OOR2: 501 case O_CH: 502 assert(nope); 503 break; 504 case OLPAREN: 505 i = OPND(m->g->strip[ss]); 506 assert(0 < i && i <= m->g->nsub); 507 m->pmatch[i].rm_so = sp - m->offp; 508 break; 509 case ORPAREN: 510 i = OPND(m->g->strip[ss]); 511 assert(0 < i && i <= m->g->nsub); 512 m->pmatch[i].rm_eo = sp - m->offp; 513 break; 514 default: /* uh oh */ 515 assert(nope); 516 break; 517 } 518 } 519 520 assert(sp == stop); 521 return(sp); 522 } 523 524 /* 525 - backref - figure out what matched what, figuring in back references 526 == static char *backref(struct match *m, char *start, \ 527 == char *stop, sopno startst, sopno stopst, sopno lev); 528 */ 529 static char * /* == stop (success) or NULL (failure) */ 530 backref(m, start, stop, startst, stopst, lev) 531 struct match *m; 532 char *start; 533 char *stop; 534 sopno startst; 535 sopno stopst; 536 sopno lev; /* PLUS nesting level */ 537 { 538 int i; 539 sopno ss; /* start sop of current subRE */ 540 char *sp; /* start of string matched by it */ 541 sopno ssub; /* start sop of subsubRE */ 542 sopno esub; /* end sop of subsubRE */ 543 char *ssp; /* start of string matched by subsubRE */ 544 char *dp; 545 size_t len; 546 int hard; 547 sop s; 548 regoff_t offsave; 549 cset *cs; 550 551 _DIAGASSERT(m != NULL); 552 _DIAGASSERT(start != NULL); 553 _DIAGASSERT(stop != NULL); 554 555 AT("back", start, stop, startst, stopst); 556 sp = start; 557 558 /* get as far as we can with easy stuff */ 559 hard = 0; 560 for (ss = startst; !hard && ss < stopst; ss++) 561 switch (OP(s = m->g->strip[ss])) { 562 case OCHAR: 563 if (sp == stop || *sp++ != (char)OPND(s)) 564 return(NULL); 565 break; 566 case OANY: 567 if (sp == stop) 568 return(NULL); 569 sp++; 570 break; 571 case OANYOF: 572 cs = &m->g->sets[OPND(s)]; 573 if (sp == stop || !CHIN(cs, *sp++)) 574 return(NULL); 575 break; 576 case OBOL: 577 if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 578 (sp < m->endp && *(sp-1) == '\n' && 579 (m->g->cflags®_NEWLINE)) ) 580 { /* yes */ } 581 else 582 return(NULL); 583 break; 584 case OEOL: 585 if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || 586 (sp < m->endp && *sp == '\n' && 587 (m->g->cflags®_NEWLINE)) ) 588 { /* yes */ } 589 else 590 return(NULL); 591 break; 592 case OBOW: 593 if (( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 594 (sp < m->endp && *(sp-1) == '\n' && 595 (m->g->cflags®_NEWLINE)) || 596 (sp > m->beginp && 597 !ISWORD(*(sp-1))) ) && 598 (sp < m->endp && ISWORD(*sp)) ) 599 { /* yes */ } 600 else 601 return(NULL); 602 break; 603 case OEOW: 604 if (( (sp == m->endp && !(m->eflags®_NOTEOL)) || 605 (sp < m->endp && *sp == '\n' && 606 (m->g->cflags®_NEWLINE)) || 607 (sp < m->endp && !ISWORD(*sp)) ) && 608 (sp > m->beginp && ISWORD(*(sp-1))) ) 609 { /* yes */ } 610 else 611 return(NULL); 612 break; 613 case O_QUEST: 614 break; 615 case OOR1: /* matches null but needs to skip */ 616 ss++; 617 s = m->g->strip[ss]; 618 do { 619 assert(OP(s) == OOR2); 620 ss += OPND(s); 621 } while (OP(s = m->g->strip[ss]) != O_CH); 622 /* note that the ss++ gets us past the O_CH */ 623 break; 624 default: /* have to make a choice */ 625 hard = 1; 626 break; 627 } 628 if (!hard) { /* that was it! */ 629 if (sp != stop) 630 return(NULL); 631 return(sp); 632 } 633 ss--; /* adjust for the for's final increment */ 634 635 /* the hard stuff */ 636 AT("hard", sp, stop, ss, stopst); 637 s = m->g->strip[ss]; 638 switch (OP(s)) { 639 case OBACK_: /* the vilest depths */ 640 i = OPND(s); 641 assert(0 < i && i <= m->g->nsub); 642 if (m->pmatch[i].rm_eo == (regoff_t)-1) 643 return(NULL); 644 assert(m->pmatch[i].rm_so != (regoff_t)-1); 645 len = (size_t)(m->pmatch[i].rm_eo - m->pmatch[i].rm_so); 646 assert(stop - m->beginp >= len); 647 if (sp > stop - len) 648 return(NULL); /* not enough left to match */ 649 ssp = m->offp + (size_t)m->pmatch[i].rm_so; 650 if (memcmp(sp, ssp, len) != 0) 651 return(NULL); 652 while (m->g->strip[ss] != SOP(O_BACK, i)) 653 ss++; 654 return(backref(m, sp+len, stop, ss+1, stopst, lev)); 655 656 case OQUEST_: /* to null or not */ 657 dp = backref(m, sp, stop, ss+1, stopst, lev); 658 if (dp != NULL) 659 return(dp); /* not */ 660 return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev)); 661 662 case OPLUS_: 663 assert(m->lastpos != NULL); 664 assert(lev+1 <= m->g->nplus); 665 m->lastpos[lev+1] = sp; 666 return(backref(m, sp, stop, ss+1, stopst, lev+1)); 667 668 case O_PLUS: 669 if (sp == m->lastpos[lev]) /* last pass matched null */ 670 return(backref(m, sp, stop, ss+1, stopst, lev-1)); 671 /* try another pass */ 672 m->lastpos[lev] = sp; 673 dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev); 674 if (dp == NULL) 675 dp = backref(m, sp, stop, ss+1, stopst, lev-1); 676 return(dp); 677 678 case OCH_: /* find the right one, if any */ 679 ssub = ss + 1; 680 esub = ss + OPND(s) - 1; 681 assert(OP(m->g->strip[esub]) == OOR1); 682 for (;;) { /* find first matching branch */ 683 dp = backref(m, sp, stop, ssub, esub, lev); 684 if (dp != NULL) 685 return(dp); 686 /* that one missed, try next one */ 687 if (OP(m->g->strip[esub]) == O_CH) 688 return(NULL); /* there is none */ 689 esub++; 690 assert(OP(m->g->strip[esub]) == OOR2); 691 ssub = esub + 1; 692 esub += OPND(m->g->strip[esub]); 693 if (OP(m->g->strip[esub]) == OOR2) 694 esub--; 695 else 696 assert(OP(m->g->strip[esub]) == O_CH); 697 } 698 699 case OLPAREN: /* must undo assignment if rest fails */ 700 i = OPND(s); 701 assert(0 < i && i <= m->g->nsub); 702 offsave = m->pmatch[i].rm_so; 703 m->pmatch[i].rm_so = sp - m->offp; 704 dp = backref(m, sp, stop, ss+1, stopst, lev); 705 if (dp != NULL) 706 return(dp); 707 m->pmatch[i].rm_so = offsave; 708 return(NULL); 709 710 case ORPAREN: /* must undo assignment if rest fails */ 711 i = OPND(s); 712 assert(0 < i && i <= m->g->nsub); 713 offsave = m->pmatch[i].rm_eo; 714 m->pmatch[i].rm_eo = sp - m->offp; 715 dp = backref(m, sp, stop, ss+1, stopst, lev); 716 if (dp != NULL) 717 return(dp); 718 m->pmatch[i].rm_eo = offsave; 719 return(NULL); 720 721 default: /* uh oh */ 722 assert(nope); 723 break; 724 } 725 726 /* "can't happen" */ 727 assert(nope); 728 /* NOTREACHED */ 729 return NULL; 730 } 731 732 /* 733 - fast - step through the string at top speed 734 == static char *fast(struct match *m, char *start, \ 735 == char *stop, sopno startst, sopno stopst); 736 */ 737 static char * /* where tentative match ended, or NULL */ 738 fast(m, start, stop, startst, stopst) 739 struct match *m; 740 char *start; 741 char *stop; 742 sopno startst; 743 sopno stopst; 744 { 745 states st = m->st; 746 states fresh = m->fresh; 747 states tmp = m->tmp; 748 char *p = start; 749 int c = (start == m->beginp) ? OUT : *(start-1); 750 int lastc; /* previous c */ 751 int flagch; 752 int i; 753 char *coldp; /* last p after which no match was underway */ 754 755 _DIAGASSERT(m != NULL); 756 _DIAGASSERT(start != NULL); 757 _DIAGASSERT(stop != NULL); 758 759 CLEAR(st); 760 SET1(st, startst); 761 st = step(m->g, startst, stopst, st, NOTHING, st); 762 ASSIGN(fresh, st); 763 SP("start", st, *p); 764 coldp = NULL; 765 for (;;) { 766 /* next character */ 767 lastc = c; 768 c = (p == m->endp) ? OUT : *p; 769 if (EQ(st, fresh)) 770 coldp = p; 771 772 /* is there an EOL and/or BOL between lastc and c? */ 773 flagch = '\0'; 774 i = 0; 775 if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 776 (lastc == OUT && !(m->eflags®_NOTBOL)) ) { 777 flagch = BOL; 778 i = m->g->nbol; 779 } 780 if ( (c == '\n' && m->g->cflags®_NEWLINE) || 781 (c == OUT && !(m->eflags®_NOTEOL)) ) { 782 flagch = (flagch == BOL) ? BOLEOL : EOL; 783 i += m->g->neol; 784 } 785 if (i != 0) { 786 for (; i > 0; i--) 787 st = step(m->g, startst, stopst, st, flagch, st); 788 SP("boleol", st, c); 789 } 790 791 /* how about a word boundary? */ 792 if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && 793 (c != OUT && ISWORD(c)) ) { 794 flagch = BOW; 795 } 796 if ( (lastc != OUT && ISWORD(lastc)) && 797 (flagch == EOL || (c != OUT && !ISWORD(c))) ) { 798 flagch = EOW; 799 } 800 if (flagch == BOW || flagch == EOW) { 801 st = step(m->g, startst, stopst, st, flagch, st); 802 SP("boweow", st, c); 803 } 804 805 /* are we done? */ 806 if (ISSET(st, stopst) || p == stop) 807 break; /* NOTE BREAK OUT */ 808 809 /* no, we must deal with this character */ 810 ASSIGN(tmp, st); 811 ASSIGN(st, fresh); 812 assert(c != OUT); 813 st = step(m->g, startst, stopst, tmp, c, st); 814 SP("aft", st, c); 815 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); 816 p++; 817 } 818 819 assert(coldp != NULL); 820 m->coldp = coldp; 821 if (ISSET(st, stopst)) 822 return(p+1); 823 else 824 return(NULL); 825 } 826 827 /* 828 - slow - step through the string more deliberately 829 == static char *slow(struct match *m, char *start, \ 830 == char *stop, sopno startst, sopno stopst); 831 */ 832 static char * /* where it ended */ 833 slow(m, start, stop, startst, stopst) 834 struct match *m; 835 char *start; 836 char *stop; 837 sopno startst; 838 sopno stopst; 839 { 840 states st = m->st; 841 states empty = m->empty; 842 states tmp = m->tmp; 843 char *p = start; 844 int c = (start == m->beginp) ? OUT : *(start-1); 845 int lastc; /* previous c */ 846 int flagch; 847 int i; 848 char *matchp; /* last p at which a match ended */ 849 850 _DIAGASSERT(m != NULL); 851 _DIAGASSERT(start != NULL); 852 _DIAGASSERT(stop != NULL); 853 854 AT("slow", start, stop, startst, stopst); 855 CLEAR(st); 856 SET1(st, startst); 857 SP("sstart", st, *p); 858 st = step(m->g, startst, stopst, st, NOTHING, st); 859 matchp = NULL; 860 for (;;) { 861 /* next character */ 862 lastc = c; 863 c = (p == m->endp) ? OUT : *p; 864 865 /* is there an EOL and/or BOL between lastc and c? */ 866 flagch = '\0'; 867 i = 0; 868 if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 869 (lastc == OUT && !(m->eflags®_NOTBOL)) ) { 870 flagch = BOL; 871 i = m->g->nbol; 872 } 873 if ( (c == '\n' && m->g->cflags®_NEWLINE) || 874 (c == OUT && !(m->eflags®_NOTEOL)) ) { 875 flagch = (flagch == BOL) ? BOLEOL : EOL; 876 i += m->g->neol; 877 } 878 if (i != 0) { 879 for (; i > 0; i--) 880 st = step(m->g, startst, stopst, st, flagch, st); 881 SP("sboleol", st, c); 882 } 883 884 /* how about a word boundary? */ 885 if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && 886 (c != OUT && ISWORD(c)) ) { 887 flagch = BOW; 888 } 889 if ( (lastc != OUT && ISWORD(lastc)) && 890 (flagch == EOL || (c != OUT && !ISWORD(c))) ) { 891 flagch = EOW; 892 } 893 if (flagch == BOW || flagch == EOW) { 894 st = step(m->g, startst, stopst, st, flagch, st); 895 SP("sboweow", st, c); 896 } 897 898 /* are we done? */ 899 if (ISSET(st, stopst)) 900 matchp = p; 901 if (EQ(st, empty) || p == stop) 902 break; /* NOTE BREAK OUT */ 903 904 /* no, we must deal with this character */ 905 ASSIGN(tmp, st); 906 ASSIGN(st, empty); 907 assert(c != OUT); 908 st = step(m->g, startst, stopst, tmp, c, st); 909 SP("saft", st, c); 910 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); 911 p++; 912 } 913 914 return(matchp); 915 } 916 917 918 /* 919 - step - map set of states reachable before char to set reachable after 920 == static states step(struct re_guts *g, sopno start, sopno stop, \ 921 == states bef, int ch, states aft); 922 == #define BOL (OUT+1) 923 == #define EOL (BOL+1) 924 == #define BOLEOL (BOL+2) 925 == #define NOTHING (BOL+3) 926 == #define BOW (BOL+4) 927 == #define EOW (BOL+5) 928 == #define CODEMAX (BOL+5) // highest code used 929 == #define NONCHAR(c) ((c) > CHAR_MAX) 930 == #define NNONCHAR (CODEMAX-CHAR_MAX) 931 */ 932 static states 933 step(g, start, stop, bef, ch, aft) 934 struct re_guts *g; 935 sopno start; /* start state within strip */ 936 sopno stop; /* state after stop state within strip */ 937 states bef; /* states reachable before */ 938 int ch; /* character or NONCHAR code */ 939 states aft; /* states already known reachable after */ 940 { 941 cset *cs; 942 sop s; 943 sopno pc; 944 onestate here; /* note, macros know this name */ 945 sopno look; 946 int i; 947 948 _DIAGASSERT(g != NULL); 949 950 for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { 951 s = g->strip[pc]; 952 switch (OP(s)) { 953 case OEND: 954 assert(pc == stop-1); 955 break; 956 case OCHAR: 957 /* only characters can match */ 958 assert(!NONCHAR(ch) || ch != (char)OPND(s)); 959 if (ch == (char)OPND(s)) 960 FWD(aft, bef, 1); 961 break; 962 case OBOL: 963 if (ch == BOL || ch == BOLEOL) 964 FWD(aft, bef, 1); 965 break; 966 case OEOL: 967 if (ch == EOL || ch == BOLEOL) 968 FWD(aft, bef, 1); 969 break; 970 case OBOW: 971 if (ch == BOW) 972 FWD(aft, bef, 1); 973 break; 974 case OEOW: 975 if (ch == EOW) 976 FWD(aft, bef, 1); 977 break; 978 case OANY: 979 if (!NONCHAR(ch)) 980 FWD(aft, bef, 1); 981 break; 982 case OANYOF: 983 cs = &g->sets[OPND(s)]; 984 if (!NONCHAR(ch) && CHIN(cs, ch)) 985 FWD(aft, bef, 1); 986 break; 987 case OBACK_: /* ignored here */ 988 case O_BACK: 989 FWD(aft, aft, 1); 990 break; 991 case OPLUS_: /* forward, this is just an empty */ 992 FWD(aft, aft, 1); 993 break; 994 case O_PLUS: /* both forward and back */ 995 FWD(aft, aft, 1); 996 i = ISSETBACK(aft, OPND(s)); 997 BACK(aft, aft, OPND(s)); 998 if (!i && ISSETBACK(aft, OPND(s))) { 999 /* oho, must reconsider loop body */ 1000 pc -= OPND(s) + 1; 1001 INIT(here, pc); 1002 } 1003 break; 1004 case OQUEST_: /* two branches, both forward */ 1005 FWD(aft, aft, 1); 1006 FWD(aft, aft, OPND(s)); 1007 break; 1008 case O_QUEST: /* just an empty */ 1009 FWD(aft, aft, 1); 1010 break; 1011 case OLPAREN: /* not significant here */ 1012 case ORPAREN: 1013 FWD(aft, aft, 1); 1014 break; 1015 case OCH_: /* mark the first two branches */ 1016 FWD(aft, aft, 1); 1017 assert(OP(g->strip[pc+OPND(s)]) == OOR2); 1018 FWD(aft, aft, OPND(s)); 1019 break; 1020 case OOR1: /* done a branch, find the O_CH */ 1021 if (ISSTATEIN(aft, here)) { 1022 for (look = 1; 1023 OP(s = g->strip[pc+look]) != O_CH; 1024 look += OPND(s)) 1025 assert(OP(s) == OOR2); 1026 FWD(aft, aft, look); 1027 } 1028 break; 1029 case OOR2: /* propagate OCH_'s marking */ 1030 FWD(aft, aft, 1); 1031 if (OP(g->strip[pc+OPND(s)]) != O_CH) { 1032 assert(OP(g->strip[pc+OPND(s)]) == OOR2); 1033 FWD(aft, aft, OPND(s)); 1034 } 1035 break; 1036 case O_CH: /* just empty */ 1037 FWD(aft, aft, 1); 1038 break; 1039 default: /* ooooops... */ 1040 assert(nope); 1041 break; 1042 } 1043 } 1044 1045 return(aft); 1046 } 1047 1048 #ifdef REDEBUG 1049 /* 1050 - print - print a set of states 1051 == #ifdef REDEBUG 1052 == static void print(struct match *m, char *caption, states st, \ 1053 == int ch, FILE *d); 1054 == #endif 1055 */ 1056 static void 1057 print(m, caption, st, ch, d) 1058 struct match *m; 1059 char *caption; 1060 states st; 1061 int ch; 1062 FILE *d; 1063 { 1064 struct re_guts *g = m->g; 1065 int i; 1066 int first = 1; 1067 1068 _DIAGASSERT(m != NULL); 1069 _DIAGASSERT(caption != NULL); 1070 1071 if (!(m->eflags®_TRACE)) 1072 return; 1073 1074 _DIAGASSERT(d != NULL); 1075 1076 fprintf(d, "%s", caption); 1077 if (ch != '\0') 1078 fprintf(d, " %s", pchar(ch)); 1079 for (i = 0; i < g->nstates; i++) 1080 if (ISSET(st, i)) { 1081 fprintf(d, "%s%d", (first) ? "\t" : ", ", i); 1082 first = 0; 1083 } 1084 fprintf(d, "\n"); 1085 } 1086 1087 /* 1088 - at - print current situation 1089 == #ifdef REDEBUG 1090 == static void at(struct match *m, char *title, char *start, char *stop, \ 1091 == sopno startst, sopno stopst); 1092 == #endif 1093 */ 1094 static void 1095 at(m, title, start, stop, startst, stopst) 1096 struct match *m; 1097 char *title; 1098 char *start; 1099 char *stop; 1100 sopno startst; 1101 sopno stopst; 1102 { 1103 1104 _DIAGASSERT(m != NULL); 1105 _DIAGASSERT(title != NULL); 1106 _DIAGASSERT(start != NULL); 1107 _DIAGASSERT(stop != NULL); 1108 1109 if (!(m->eflags®_TRACE)) 1110 return; 1111 1112 printf("%s %s-", title, pchar(*start)); 1113 printf("%s ", pchar(*stop)); 1114 printf("%ld-%ld\n", (long)startst, (long)stopst); 1115 } 1116 1117 #ifndef PCHARDONE 1118 #define PCHARDONE /* never again */ 1119 /* 1120 - pchar - make a character printable 1121 == #ifdef REDEBUG 1122 == static char *pchar(int ch); 1123 == #endif 1124 * 1125 * Is this identical to regchar() over in debug.c? Well, yes. But a 1126 * duplicate here avoids having a debugging-capable regexec.o tied to 1127 * a matching debug.o, and this is convenient. It all disappears in 1128 * the non-debug compilation anyway, so it doesn't matter much. 1129 */ 1130 static char * /* -> representation */ 1131 pchar(ch) 1132 int ch; 1133 { 1134 static char pbuf[10]; 1135 1136 if (isprint(ch) || ch == ' ') 1137 (void)snprintf(pbuf, sizeof pbuf, "%c", ch); 1138 else 1139 (void)snprintf(pbuf, sizeof pbuf, "\\%o", ch); 1140 return(pbuf); 1141 } 1142 #endif 1143 #endif 1144 1145 #undef matcher 1146 #undef fast 1147 #undef slow 1148 #undef dissect 1149 #undef backref 1150 #undef step 1151 #undef print 1152 #undef at 1153 #undef match 1154 #undef nope 1155