1 /* $OpenBSD: regcomp.c,v 1.35 2020/10/13 04:42:28 guenther Exp $ */ 2 /*- 3 * Copyright (c) 1992, 1993, 1994 Henry Spencer. 4 * Copyright (c) 1992, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Henry Spencer. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)regcomp.c 8.5 (Berkeley) 3/20/94 35 */ 36 37 #include <sys/types.h> 38 #include <stdio.h> 39 #include <string.h> 40 #include <ctype.h> 41 #include <limits.h> 42 #include <stdlib.h> 43 #include <regex.h> 44 45 #include "utils.h" 46 #include "regex2.h" 47 48 #include "cclass.h" 49 #include "cname.h" 50 51 /* 52 * parse structure, passed up and down to avoid global variables and 53 * other clumsinesses 54 */ 55 struct parse { 56 char *next; /* next character in RE */ 57 char *end; /* end of string (-> NUL normally) */ 58 int error; /* has an error been seen? */ 59 sop *strip; /* malloced strip */ 60 sopno ssize; /* malloced strip size (allocated) */ 61 sopno slen; /* malloced strip length (used) */ 62 int ncsalloc; /* number of csets allocated */ 63 struct re_guts *g; 64 # define NPAREN 10 /* we need to remember () 1-9 for back refs */ 65 sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ 66 sopno pend[NPAREN]; /* -> ) ([0] unused) */ 67 }; 68 69 static void p_ere(struct parse *, int); 70 static void p_ere_exp(struct parse *); 71 static void p_str(struct parse *); 72 static void p_bre(struct parse *, int, int); 73 static int p_simp_re(struct parse *, int); 74 static int p_count(struct parse *); 75 static void p_bracket(struct parse *); 76 static void p_b_term(struct parse *, cset *); 77 static void p_b_cclass(struct parse *, cset *); 78 static void p_b_eclass(struct parse *, cset *); 79 static char p_b_symbol(struct parse *); 80 static char p_b_coll_elem(struct parse *, int); 81 static char othercase(int); 82 static void bothcases(struct parse *, int); 83 static void ordinary(struct parse *, int); 84 static void backslash(struct parse *, int); 85 static void nonnewline(struct parse *); 86 static void repeat(struct parse *, sopno, int, int); 87 static int seterr(struct parse *, int); 88 static cset *allocset(struct parse *); 89 static void freeset(struct parse *, cset *); 90 static int freezeset(struct parse *, cset *); 91 static int firstch(struct parse *, cset *); 92 static int nch(struct parse *, cset *); 93 static void mcadd(struct parse *, cset *, const char *); 94 static void mcinvert(struct parse *, cset *); 95 static void mccase(struct parse *, cset *); 96 static int isinsets(struct re_guts *, int); 97 static int samesets(struct re_guts *, int, int); 98 static void categorize(struct parse *, struct re_guts *); 99 static sopno dupl(struct parse *, sopno, sopno); 100 static void doemit(struct parse *, sop, size_t); 101 static void doinsert(struct parse *, sop, size_t, sopno); 102 static void dofwd(struct parse *, sopno, sop); 103 static int enlarge(struct parse *, sopno); 104 static void stripsnug(struct parse *, struct re_guts *); 105 static void findmust(struct parse *, struct re_guts *); 106 static sopno pluscount(struct parse *, struct re_guts *); 107 108 static char nuls[10]; /* place to point scanner in event of error */ 109 110 /* 111 * macros for use with parse structure 112 * BEWARE: these know that the parse structure is named `p' !!! 113 */ 114 #define PEEK() (*p->next) 115 #define PEEK2() (*(p->next+1)) 116 #define MORE() (p->next < p->end) 117 #define MORE2() (p->next+1 < p->end) 118 #define SEE(c) (MORE() && PEEK() == (c)) 119 #define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) 120 #define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) 121 #define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0) 122 #define NEXT() (p->next++) 123 #define NEXT2() (p->next += 2) 124 #define NEXTn(n) (p->next += (n)) 125 #define GETNEXT() (*p->next++) 126 #define SETERROR(e) seterr(p, (e)) 127 #define REQUIRE(co, e) (void) ((co) || SETERROR(e)) 128 #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) 129 #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) 130 #define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) 131 #define ASTERN(sop, pos) EMIT(sop, HERE()-pos) 132 #define HERE() (p->slen) 133 #define THERE() (p->slen - 1) 134 #define THERETHERE() (p->slen - 2) 135 #define DROP(n) (p->slen -= (n)) 136 137 #ifndef NDEBUG 138 static int never = 0; /* for use in asserts; shuts lint up */ 139 #else 140 #define never 0 /* some <assert.h>s have bugs too */ 141 #endif 142 143 /* 144 - regcomp - interface for parser and compilation 145 */ 146 int /* 0 success, otherwise REG_something */ 147 regcomp(regex_t *preg, const char *pattern, int cflags) 148 { 149 struct parse pa; 150 struct re_guts *g; 151 struct parse *p = &pa; 152 int i; 153 size_t len; 154 #ifdef REDEBUG 155 # define GOODFLAGS(f) (f) 156 #else 157 # define GOODFLAGS(f) ((f)&~REG_DUMP) 158 #endif 159 160 cflags = GOODFLAGS(cflags); 161 if ((cflags®_EXTENDED) && (cflags®_NOSPEC)) 162 return(REG_INVARG); 163 164 if (cflags®_PEND) { 165 if (preg->re_endp < pattern) 166 return(REG_INVARG); 167 len = preg->re_endp - pattern; 168 } else 169 len = strlen((char *)pattern); 170 171 /* do the mallocs early so failure handling is easy */ 172 g = malloc(sizeof(struct re_guts)); 173 if (g == NULL) 174 return(REG_ESPACE); 175 p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ 176 p->strip = reallocarray(NULL, p->ssize, sizeof(sop)); 177 p->slen = 0; 178 if (p->strip == NULL) { 179 free(g); 180 return(REG_ESPACE); 181 } 182 183 /* set things up */ 184 p->g = g; 185 p->next = (char *)pattern; /* convenience; we do not modify it */ 186 p->end = p->next + len; 187 p->error = 0; 188 p->ncsalloc = 0; 189 for (i = 0; i < NPAREN; i++) { 190 p->pbegin[i] = 0; 191 p->pend[i] = 0; 192 } 193 g->csetsize = NC; 194 g->sets = NULL; 195 g->setbits = NULL; 196 g->ncsets = 0; 197 g->cflags = cflags; 198 g->iflags = 0; 199 g->nbol = 0; 200 g->neol = 0; 201 g->must = NULL; 202 g->mlen = 0; 203 g->nsub = 0; 204 g->ncategories = 1; /* category 0 is "everything else" */ 205 g->categories = &g->catspace[-(CHAR_MIN)]; 206 memset(g->catspace, 0, sizeof(g->catspace)); 207 g->backrefs = 0; 208 209 /* do it */ 210 EMIT(OEND, 0); 211 g->firststate = THERE(); 212 if (cflags®_EXTENDED) 213 p_ere(p, OUT); 214 else if (cflags®_NOSPEC) 215 p_str(p); 216 else 217 p_bre(p, OUT, OUT); 218 EMIT(OEND, 0); 219 g->laststate = THERE(); 220 221 /* tidy up loose ends and fill things in */ 222 categorize(p, g); 223 stripsnug(p, g); 224 findmust(p, g); 225 g->nplus = pluscount(p, g); 226 g->magic = MAGIC2; 227 preg->re_nsub = g->nsub; 228 preg->re_g = g; 229 preg->re_magic = MAGIC1; 230 #ifndef REDEBUG 231 /* not debugging, so can't rely on the assert() in regexec() */ 232 if (g->iflags&BAD) 233 SETERROR(REG_ASSERT); 234 #endif 235 236 /* win or lose, we're done */ 237 if (p->error != 0) /* lose */ 238 regfree(preg); 239 return(p->error); 240 } 241 242 /* 243 - p_ere - ERE parser top level, concatenation and alternation 244 */ 245 static void 246 p_ere(struct parse *p, int stop) /* character this ERE should end at */ 247 { 248 char c; 249 sopno prevback; 250 sopno prevfwd; 251 sopno conc; 252 int first = 1; /* is this the first alternative? */ 253 254 for (;;) { 255 /* do a bunch of concatenated expressions */ 256 conc = HERE(); 257 while (MORE() && (c = PEEK()) != '|' && c != stop) 258 p_ere_exp(p); 259 REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ 260 261 if (!EAT('|')) 262 break; /* NOTE BREAK OUT */ 263 264 if (first) { 265 INSERT(OCH_, conc); /* offset is wrong */ 266 prevfwd = conc; 267 prevback = conc; 268 first = 0; 269 } 270 ASTERN(OOR1, prevback); 271 prevback = THERE(); 272 AHEAD(prevfwd); /* fix previous offset */ 273 prevfwd = HERE(); 274 EMIT(OOR2, 0); /* offset is very wrong */ 275 } 276 277 if (!first) { /* tail-end fixups */ 278 AHEAD(prevfwd); 279 ASTERN(O_CH, prevback); 280 } 281 282 assert(!MORE() || SEE(stop)); 283 } 284 285 /* 286 - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op 287 */ 288 static void 289 p_ere_exp(struct parse *p) 290 { 291 char c; 292 sopno pos; 293 int count; 294 int count2; 295 sopno subno; 296 int wascaret = 0; 297 298 assert(MORE()); /* caller should have ensured this */ 299 c = GETNEXT(); 300 301 pos = HERE(); 302 switch (c) { 303 case '(': 304 REQUIRE(MORE(), REG_EPAREN); 305 p->g->nsub++; 306 subno = p->g->nsub; 307 if (subno < NPAREN) 308 p->pbegin[subno] = HERE(); 309 EMIT(OLPAREN, subno); 310 if (!SEE(')')) 311 p_ere(p, ')'); 312 if (subno < NPAREN) { 313 p->pend[subno] = HERE(); 314 assert(p->pend[subno] != 0); 315 } 316 EMIT(ORPAREN, subno); 317 REQUIRE(MORE() && GETNEXT() == ')', REG_EPAREN); 318 break; 319 case '^': 320 EMIT(OBOL, 0); 321 p->g->iflags |= USEBOL; 322 p->g->nbol++; 323 wascaret = 1; 324 break; 325 case '$': 326 EMIT(OEOL, 0); 327 p->g->iflags |= USEEOL; 328 p->g->neol++; 329 break; 330 case '|': 331 SETERROR(REG_EMPTY); 332 break; 333 case '*': 334 case '+': 335 case '?': 336 SETERROR(REG_BADRPT); 337 break; 338 case '.': 339 if (p->g->cflags®_NEWLINE) 340 nonnewline(p); 341 else 342 EMIT(OANY, 0); 343 break; 344 case '[': 345 p_bracket(p); 346 break; 347 case '\\': 348 REQUIRE(MORE(), REG_EESCAPE); 349 c = GETNEXT(); 350 backslash(p, c); 351 break; 352 case '{': /* okay as ordinary except if digit follows */ 353 REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT); 354 /* FALLTHROUGH */ 355 default: 356 if (p->error != 0) 357 return; 358 ordinary(p, c); 359 break; 360 } 361 362 if (!MORE()) 363 return; 364 c = PEEK(); 365 /* we call { a repetition if followed by a digit */ 366 if (!( c == '*' || c == '+' || c == '?' || 367 (c == '{' && MORE2() && isdigit((uch)PEEK2())) )) 368 return; /* no repetition, we're done */ 369 NEXT(); 370 371 REQUIRE(!wascaret, REG_BADRPT); 372 switch (c) { 373 case '*': /* implemented as +? */ 374 /* this case does not require the (y|) trick, noKLUDGE */ 375 INSERT(OPLUS_, pos); 376 ASTERN(O_PLUS, pos); 377 INSERT(OQUEST_, pos); 378 ASTERN(O_QUEST, pos); 379 break; 380 case '+': 381 INSERT(OPLUS_, pos); 382 ASTERN(O_PLUS, pos); 383 break; 384 case '?': 385 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 386 INSERT(OCH_, pos); /* offset slightly wrong */ 387 ASTERN(OOR1, pos); /* this one's right */ 388 AHEAD(pos); /* fix the OCH_ */ 389 EMIT(OOR2, 0); /* offset very wrong... */ 390 AHEAD(THERE()); /* ...so fix it */ 391 ASTERN(O_CH, THERETHERE()); 392 break; 393 case '{': 394 count = p_count(p); 395 if (EAT(',')) { 396 if (isdigit((uch)PEEK())) { 397 count2 = p_count(p); 398 REQUIRE(count <= count2, REG_BADBR); 399 } else /* single number with comma */ 400 count2 = INFINITY; 401 } else /* just a single number */ 402 count2 = count; 403 repeat(p, pos, count, count2); 404 if (!EAT('}')) { /* error heuristics */ 405 while (MORE() && PEEK() != '}') 406 NEXT(); 407 REQUIRE(MORE(), REG_EBRACE); 408 SETERROR(REG_BADBR); 409 } 410 break; 411 } 412 413 if (!MORE()) 414 return; 415 c = PEEK(); 416 if (!( c == '*' || c == '+' || c == '?' || 417 (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) ) 418 return; 419 SETERROR(REG_BADRPT); 420 } 421 422 /* 423 - p_str - string (no metacharacters) "parser" 424 */ 425 static void 426 p_str(struct parse *p) 427 { 428 REQUIRE(MORE(), REG_EMPTY); 429 while (MORE()) 430 ordinary(p, GETNEXT()); 431 } 432 433 /* 434 - p_bre - BRE parser top level, anchoring and concatenation 435 * Giving end1 as OUT essentially eliminates the end1/end2 check. 436 * 437 * This implementation is a bit of a kludge, in that a trailing $ is first 438 * taken as an ordinary character and then revised to be an anchor. The 439 * only undesirable side effect is that '$' gets included as a character 440 * category in such cases. This is fairly harmless; not worth fixing. 441 * The amount of lookahead needed to avoid this kludge is excessive. 442 */ 443 static void 444 p_bre(struct parse *p, 445 int end1, /* first terminating character */ 446 int end2) /* second terminating character */ 447 { 448 sopno start = HERE(); 449 int first = 1; /* first subexpression? */ 450 int wasdollar = 0; 451 452 if (EAT('^')) { 453 EMIT(OBOL, 0); 454 p->g->iflags |= USEBOL; 455 p->g->nbol++; 456 } 457 while (MORE() && !SEETWO(end1, end2)) { 458 wasdollar = p_simp_re(p, first); 459 first = 0; 460 } 461 if (wasdollar) { /* oops, that was a trailing anchor */ 462 DROP(1); 463 EMIT(OEOL, 0); 464 p->g->iflags |= USEEOL; 465 p->g->neol++; 466 } 467 468 REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ 469 } 470 471 /* 472 - p_simp_re - parse a simple RE, an atom possibly followed by a repetition 473 */ 474 static int /* was the simple RE an unbackslashed $? */ 475 p_simp_re(struct parse *p, 476 int starordinary) /* is a leading * an ordinary character? */ 477 { 478 int c; 479 int count; 480 int count2; 481 sopno pos; 482 int i; 483 sopno subno; 484 # define BACKSL (1<<CHAR_BIT) 485 486 pos = HERE(); /* repetion op, if any, covers from here */ 487 488 assert(MORE()); /* caller should have ensured this */ 489 c = GETNEXT(); 490 if (c == '\\') { 491 REQUIRE(MORE(), REG_EESCAPE); 492 c = BACKSL | GETNEXT(); 493 } 494 switch (c) { 495 case '.': 496 if (p->g->cflags®_NEWLINE) 497 nonnewline(p); 498 else 499 EMIT(OANY, 0); 500 break; 501 case '[': 502 p_bracket(p); 503 break; 504 case BACKSL|'<': 505 EMIT(OBOW, 0); 506 break; 507 case BACKSL|'>': 508 EMIT(OEOW, 0); 509 break; 510 case BACKSL|'{': 511 SETERROR(REG_BADRPT); 512 break; 513 case BACKSL|'(': 514 p->g->nsub++; 515 subno = p->g->nsub; 516 if (subno < NPAREN) 517 p->pbegin[subno] = HERE(); 518 EMIT(OLPAREN, subno); 519 /* the MORE here is an error heuristic */ 520 if (MORE() && !SEETWO('\\', ')')) 521 p_bre(p, '\\', ')'); 522 if (subno < NPAREN) { 523 p->pend[subno] = HERE(); 524 assert(p->pend[subno] != 0); 525 } 526 EMIT(ORPAREN, subno); 527 REQUIRE(EATTWO('\\', ')'), REG_EPAREN); 528 break; 529 case BACKSL|')': /* should not get here -- must be user */ 530 case BACKSL|'}': 531 SETERROR(REG_EPAREN); 532 break; 533 case BACKSL|'1': 534 case BACKSL|'2': 535 case BACKSL|'3': 536 case BACKSL|'4': 537 case BACKSL|'5': 538 case BACKSL|'6': 539 case BACKSL|'7': 540 case BACKSL|'8': 541 case BACKSL|'9': 542 i = (c&~BACKSL) - '0'; 543 assert(i < NPAREN); 544 if (p->pend[i] != 0) { 545 assert(i <= p->g->nsub); 546 EMIT(OBACK_, i); 547 assert(p->pbegin[i] != 0); 548 assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); 549 assert(OP(p->strip[p->pend[i]]) == ORPAREN); 550 (void) dupl(p, p->pbegin[i]+1, p->pend[i]); 551 EMIT(O_BACK, i); 552 } else 553 SETERROR(REG_ESUBREG); 554 p->g->backrefs = 1; 555 break; 556 case '*': 557 REQUIRE(starordinary, REG_BADRPT); 558 /* FALLTHROUGH */ 559 default: 560 if (p->error != 0) 561 return(0); /* Definitely not $... */ 562 ordinary(p, (char)c); 563 break; 564 } 565 566 if (EAT('*')) { /* implemented as +? */ 567 /* this case does not require the (y|) trick, noKLUDGE */ 568 INSERT(OPLUS_, pos); 569 ASTERN(O_PLUS, pos); 570 INSERT(OQUEST_, pos); 571 ASTERN(O_QUEST, pos); 572 } else if (EATTWO('\\', '{')) { 573 count = p_count(p); 574 if (EAT(',')) { 575 if (MORE() && isdigit((uch)PEEK())) { 576 count2 = p_count(p); 577 REQUIRE(count <= count2, REG_BADBR); 578 } else /* single number with comma */ 579 count2 = INFINITY; 580 } else /* just a single number */ 581 count2 = count; 582 repeat(p, pos, count, count2); 583 if (!EATTWO('\\', '}')) { /* error heuristics */ 584 while (MORE() && !SEETWO('\\', '}')) 585 NEXT(); 586 REQUIRE(MORE(), REG_EBRACE); 587 SETERROR(REG_BADBR); 588 } 589 } else if (c == '$') /* $ (but not \$) ends it */ 590 return(1); 591 592 return(0); 593 } 594 595 /* 596 - p_count - parse a repetition count 597 */ 598 static int /* the value */ 599 p_count(struct parse *p) 600 { 601 int count = 0; 602 int ndigits = 0; 603 604 while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) { 605 count = count*10 + (GETNEXT() - '0'); 606 ndigits++; 607 } 608 609 REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); 610 return(count); 611 } 612 613 /* 614 - p_bracket - parse a bracketed character list 615 * 616 * Note a significant property of this code: if the allocset() did SETERROR, 617 * no set operations are done. 618 */ 619 static void 620 p_bracket(struct parse *p) 621 { 622 cset *cs; 623 int invert = 0; 624 625 /* Dept of Truly Sickening Special-Case Kludges */ 626 if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { 627 EMIT(OBOW, 0); 628 NEXTn(6); 629 return; 630 } 631 if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { 632 EMIT(OEOW, 0); 633 NEXTn(6); 634 return; 635 } 636 637 if ((cs = allocset(p)) == NULL) { 638 /* allocset did set error status in p */ 639 return; 640 } 641 642 if (EAT('^')) 643 invert++; /* make note to invert set at end */ 644 if (EAT(']')) 645 CHadd(cs, ']'); 646 else if (EAT('-')) 647 CHadd(cs, '-'); 648 while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) 649 p_b_term(p, cs); 650 if (EAT('-')) 651 CHadd(cs, '-'); 652 REQUIRE(MORE() && GETNEXT() == ']', REG_EBRACK); 653 654 if (p->error != 0) { /* don't mess things up further */ 655 freeset(p, cs); 656 return; 657 } 658 659 if (p->g->cflags®_ICASE) { 660 int i; 661 int ci; 662 663 for (i = p->g->csetsize - 1; i >= 0; i--) 664 if (CHIN(cs, i) && isalpha(i)) { 665 ci = othercase(i); 666 if (ci != i) 667 CHadd(cs, ci); 668 } 669 if (cs->multis != NULL) 670 mccase(p, cs); 671 } 672 if (invert) { 673 int i; 674 675 for (i = p->g->csetsize - 1; i >= 0; i--) 676 if (CHIN(cs, i)) 677 CHsub(cs, i); 678 else 679 CHadd(cs, i); 680 if (p->g->cflags®_NEWLINE) 681 CHsub(cs, '\n'); 682 if (cs->multis != NULL) 683 mcinvert(p, cs); 684 } 685 686 assert(cs->multis == NULL); /* xxx */ 687 688 if (nch(p, cs) == 1) { /* optimize singleton sets */ 689 ordinary(p, firstch(p, cs)); 690 freeset(p, cs); 691 } else 692 EMIT(OANYOF, freezeset(p, cs)); 693 } 694 695 /* 696 - p_b_term - parse one term of a bracketed character list 697 */ 698 static void 699 p_b_term(struct parse *p, cset *cs) 700 { 701 char c; 702 char start, finish; 703 int i; 704 705 /* classify what we've got */ 706 switch ((MORE()) ? PEEK() : '\0') { 707 case '[': 708 c = (MORE2()) ? PEEK2() : '\0'; 709 break; 710 case '-': 711 SETERROR(REG_ERANGE); 712 return; /* NOTE RETURN */ 713 break; 714 default: 715 c = '\0'; 716 break; 717 } 718 719 switch (c) { 720 case ':': /* character class */ 721 NEXT2(); 722 REQUIRE(MORE(), REG_EBRACK); 723 c = PEEK(); 724 REQUIRE(c != '-' && c != ']', REG_ECTYPE); 725 p_b_cclass(p, cs); 726 REQUIRE(MORE(), REG_EBRACK); 727 REQUIRE(EATTWO(':', ']'), REG_ECTYPE); 728 break; 729 case '=': /* equivalence class */ 730 NEXT2(); 731 REQUIRE(MORE(), REG_EBRACK); 732 c = PEEK(); 733 REQUIRE(c != '-' && c != ']', REG_ECOLLATE); 734 p_b_eclass(p, cs); 735 REQUIRE(MORE(), REG_EBRACK); 736 REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); 737 break; 738 default: /* symbol, ordinary character, or range */ 739 /* xxx revision needed for multichar stuff */ 740 start = p_b_symbol(p); 741 if (SEE('-') && MORE2() && PEEK2() != ']') { 742 /* range */ 743 NEXT(); 744 if (EAT('-')) 745 finish = '-'; 746 else 747 finish = p_b_symbol(p); 748 } else 749 finish = start; 750 /* xxx what about signed chars here... */ 751 REQUIRE(start <= finish, REG_ERANGE); 752 for (i = start; i <= finish; i++) 753 CHadd(cs, i); 754 break; 755 } 756 } 757 758 /* 759 - p_b_cclass - parse a character-class name and deal with it 760 */ 761 static void 762 p_b_cclass(struct parse *p, cset *cs) 763 { 764 char *sp = p->next; 765 const struct cclass *cp; 766 size_t len; 767 const char *u; 768 char c; 769 770 while (MORE() && isalpha((uch)PEEK())) 771 NEXT(); 772 len = p->next - sp; 773 for (cp = cclasses; cp->name != NULL; cp++) 774 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 775 break; 776 if (cp->name == NULL) { 777 /* oops, didn't find it */ 778 SETERROR(REG_ECTYPE); 779 return; 780 } 781 782 u = cp->chars; 783 while ((c = *u++) != '\0') 784 CHadd(cs, c); 785 for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) 786 MCadd(p, cs, u); 787 } 788 789 /* 790 - p_b_eclass - parse an equivalence-class name and deal with it 791 * 792 * This implementation is incomplete. xxx 793 */ 794 static void 795 p_b_eclass(struct parse *p, cset *cs) 796 { 797 char c; 798 799 c = p_b_coll_elem(p, '='); 800 CHadd(cs, c); 801 } 802 803 /* 804 - p_b_symbol - parse a character or [..]ed multicharacter collating symbol 805 */ 806 static char /* value of symbol */ 807 p_b_symbol(struct parse *p) 808 { 809 char value; 810 811 REQUIRE(MORE(), REG_EBRACK); 812 if (!EATTWO('[', '.')) 813 return(GETNEXT()); 814 815 /* collating symbol */ 816 value = p_b_coll_elem(p, '.'); 817 REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); 818 return(value); 819 } 820 821 /* 822 - p_b_coll_elem - parse a collating-element name and look it up 823 */ 824 static char /* value of collating element */ 825 p_b_coll_elem(struct parse *p, 826 int endc) /* name ended by endc,']' */ 827 { 828 char *sp = p->next; 829 struct cname *cp; 830 size_t len; 831 832 while (MORE() && !SEETWO(endc, ']')) 833 NEXT(); 834 if (!MORE()) { 835 SETERROR(REG_EBRACK); 836 return(0); 837 } 838 len = p->next - sp; 839 for (cp = cnames; cp->name != NULL; cp++) 840 if (strncmp(cp->name, sp, len) == 0 && strlen(cp->name) == len) 841 return(cp->code); /* known name */ 842 if (len == 1) 843 return(*sp); /* single character */ 844 SETERROR(REG_ECOLLATE); /* neither */ 845 return(0); 846 } 847 848 /* 849 - othercase - return the case counterpart of an alphabetic 850 */ 851 static char /* if no counterpart, return ch */ 852 othercase(int ch) 853 { 854 ch = (uch)ch; 855 assert(isalpha(ch)); 856 if (isupper(ch)) 857 return ((uch)tolower(ch)); 858 else if (islower(ch)) 859 return ((uch)toupper(ch)); 860 else /* peculiar, but could happen */ 861 return(ch); 862 } 863 864 /* 865 - bothcases - emit a dualcase version of a two-case character 866 * 867 * Boy, is this implementation ever a kludge... 868 */ 869 static void 870 bothcases(struct parse *p, int ch) 871 { 872 char *oldnext = p->next; 873 char *oldend = p->end; 874 char bracket[3]; 875 876 ch = (uch)ch; 877 assert(othercase(ch) != ch); /* p_bracket() would recurse */ 878 p->next = bracket; 879 p->end = bracket+2; 880 bracket[0] = ch; 881 bracket[1] = ']'; 882 bracket[2] = '\0'; 883 p_bracket(p); 884 assert(p->next == bracket+2); 885 p->next = oldnext; 886 p->end = oldend; 887 } 888 889 /* 890 - ordinary - emit an ordinary character 891 */ 892 static void 893 ordinary(struct parse *p, int ch) 894 { 895 cat_t *cap = p->g->categories; 896 897 if ((p->g->cflags®_ICASE) && isalpha((uch)ch) && othercase(ch) != ch) 898 bothcases(p, ch); 899 else { 900 EMIT(OCHAR, (uch)ch); 901 if (cap[ch] == 0) 902 cap[ch] = p->g->ncategories++; 903 } 904 } 905 906 /* 907 * do something magic with this character, but only if it's extra magic 908 */ 909 static void 910 backslash(struct parse *p, int ch) 911 { 912 switch (ch) { 913 case '<': 914 EMIT(OBOW, 0); 915 break; 916 case '>': 917 EMIT(OEOW, 0); 918 break; 919 default: 920 ordinary(p, ch); 921 break; 922 } 923 } 924 925 /* 926 - nonnewline - emit REG_NEWLINE version of OANY 927 * 928 * Boy, is this implementation ever a kludge... 929 */ 930 static void 931 nonnewline(struct parse *p) 932 { 933 char *oldnext = p->next; 934 char *oldend = p->end; 935 char bracket[4]; 936 937 p->next = bracket; 938 p->end = bracket+3; 939 bracket[0] = '^'; 940 bracket[1] = '\n'; 941 bracket[2] = ']'; 942 bracket[3] = '\0'; 943 p_bracket(p); 944 assert(p->next == bracket+3); 945 p->next = oldnext; 946 p->end = oldend; 947 } 948 949 /* 950 - repeat - generate code for a bounded repetition, recursively if needed 951 */ 952 static void 953 repeat(struct parse *p, 954 sopno start, /* operand from here to end of strip */ 955 int from, /* repeated from this number */ 956 int to) /* to this number of times (maybe INFINITY) */ 957 { 958 sopno finish = HERE(); 959 # define N 2 960 # define INF 3 961 # define REP(f, t) ((f)*8 + (t)) 962 # define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) 963 sopno copy; 964 965 if (p->error != 0) /* head off possible runaway recursion */ 966 return; 967 968 assert(from <= to); 969 970 switch (REP(MAP(from), MAP(to))) { 971 case REP(0, 0): /* must be user doing this */ 972 DROP(finish-start); /* drop the operand */ 973 break; 974 case REP(0, 1): /* as x{1,1}? */ 975 case REP(0, N): /* as x{1,n}? */ 976 case REP(0, INF): /* as x{1,}? */ 977 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 978 INSERT(OCH_, start); /* offset is wrong... */ 979 repeat(p, start+1, 1, to); 980 ASTERN(OOR1, start); 981 AHEAD(start); /* ... fix it */ 982 EMIT(OOR2, 0); 983 AHEAD(THERE()); 984 ASTERN(O_CH, THERETHERE()); 985 break; 986 case REP(1, 1): /* trivial case */ 987 /* done */ 988 break; 989 case REP(1, N): /* as x?x{1,n-1} */ 990 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 991 INSERT(OCH_, start); 992 ASTERN(OOR1, start); 993 AHEAD(start); 994 EMIT(OOR2, 0); /* offset very wrong... */ 995 AHEAD(THERE()); /* ...so fix it */ 996 ASTERN(O_CH, THERETHERE()); 997 copy = dupl(p, start+1, finish+1); 998 assert(copy == finish+4); 999 repeat(p, copy, 1, to-1); 1000 break; 1001 case REP(1, INF): /* as x+ */ 1002 INSERT(OPLUS_, start); 1003 ASTERN(O_PLUS, start); 1004 break; 1005 case REP(N, N): /* as xx{m-1,n-1} */ 1006 copy = dupl(p, start, finish); 1007 repeat(p, copy, from-1, to-1); 1008 break; 1009 case REP(N, INF): /* as xx{n-1,INF} */ 1010 copy = dupl(p, start, finish); 1011 repeat(p, copy, from-1, to); 1012 break; 1013 default: /* "can't happen" */ 1014 SETERROR(REG_ASSERT); /* just in case */ 1015 break; 1016 } 1017 } 1018 1019 /* 1020 - seterr - set an error condition 1021 */ 1022 static int /* useless but makes type checking happy */ 1023 seterr(struct parse *p, int e) 1024 { 1025 if (p->error == 0) /* keep earliest error condition */ 1026 p->error = e; 1027 p->next = nuls; /* try to bring things to a halt */ 1028 p->end = nuls; 1029 return(0); /* make the return value well-defined */ 1030 } 1031 1032 /* 1033 - allocset - allocate a set of characters for [] 1034 */ 1035 static cset * 1036 allocset(struct parse *p) 1037 { 1038 int no = p->g->ncsets++; 1039 size_t nc; 1040 size_t nbytes; 1041 cset *cs; 1042 size_t css = (size_t)p->g->csetsize; 1043 int i; 1044 1045 if (no >= p->ncsalloc) { /* need another column of space */ 1046 void *ptr; 1047 1048 p->ncsalloc += CHAR_BIT; 1049 nc = p->ncsalloc; 1050 assert(nc % CHAR_BIT == 0); 1051 1052 ptr = reallocarray(p->g->sets, nc, sizeof(cset)); 1053 if (ptr == NULL) 1054 goto nomem; 1055 p->g->sets = ptr; 1056 1057 ptr = reallocarray(p->g->setbits, nc / CHAR_BIT, css); 1058 if (ptr == NULL) 1059 goto nomem; 1060 nbytes = (nc / CHAR_BIT) * css; 1061 p->g->setbits = ptr; 1062 1063 for (i = 0; i < no; i++) 1064 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); 1065 1066 (void) memset((char *)p->g->setbits + (nbytes - css), 0, css); 1067 } 1068 /* XXX should not happen */ 1069 if (p->g->sets == NULL || p->g->setbits == NULL) 1070 goto nomem; 1071 1072 cs = &p->g->sets[no]; 1073 cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); 1074 cs->mask = 1 << ((no) % CHAR_BIT); 1075 cs->hash = 0; 1076 cs->smultis = 0; 1077 cs->multis = NULL; 1078 1079 return(cs); 1080 nomem: 1081 free(p->g->sets); 1082 p->g->sets = NULL; 1083 free(p->g->setbits); 1084 p->g->setbits = NULL; 1085 1086 SETERROR(REG_ESPACE); 1087 /* caller's responsibility not to do set ops */ 1088 return(NULL); 1089 } 1090 1091 /* 1092 - freeset - free a now-unused set 1093 */ 1094 static void 1095 freeset(struct parse *p, cset *cs) 1096 { 1097 int i; 1098 cset *top = &p->g->sets[p->g->ncsets]; 1099 size_t css = (size_t)p->g->csetsize; 1100 1101 for (i = 0; i < css; i++) 1102 CHsub(cs, i); 1103 if (cs == top-1) /* recover only the easy case */ 1104 p->g->ncsets--; 1105 } 1106 1107 /* 1108 - freezeset - final processing on a set of characters 1109 * 1110 * The main task here is merging identical sets. This is usually a waste 1111 * of time (although the hash code minimizes the overhead), but can win 1112 * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash 1113 * is done using addition rather than xor -- all ASCII [aA] sets xor to 1114 * the same value! 1115 */ 1116 static int /* set number */ 1117 freezeset(struct parse *p, cset *cs) 1118 { 1119 uch h = cs->hash; 1120 int i; 1121 cset *top = &p->g->sets[p->g->ncsets]; 1122 cset *cs2; 1123 size_t css = (size_t)p->g->csetsize; 1124 1125 /* look for an earlier one which is the same */ 1126 for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) 1127 if (cs2->hash == h && cs2 != cs) { 1128 /* maybe */ 1129 for (i = 0; i < css; i++) 1130 if (!!CHIN(cs2, i) != !!CHIN(cs, i)) 1131 break; /* no */ 1132 if (i == css) 1133 break; /* yes */ 1134 } 1135 1136 if (cs2 < top) { /* found one */ 1137 freeset(p, cs); 1138 cs = cs2; 1139 } 1140 1141 return((int)(cs - p->g->sets)); 1142 } 1143 1144 /* 1145 - firstch - return first character in a set (which must have at least one) 1146 */ 1147 static int /* character; there is no "none" value */ 1148 firstch(struct parse *p, cset *cs) 1149 { 1150 int i; 1151 size_t css = (size_t)p->g->csetsize; 1152 1153 for (i = 0; i < css; i++) 1154 if (CHIN(cs, i)) 1155 return((char)i); 1156 assert(never); 1157 return(0); /* arbitrary */ 1158 } 1159 1160 /* 1161 - nch - number of characters in a set 1162 */ 1163 static int 1164 nch(struct parse *p, cset *cs) 1165 { 1166 int i; 1167 size_t css = (size_t)p->g->csetsize; 1168 int n = 0; 1169 1170 for (i = 0; i < css; i++) 1171 if (CHIN(cs, i)) 1172 n++; 1173 return(n); 1174 } 1175 1176 /* 1177 - mcadd - add a collating element to a cset 1178 */ 1179 static void 1180 mcadd( struct parse *p, cset *cs, const char *cp) 1181 { 1182 size_t oldend = cs->smultis; 1183 void *np; 1184 1185 cs->smultis += strlen(cp) + 1; 1186 np = realloc(cs->multis, cs->smultis); 1187 if (np == NULL) { 1188 free(cs->multis); 1189 cs->multis = NULL; 1190 SETERROR(REG_ESPACE); 1191 return; 1192 } 1193 cs->multis = np; 1194 1195 strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1); 1196 } 1197 1198 /* 1199 - mcinvert - invert the list of collating elements in a cset 1200 * 1201 * This would have to know the set of possibilities. Implementation 1202 * is deferred. 1203 */ 1204 static void 1205 mcinvert(struct parse *p, cset *cs) 1206 { 1207 assert(cs->multis == NULL); /* xxx */ 1208 } 1209 1210 /* 1211 - mccase - add case counterparts of the list of collating elements in a cset 1212 * 1213 * This would have to know the set of possibilities. Implementation 1214 * is deferred. 1215 */ 1216 static void 1217 mccase(struct parse *p, cset *cs) 1218 { 1219 assert(cs->multis == NULL); /* xxx */ 1220 } 1221 1222 /* 1223 - isinsets - is this character in any sets? 1224 */ 1225 static int /* predicate */ 1226 isinsets(struct re_guts *g, int c) 1227 { 1228 uch *col; 1229 int i; 1230 int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 1231 unsigned uc = (uch)c; 1232 1233 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 1234 if (col[uc] != 0) 1235 return(1); 1236 return(0); 1237 } 1238 1239 /* 1240 - samesets - are these two characters in exactly the same sets? 1241 */ 1242 static int /* predicate */ 1243 samesets(struct re_guts *g, int c1, int c2) 1244 { 1245 uch *col; 1246 int i; 1247 int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 1248 unsigned uc1 = (uch)c1; 1249 unsigned uc2 = (uch)c2; 1250 1251 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 1252 if (col[uc1] != col[uc2]) 1253 return(0); 1254 return(1); 1255 } 1256 1257 /* 1258 - categorize - sort out character categories 1259 */ 1260 static void 1261 categorize(struct parse *p, struct re_guts *g) 1262 { 1263 cat_t *cats = g->categories; 1264 int c; 1265 int c2; 1266 cat_t cat; 1267 1268 /* avoid making error situations worse */ 1269 if (p->error != 0) 1270 return; 1271 1272 for (c = CHAR_MIN; c <= CHAR_MAX; c++) 1273 if (cats[c] == 0 && isinsets(g, c)) { 1274 cat = g->ncategories++; 1275 cats[c] = cat; 1276 for (c2 = c+1; c2 <= CHAR_MAX; c2++) 1277 if (cats[c2] == 0 && samesets(g, c, c2)) 1278 cats[c2] = cat; 1279 } 1280 } 1281 1282 /* 1283 - dupl - emit a duplicate of a bunch of sops 1284 */ 1285 static sopno /* start of duplicate */ 1286 dupl(struct parse *p, 1287 sopno start, /* from here */ 1288 sopno finish) /* to this less one */ 1289 { 1290 sopno ret = HERE(); 1291 sopno len = finish - start; 1292 1293 assert(finish >= start); 1294 if (len == 0) 1295 return(ret); 1296 if (!enlarge(p, p->ssize + len)) /* this many unexpected additions */ 1297 return(ret); 1298 (void) memcpy(p->strip + p->slen, p->strip + start, len * sizeof(sop)); 1299 p->slen += len; 1300 return(ret); 1301 } 1302 1303 /* 1304 - doemit - emit a strip operator 1305 * 1306 * It might seem better to implement this as a macro with a function as 1307 * hard-case backup, but it's just too big and messy unless there are 1308 * some changes to the data structures. Maybe later. 1309 */ 1310 static void 1311 doemit(struct parse *p, sop op, size_t opnd) 1312 { 1313 /* avoid making error situations worse */ 1314 if (p->error != 0) 1315 return; 1316 1317 /* deal with oversize operands ("can't happen", more or less) */ 1318 assert(opnd < 1<<OPSHIFT); 1319 1320 /* deal with undersized strip */ 1321 if (p->slen >= p->ssize) 1322 if (!enlarge(p, (p->ssize+1) / 2 * 3)) /* +50% */ 1323 return; 1324 1325 /* finally, it's all reduced to the easy case */ 1326 p->strip[p->slen++] = SOP(op, opnd); 1327 } 1328 1329 /* 1330 - doinsert - insert a sop into the strip 1331 */ 1332 static void 1333 doinsert(struct parse *p, sop op, size_t opnd, sopno pos) 1334 { 1335 sopno sn; 1336 sop s; 1337 int i; 1338 1339 /* avoid making error situations worse */ 1340 if (p->error != 0) 1341 return; 1342 1343 sn = HERE(); 1344 EMIT(op, opnd); /* do checks, ensure space */ 1345 assert(HERE() == sn+1); 1346 s = p->strip[sn]; 1347 1348 /* adjust paren pointers */ 1349 assert(pos > 0); 1350 for (i = 1; i < NPAREN; i++) { 1351 if (p->pbegin[i] >= pos) { 1352 p->pbegin[i]++; 1353 } 1354 if (p->pend[i] >= pos) { 1355 p->pend[i]++; 1356 } 1357 } 1358 1359 memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], 1360 (HERE()-pos-1)*sizeof(sop)); 1361 p->strip[pos] = s; 1362 } 1363 1364 /* 1365 - dofwd - complete a forward reference 1366 */ 1367 static void 1368 dofwd(struct parse *p, sopno pos, sop value) 1369 { 1370 /* avoid making error situations worse */ 1371 if (p->error != 0) 1372 return; 1373 1374 assert(value < 1<<OPSHIFT); 1375 p->strip[pos] = OP(p->strip[pos]) | value; 1376 } 1377 1378 /* 1379 - enlarge - enlarge the strip 1380 */ 1381 static int 1382 enlarge(struct parse *p, sopno size) 1383 { 1384 sop *sp; 1385 1386 if (p->ssize >= size) 1387 return 1; 1388 1389 sp = reallocarray(p->strip, size, sizeof(sop)); 1390 if (sp == NULL) { 1391 SETERROR(REG_ESPACE); 1392 return 0; 1393 } 1394 p->strip = sp; 1395 p->ssize = size; 1396 return 1; 1397 } 1398 1399 /* 1400 - stripsnug - compact the strip 1401 */ 1402 static void 1403 stripsnug(struct parse *p, struct re_guts *g) 1404 { 1405 g->nstates = p->slen; 1406 g->strip = reallocarray(p->strip, p->slen, sizeof(sop)); 1407 if (g->strip == NULL) { 1408 SETERROR(REG_ESPACE); 1409 g->strip = p->strip; 1410 } 1411 } 1412 1413 /* 1414 - findmust - fill in must and mlen with longest mandatory literal string 1415 * 1416 * This algorithm could do fancy things like analyzing the operands of | 1417 * for common subsequences. Someday. This code is simple and finds most 1418 * of the interesting cases. 1419 * 1420 * Note that must and mlen got initialized during setup. 1421 */ 1422 static void 1423 findmust(struct parse *p, struct re_guts *g) 1424 { 1425 sop *scan; 1426 sop *start; /* start initialized in the default case, after that */ 1427 sop *newstart; /* newstart was initialized in the OCHAR case */ 1428 sopno newlen; 1429 sop s; 1430 char *cp; 1431 sopno i; 1432 1433 /* avoid making error situations worse */ 1434 if (p->error != 0) 1435 return; 1436 1437 /* find the longest OCHAR sequence in strip */ 1438 newlen = 0; 1439 scan = g->strip + 1; 1440 do { 1441 s = *scan++; 1442 switch (OP(s)) { 1443 case OCHAR: /* sequence member */ 1444 if (newlen == 0) /* new sequence */ 1445 newstart = scan - 1; 1446 newlen++; 1447 break; 1448 case OPLUS_: /* things that don't break one */ 1449 case OLPAREN: 1450 case ORPAREN: 1451 break; 1452 case OQUEST_: /* things that must be skipped */ 1453 case OCH_: 1454 scan--; 1455 do { 1456 scan += OPND(s); 1457 s = *scan; 1458 /* assert() interferes w debug printouts */ 1459 if (OP(s) != O_QUEST && OP(s) != O_CH && 1460 OP(s) != OOR2) { 1461 g->iflags |= BAD; 1462 return; 1463 } 1464 } while (OP(s) != O_QUEST && OP(s) != O_CH); 1465 /* fallthrough */ 1466 default: /* things that break a sequence */ 1467 if (newlen > g->mlen) { /* ends one */ 1468 start = newstart; 1469 g->mlen = newlen; 1470 } 1471 newlen = 0; 1472 break; 1473 } 1474 } while (OP(s) != OEND); 1475 1476 if (g->mlen == 0) /* there isn't one */ 1477 return; 1478 1479 /* turn it into a character string */ 1480 g->must = malloc((size_t)g->mlen + 1); 1481 if (g->must == NULL) { /* argh; just forget it */ 1482 g->mlen = 0; 1483 return; 1484 } 1485 cp = g->must; 1486 scan = start; 1487 for (i = g->mlen; i > 0; i--) { 1488 while (OP(s = *scan++) != OCHAR) 1489 continue; 1490 assert(cp < g->must + g->mlen); 1491 *cp++ = (char)OPND(s); 1492 } 1493 assert(cp == g->must + g->mlen); 1494 *cp++ = '\0'; /* just on general principles */ 1495 } 1496 1497 /* 1498 - pluscount - count + nesting 1499 */ 1500 static sopno /* nesting depth */ 1501 pluscount(struct parse *p, struct re_guts *g) 1502 { 1503 sop *scan; 1504 sop s; 1505 sopno plusnest = 0; 1506 sopno maxnest = 0; 1507 1508 if (p->error != 0) 1509 return(0); /* there may not be an OEND */ 1510 1511 scan = g->strip + 1; 1512 do { 1513 s = *scan++; 1514 switch (OP(s)) { 1515 case OPLUS_: 1516 plusnest++; 1517 break; 1518 case O_PLUS: 1519 if (plusnest > maxnest) 1520 maxnest = plusnest; 1521 plusnest--; 1522 break; 1523 } 1524 } while (OP(s) != OEND); 1525 if (plusnest != 0) 1526 g->iflags |= BAD; 1527 return(maxnest); 1528 } 1529