1 /*- 2 * Copyright (c) 1992 Henry Spencer. 3 * Copyright (c) 1992, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Henry Spencer of the University of Toronto. 8 * 9 * %sccs.include.redist.c% 10 * 11 * @(#)regcomp.c 8.1 (Berkeley) 06/04/93 12 */ 13 14 #if defined(LIBC_SCCS) && !defined(lint) 15 static char sccsid[] = "@(#)regcomp.c 8.1 (Berkeley) 06/04/93"; 16 #endif /* LIBC_SCCS and not lint */ 17 18 #include <sys/types.h> 19 #include <stdio.h> 20 #include <string.h> 21 #include <ctype.h> 22 #include <limits.h> 23 #include <stdlib.h> 24 #include <regex.h> 25 26 #include "utils.h" 27 #include "regex2.h" 28 29 #include "cclass.h" 30 #include "cname.h" 31 32 /* 33 * parse structure, passed up and down to avoid global variables and 34 * other clumsinesses 35 */ 36 struct parse { 37 char *next; /* next character in RE */ 38 char *end; /* end of string (-> NUL normally) */ 39 int error; /* has an error been seen? */ 40 sop *strip; /* malloced strip */ 41 sopno ssize; /* malloced strip size (allocated) */ 42 sopno slen; /* malloced strip length (used) */ 43 int ncsalloc; /* number of csets allocated */ 44 struct re_guts *g; 45 # define NPAREN 10 /* we need to remember () 1-9 for back refs */ 46 sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ 47 sopno pend[NPAREN]; /* -> ) ([0] unused) */ 48 }; 49 50 51 static void p_ere(/*register struct parse *p, int stop*/); 52 static void p_ere_exp(/*register struct parse *p*/); 53 static void p_str(/*register struct parse *p*/); 54 static void p_bre(/*register struct parse *p, register int end1, register int end2*/); 55 static int p_simp_re(/*register struct parse *p, int starordinary*/); 56 static int p_count(/*register struct parse *p*/); 57 static void p_bracket(/*register struct parse *p*/); 58 static void p_b_term(/*register struct parse *p, register cset *cs*/); 59 static void p_b_cclass(/*register struct parse *p, register cset *cs*/); 60 static void p_b_eclass(/*register struct parse *p, register cset *cs*/); 61 static char p_b_symbol(/*register struct parse *p*/); 62 static char p_b_coll_elem(/*register struct parse *p, int endc*/); 63 static char othercase(/*int ch*/); 64 static void bothcases(/*register struct parse *p, int ch*/); 65 static void ordinary(/*register struct parse *p, register int ch*/); 66 static void nonnewline(/*register struct parse *p*/); 67 static void repeat(/*register struct parse *p, sopno start, int from, int to*/); 68 static int seterr(/*register struct parse *p, int e*/); 69 static cset *allocset(/*register struct parse *p*/); 70 static void freeset(/*register struct parse *p, register cset *cs*/); 71 static int freezeset(/*register struct parse *p, register cset *cs*/); 72 static int firstch(/*register struct parse *p, register cset *cs*/); 73 static int nch(/*register struct parse *p, register cset *cs*/); 74 static void mcadd(/*register struct parse *p, register cset *cs, register char *cp*/); 75 static void mcsub(/*register cset *cs, register char *cp*/); 76 static int mcin(/*register cset *cs, register char *cp*/); 77 static char *mcfind(/*register cset *cs, register char *cp*/); 78 static void mcinvert(/*register cset *cs*/); 79 static void mccase(/*register cset *cs*/); 80 static int isinsets(/*register struct re_guts *g, int c*/); 81 static int samesets(/*register struct re_guts *g, int c1, int c2*/); 82 static void categorize(/*struct parse *p, register struct re_guts *g*/); 83 static sopno dupl(/*register struct parse *p, sopno start, sopno finish*/); 84 static void doemit(/*register struct parse *p, sop op, size_t opnd*/); 85 static void doinsert(/*register struct parse *p, sop op, size_t opnd, sopno pos*/); 86 static void dofwd(/*register struct parse *p, sopno pos, sop value*/); 87 static void enlarge(/*register struct parse *p, sopno size*/); 88 static void stripsnug(/*register struct parse *p, register struct re_guts *g*/); 89 static void findmust(/*register struct parse *p, register struct re_guts *g*/); 90 static sopno pluscount(/*register struct parse *p, register struct re_guts *g*/); 91 92 static char nuls[10]; /* place to point scanner in event of error */ 93 94 /* 95 * macros for use with parse structure 96 * BEWARE: these know that the parse structure is named `p' !!! 97 */ 98 #define PEEK() (*p->next) 99 #define PEEK2() (*(p->next+1)) 100 #define MORE() (p->next < p->end) 101 #define MORE2() (p->next+1 < p->end) 102 #define SEE(c) (MORE() && PEEK() == (c)) 103 #define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) 104 #define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) 105 #define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0) 106 #define NEXT() (p->next++) 107 #define NEXT2() (p->next += 2) 108 #define NEXTn(n) (p->next += (n)) 109 #define GETNEXT() (*p->next++) 110 #define SETERROR(e) seterr(p, (e)) 111 #define REQUIRE(co, e) ((co) || SETERROR(e)) 112 #define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) 113 #define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) 114 #define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) 115 #define EMIT(sop, sopnd) doemit(p, sop, (size_t)(sopnd)) 116 #define INSERT(sop, pos) doinsert(p, sop, HERE()-(pos)+1, pos) 117 #define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) 118 #define ASTERN(sop, pos) EMIT(sop, HERE()-pos) 119 #define HERE() (p->slen) 120 #define THERE() (p->slen - 1) 121 #define DROP(n) (p->slen -= (n)) 122 123 #ifndef NDEBUG 124 static int never = 0; /* for use in asserts; shuts lint up */ 125 #endif 126 127 /* 128 - regcomp - interface for parser and compilation 129 = extern int regcomp(regex_t *preg, const char *pattern, int cflags); 130 = #define REG_BASIC 0000 131 = #define REG_EXTENDED 0001 132 = #define REG_ICASE 0002 133 = #define REG_NOSUB 0004 134 = #define REG_NEWLINE 0010 135 = #define REG_NOSPEC 0020 136 = #define REG_PEND 0040 137 = #define REG_DUMP 0200 138 */ 139 int /* 0 success, otherwise REG_something */ 140 regcomp(preg, pattern, cflags) 141 regex_t *preg; 142 const char *pattern; 143 int cflags; 144 { 145 struct parse pa; 146 register struct re_guts *g; 147 register struct parse *p = &pa; 148 register int i; 149 register size_t len; 150 151 if ((cflags®_EXTENDED) && (cflags®_NOSPEC)) 152 return(REG_INVARG); 153 154 if (cflags®_PEND) { 155 if (preg->re_endp < pattern) 156 return(REG_INVARG); 157 len = preg->re_endp - pattern; 158 } else 159 len = strlen((char *)pattern); 160 161 /* do the mallocs early so failure handling is easy */ 162 g = (struct re_guts *)malloc(sizeof(struct re_guts) + 163 (NC-1)*sizeof(cat_t)); 164 if (g == NULL) 165 return(REG_ESPACE); 166 p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ 167 p->strip = (sop *)malloc(p->ssize * sizeof(sop)); 168 p->slen = 0; 169 if (p->strip == NULL) { 170 free((char *)g); 171 return(REG_ESPACE); 172 } 173 174 /* set things up */ 175 p->g = g; 176 p->next = (char *)pattern; /* convenience; we do not modify it */ 177 p->end = p->next + len; 178 p->error = 0; 179 p->ncsalloc = 0; 180 for (i = 0; i < NPAREN; i++) { 181 p->pbegin[i] = 0; 182 p->pend[i] = 0; 183 } 184 g->csetsize = NC; 185 g->sets = NULL; 186 g->setbits = NULL; 187 g->ncsets = 0; 188 g->cflags = cflags; 189 g->iflags = 0; 190 g->nbol = 0; 191 g->neol = 0; 192 g->must = NULL; 193 g->mlen = 0; 194 g->nsub = 0; 195 g->ncategories = 1; /* category 0 is "everything else" */ 196 g->categories = &g->catspace[-(CHAR_MIN)]; 197 (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t)); 198 g->backrefs = 0; 199 200 /* do it */ 201 EMIT(OEND, 0); 202 g->firststate = THERE(); 203 if (cflags®_EXTENDED) 204 p_ere(p, OUT); 205 else if (cflags®_NOSPEC) 206 p_str(p); 207 else 208 p_bre(p, OUT, OUT); 209 EMIT(OEND, 0); 210 g->laststate = THERE(); 211 212 /* tidy up loose ends and fill things in */ 213 categorize(p, g); 214 stripsnug(p, g); 215 findmust(p, g); 216 g->nplus = pluscount(p, g); 217 g->magic = MAGIC2; 218 preg->re_nsub = g->nsub; 219 preg->re_g = g; 220 preg->re_magic = MAGIC1; 221 #ifndef REDEBUG 222 /* not debugging, so can't rely on the assert() in regexec() */ 223 if (g->iflags&BAD) 224 SETERROR(REG_ASSERT); 225 #endif 226 227 /* win or lose, we're done */ 228 if (p->error != 0) /* lose */ 229 regfree(preg); 230 return(p->error); 231 } 232 233 /* 234 - p_ere - ERE parser top level, concatenation and alternation 235 == static void p_ere(register struct parse *p, int stop); 236 */ 237 static void 238 p_ere(p, stop) 239 register struct parse *p; 240 int stop; /* character this ERE should end at */ 241 { 242 register char c; 243 register sopno prevback; 244 register sopno prevfwd; 245 register sopno conc; 246 register int first = 1; /* is this the first alternative? */ 247 248 for (;;) { 249 /* do a bunch of concatenated expressions */ 250 conc = HERE(); 251 while (MORE() && (c = PEEK()) != '|' && c != stop) 252 p_ere_exp(p); 253 REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ 254 255 if (!EAT('|')) 256 break; /* NOTE BREAK OUT */ 257 258 if (first) { 259 INSERT(OCH_, conc); /* offset is wrong */ 260 prevfwd = conc; 261 prevback = conc; 262 first = 0; 263 } 264 ASTERN(OOR1, prevback); 265 prevback = THERE(); 266 AHEAD(prevfwd); /* fix previous offset */ 267 prevfwd = HERE(); 268 EMIT(OOR2, 0); /* offset is very wrong */ 269 } 270 271 if (!first) { /* tail-end fixups */ 272 AHEAD(prevfwd); 273 ASTERN(O_CH, prevback); 274 } 275 276 assert(!MORE() || SEE(stop)); 277 } 278 279 /* 280 - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op 281 == static void p_ere_exp(register struct parse *p); 282 */ 283 static void 284 p_ere_exp(p) 285 register struct parse *p; 286 { 287 register char c; 288 register sopno pos; 289 register int count; 290 register int count2; 291 register sopno subno; 292 int wascaret = 0; 293 294 assert(MORE()); /* caller should have ensured this */ 295 c = GETNEXT(); 296 297 pos = HERE(); 298 switch (c) { 299 case '(': 300 REQUIRE(MORE(), REG_EPAREN); 301 p->g->nsub++; 302 subno = p->g->nsub; 303 if (subno < NPAREN) 304 p->pbegin[subno] = HERE(); 305 EMIT(OLPAREN, subno); 306 if (!SEE(')')) 307 p_ere(p, ')'); 308 if (subno < NPAREN) { 309 p->pend[subno] = HERE(); 310 assert(p->pend[subno] != 0); 311 } 312 EMIT(ORPAREN, subno); 313 MUSTEAT(')', REG_EPAREN); 314 break; 315 #ifndef POSIX_MISTAKE 316 case ')': /* happens only if no current unmatched ( */ 317 /* 318 * You may ask, why the ifndef? Because I didn't notice 319 * this until slightly too late for 1003.2, and none of the 320 * other 1003.2 regular-expression reviewers noticed it at 321 * all. So an unmatched ) is legal POSIX, at least until 322 * we can get it fixed. 323 */ 324 SETERROR(REG_EPAREN); 325 break; 326 #endif 327 case '^': 328 EMIT(OBOL, 0); 329 p->g->iflags |= USEBOL; 330 p->g->nbol++; 331 wascaret = 1; 332 break; 333 case '$': 334 EMIT(OEOL, 0); 335 p->g->iflags |= USEEOL; 336 p->g->neol++; 337 break; 338 case '|': 339 SETERROR(REG_EMPTY); 340 break; 341 case '*': 342 case '+': 343 case '?': 344 SETERROR(REG_BADRPT); 345 break; 346 case '.': 347 if (p->g->cflags®_NEWLINE) 348 nonnewline(p); 349 else 350 EMIT(OANY, 0); 351 break; 352 case '[': 353 p_bracket(p); 354 break; 355 case '\\': 356 REQUIRE(MORE(), REG_EESCAPE); 357 c = GETNEXT(); 358 ordinary(p, c); 359 break; 360 case '{': /* okay as ordinary except if digit follows */ 361 REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT); 362 /* FALLTHROUGH */ 363 default: 364 ordinary(p, c); 365 break; 366 } 367 368 if (!MORE()) 369 return; 370 c = PEEK(); 371 /* we call { a repetition if followed by a digit */ 372 if (!( c == '*' || c == '+' || c == '?' || 373 (c == '{' && MORE2() && isdigit(PEEK2())) )) 374 return; /* no repetition, we're done */ 375 NEXT(); 376 377 REQUIRE(!wascaret, REG_BADRPT); 378 switch (c) { 379 case '*': /* implemented as +? */ 380 INSERT(OPLUS_, pos); 381 ASTERN(O_PLUS, pos); 382 INSERT(OQUEST_, pos); 383 ASTERN(O_QUEST, pos); 384 break; 385 case '+': 386 INSERT(OPLUS_, pos); 387 ASTERN(O_PLUS, pos); 388 break; 389 case '?': 390 INSERT(OQUEST_, pos); 391 ASTERN(O_QUEST, pos); 392 break; 393 case '{': 394 count = p_count(p); 395 if (EAT(',')) { 396 if (isdigit(PEEK())) { 397 count2 = p_count(p); 398 REQUIRE(count <= count2, REG_BADBR); 399 } else /* single number with comma */ 400 count2 = INFINITY; 401 } else /* just a single number */ 402 count2 = count; 403 repeat(p, pos, count, count2); 404 if (!EAT('}')) { /* error heuristics */ 405 while (MORE() && PEEK() != '}') 406 NEXT(); 407 REQUIRE(MORE(), REG_EBRACE); 408 SETERROR(REG_BADBR); 409 } 410 break; 411 } 412 413 if (!MORE()) 414 return; 415 c = PEEK(); 416 if (!( c == '*' || c == '+' || c == '?' || 417 (c == '{' && MORE2() && isdigit(PEEK2())) ) ) 418 return; 419 SETERROR(REG_BADRPT); 420 } 421 422 /* 423 - p_str - string (no metacharacters) "parser" 424 == static void p_str(register struct parse *p); 425 */ 426 static void 427 p_str(p) 428 register struct parse *p; 429 { 430 REQUIRE(MORE(), REG_EMPTY); 431 while (MORE()) 432 ordinary(p, GETNEXT()); 433 } 434 435 /* 436 - p_bre - BRE parser top level, anchoring and concatenation 437 == static void p_bre(register struct parse *p, register int end1, \ 438 == register int end2); 439 * Giving end1 as OUT essentially eliminates the end1/end2 check. 440 * 441 * This implementation is a bit of a kludge, in that a trailing $ is first 442 * taken as an ordinary character and then revised to be an anchor. The 443 * only undesirable side effect is that '$' gets included as a character 444 * category in such cases. This is fairly harmless; not worth fixing. 445 * The amount of lookahead needed to avoid this kludge is excessive. 446 */ 447 static void 448 p_bre(p, end1, end2) 449 register struct parse *p; 450 register int end1; /* first terminating character */ 451 register int end2; /* second terminating character */ 452 { 453 register sopno start = HERE(); 454 register int first = 1; /* first subexpression? */ 455 register int wasdollar = 0; 456 457 if (EAT('^')) { 458 EMIT(OBOL, 0); 459 p->g->iflags |= USEBOL; 460 p->g->nbol++; 461 } 462 while (MORE() && !SEETWO(end1, end2)) { 463 wasdollar = p_simp_re(p, first); 464 first = 0; 465 } 466 if (wasdollar) { /* oops, that was a trailing anchor */ 467 DROP(1); 468 EMIT(OEOL, 0); 469 p->g->iflags |= USEEOL; 470 p->g->neol++; 471 } 472 473 REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ 474 } 475 476 /* 477 - p_simp_re - parse a simple RE, an atom possibly followed by a repetition 478 == static int p_simp_re(register struct parse *p, int starordinary); 479 */ 480 static int /* was the simple RE an unbackslashed $? */ 481 p_simp_re(p, starordinary) 482 register struct parse *p; 483 int starordinary; /* is a leading * an ordinary character? */ 484 { 485 register int c; 486 register int count; 487 register int count2; 488 register sopno pos; 489 register int i; 490 register sopno subno; 491 # define BACKSL (1<<CHAR_BIT) 492 493 pos = HERE(); /* repetion op, if any, covers from here */ 494 495 assert(MORE()); /* caller should have ensured this */ 496 c = GETNEXT(); 497 if (c == '\\') { 498 REQUIRE(MORE(), REG_EESCAPE); 499 c = BACKSL | (unsigned char)GETNEXT(); 500 } 501 switch (c) { 502 case '.': 503 if (p->g->cflags®_NEWLINE) 504 nonnewline(p); 505 else 506 EMIT(OANY, 0); 507 break; 508 case '[': 509 p_bracket(p); 510 break; 511 case BACKSL|'{': 512 SETERROR(REG_BADRPT); 513 break; 514 case BACKSL|'(': 515 p->g->nsub++; 516 subno = p->g->nsub; 517 if (subno < NPAREN) 518 p->pbegin[subno] = HERE(); 519 EMIT(OLPAREN, subno); 520 /* the MORE here is an error heuristic */ 521 if (MORE() && !SEETWO('\\', ')')) 522 p_bre(p, '\\', ')'); 523 if (subno < NPAREN) { 524 p->pend[subno] = HERE(); 525 assert(p->pend[subno] != 0); 526 } 527 EMIT(ORPAREN, subno); 528 REQUIRE(EATTWO('\\', ')'), REG_EPAREN); 529 break; 530 case BACKSL|')': /* should not get here -- must be user */ 531 case BACKSL|'}': 532 SETERROR(REG_EPAREN); 533 break; 534 case BACKSL|'1': 535 case BACKSL|'2': 536 case BACKSL|'3': 537 case BACKSL|'4': 538 case BACKSL|'5': 539 case BACKSL|'6': 540 case BACKSL|'7': 541 case BACKSL|'8': 542 case BACKSL|'9': 543 i = (c&~BACKSL) - '0'; 544 assert(i < NPAREN); 545 if (p->pend[i] != 0) { 546 assert(i <= p->g->nsub); 547 EMIT(OBACK_, i); 548 assert(p->pbegin[i] != 0); 549 assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); 550 assert(OP(p->strip[p->pend[i]]) == ORPAREN); 551 (void) dupl(p, p->pbegin[i]+1, p->pend[i]); 552 EMIT(O_BACK, i); 553 } else 554 SETERROR(REG_ESUBREG); 555 p->g->backrefs = 1; 556 break; 557 case '*': 558 REQUIRE(starordinary, REG_BADRPT); 559 /* FALLTHROUGH */ 560 default: 561 ordinary(p, c &~ BACKSL); 562 break; 563 } 564 565 if (EAT('*')) { /* implemented as +? */ 566 INSERT(OPLUS_, pos); 567 ASTERN(O_PLUS, pos); 568 INSERT(OQUEST_, pos); 569 ASTERN(O_QUEST, pos); 570 } else if (EATTWO('\\', '{')) { 571 count = p_count(p); 572 if (EAT(',')) { 573 if (MORE() && isdigit(PEEK())) { 574 count2 = p_count(p); 575 REQUIRE(count <= count2, REG_BADBR); 576 } else /* single number with comma */ 577 count2 = INFINITY; 578 } else /* just a single number */ 579 count2 = count; 580 repeat(p, pos, count, count2); 581 if (!EATTWO('\\', '}')) { /* error heuristics */ 582 while (MORE() && !SEETWO('\\', '}')) 583 NEXT(); 584 REQUIRE(MORE(), REG_EBRACE); 585 SETERROR(REG_BADBR); 586 } 587 } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */ 588 return(1); 589 590 return(0); 591 } 592 593 /* 594 - p_count - parse a repetition count 595 == static int p_count(register struct parse *p); 596 */ 597 static int /* the value */ 598 p_count(p) 599 register struct parse *p; 600 { 601 register int count = 0; 602 register int ndigits = 0; 603 604 while (MORE() && isdigit(PEEK()) && count <= DUPMAX) { 605 count = count*10 + (GETNEXT() - '0'); 606 ndigits++; 607 } 608 609 REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); 610 return(count); 611 } 612 613 /* 614 - p_bracket - parse a bracketed character list 615 == static void p_bracket(register struct parse *p); 616 * 617 * Note a significant property of this code: if the allocset() did SETERROR, 618 * no set operations are done. 619 */ 620 static void 621 p_bracket(p) 622 register struct parse *p; 623 { 624 register char c; 625 register cset *cs = allocset(p); 626 register int invert = 0; 627 628 /* Dept of Truly Sickening Special-Case Kludges */ 629 if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { 630 EMIT(OBOW, 0); 631 NEXTn(6); 632 return; 633 } 634 if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { 635 EMIT(OEOW, 0); 636 NEXTn(6); 637 return; 638 } 639 640 if (EAT('^')) 641 invert++; /* make note to invert set at end */ 642 if (EAT(']')) 643 CHadd(cs, ']'); 644 else if (EAT('-')) 645 CHadd(cs, '-'); 646 while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) 647 p_b_term(p, cs); 648 if (EAT('-')) 649 CHadd(cs, '-'); 650 MUSTEAT(']', REG_EBRACK); 651 652 if (p->error != 0) /* don't mess things up further */ 653 return; 654 655 if (p->g->cflags®_ICASE) { 656 register int i; 657 register int ci; 658 659 for (i = p->g->csetsize - 1; i >= 0; i--) 660 if (CHIN(cs, i) && isalpha(i)) { 661 ci = othercase(i); 662 if (ci != i) 663 CHadd(cs, ci); 664 } 665 if (cs->multis != NULL) 666 mccase(p, cs); 667 } 668 if (invert) { 669 register int i; 670 671 for (i = p->g->csetsize - 1; i >= 0; i--) 672 if (CHIN(cs, i)) 673 CHsub(cs, i); 674 else 675 CHadd(cs, i); 676 if (p->g->cflags®_NEWLINE) 677 CHsub(cs, '\n'); 678 if (cs->multis != NULL) 679 mcinvert(p, cs); 680 } 681 682 assert(cs->multis == NULL); /* xxx */ 683 684 if (nch(p, cs) == 1) { /* optimize singleton sets */ 685 ordinary(p, firstch(p, cs)); 686 freeset(p, cs); 687 } else 688 EMIT(OANYOF, freezeset(p, cs)); 689 } 690 691 /* 692 - p_b_term - parse one term of a bracketed character list 693 == static void p_b_term(register struct parse *p, register cset *cs); 694 */ 695 static void 696 p_b_term(p, cs) 697 register struct parse *p; 698 register cset *cs; 699 { 700 register char c; 701 register char start, finish; 702 register int i; 703 704 /* classify what we've got */ 705 switch ((MORE()) ? PEEK() : '\0') { 706 case '[': 707 c = (MORE2()) ? PEEK2() : '\0'; 708 break; 709 case '-': 710 SETERROR(REG_ERANGE); 711 return; /* NOTE RETURN */ 712 break; 713 default: 714 c = '\0'; 715 break; 716 } 717 718 switch (c) { 719 case ':': /* character class */ 720 NEXT2(); 721 REQUIRE(MORE(), REG_EBRACK); 722 c = PEEK(); 723 REQUIRE(c != '-' && c != ']', REG_ECTYPE); 724 p_b_cclass(p, cs); 725 REQUIRE(MORE(), REG_EBRACK); 726 REQUIRE(EATTWO(':', ']'), REG_ECTYPE); 727 break; 728 case '=': /* equivalence class */ 729 NEXT2(); 730 REQUIRE(MORE(), REG_EBRACK); 731 c = PEEK(); 732 REQUIRE(c != '-' && c != ']', REG_ECOLLATE); 733 p_b_eclass(p, cs); 734 REQUIRE(MORE(), REG_EBRACK); 735 REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); 736 break; 737 default: /* symbol, ordinary character, or range */ 738 /* xxx revision needed for multichar stuff */ 739 start = p_b_symbol(p); 740 if (SEE('-') && MORE2() && PEEK2() != ']') { 741 /* range */ 742 NEXT(); 743 if (EAT('-')) 744 finish = '-'; 745 else 746 finish = p_b_symbol(p); 747 } else 748 finish = start; 749 /* xxx what about signed chars here... */ 750 REQUIRE(start <= finish, REG_ERANGE); 751 for (i = start; i <= finish; i++) 752 CHadd(cs, i); 753 break; 754 } 755 } 756 757 /* 758 - p_b_cclass - parse a character-class name and deal with it 759 == static void p_b_cclass(register struct parse *p, register cset *cs); 760 */ 761 static void 762 p_b_cclass(p, cs) 763 register struct parse *p; 764 register cset *cs; 765 { 766 register char *sp = p->next; 767 register struct cclass *cp; 768 register size_t len; 769 register char *u; 770 register char c; 771 772 while (MORE() && isalpha(PEEK())) 773 NEXT(); 774 len = p->next - sp; 775 for (cp = cclasses; cp->name != NULL; cp++) 776 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 777 break; 778 if (cp->name == NULL) { 779 /* oops, didn't find it */ 780 SETERROR(REG_ECTYPE); 781 return; 782 } 783 784 u = cp->chars; 785 while ((c = *u++) != '\0') 786 CHadd(cs, c); 787 for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) 788 MCadd(cs, u); 789 } 790 791 /* 792 - p_b_eclass - parse an equivalence-class name and deal with it 793 == static void p_b_eclass(register struct parse *p, register cset *cs); 794 * 795 * This implementation is incomplete. xxx 796 */ 797 static void 798 p_b_eclass(p, cs) 799 register struct parse *p; 800 register cset *cs; 801 { 802 register char c; 803 804 c = p_b_coll_elem(p, '='); 805 CHadd(cs, c); 806 } 807 808 /* 809 - p_b_symbol - parse a character or [..]ed multicharacter collating symbol 810 == static char p_b_symbol(register struct parse *p); 811 */ 812 static char /* value of symbol */ 813 p_b_symbol(p) 814 register struct parse *p; 815 { 816 register char value; 817 818 REQUIRE(MORE(), REG_EBRACK); 819 if (!EATTWO('[', '.')) 820 return(GETNEXT()); 821 822 /* collating symbol */ 823 value = p_b_coll_elem(p, '.'); 824 REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); 825 return(value); 826 } 827 828 /* 829 - p_b_coll_elem - parse a collating-element name and look it up 830 == static char p_b_coll_elem(register struct parse *p, int endc); 831 */ 832 static char /* value of collating element */ 833 p_b_coll_elem(p, endc) 834 register struct parse *p; 835 int endc; /* name ended by endc,']' */ 836 { 837 register char *sp = p->next; 838 register struct cname *cp; 839 register int len; 840 register char c; 841 842 while (MORE() && !SEETWO(endc, ']')) 843 NEXT(); 844 if (!MORE()) { 845 SETERROR(REG_EBRACK); 846 return(0); 847 } 848 len = p->next - sp; 849 for (cp = cnames; cp->name != NULL; cp++) 850 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 851 return(cp->code); /* known name */ 852 if (len == 1) 853 return(*sp); /* single character */ 854 SETERROR(REG_ECOLLATE); /* neither */ 855 return(0); 856 } 857 858 /* 859 - othercase - return the case counterpart of an alphabetic 860 == static char othercase(int ch); 861 */ 862 static char /* if no counterpart, return ch */ 863 othercase(ch) 864 int ch; 865 { 866 assert(isalpha(ch)); 867 if (isupper(ch)) 868 return(tolower(ch)); 869 else if (islower(ch)) 870 return(toupper(ch)); 871 else /* peculiar, but could happen */ 872 return(ch); 873 } 874 875 /* 876 - bothcases - emit a dualcase version of a two-case character 877 == static void bothcases(register struct parse *p, int ch); 878 * 879 * Boy, is this implementation ever a kludge... 880 */ 881 static void 882 bothcases(p, ch) 883 register struct parse *p; 884 int ch; 885 { 886 register char *oldnext = p->next; 887 register char *oldend = p->end; 888 char bracket[3]; 889 890 assert(othercase(ch) != ch); /* p_bracket() would recurse */ 891 p->next = bracket; 892 p->end = bracket+2; 893 bracket[0] = ch; 894 bracket[1] = ']'; 895 bracket[2] = '\0'; 896 p_bracket(p); 897 assert(p->next == bracket+2); 898 p->next = oldnext; 899 p->end = oldend; 900 } 901 902 /* 903 - ordinary - emit an ordinary character 904 == static void ordinary(register struct parse *p, register int ch); 905 */ 906 static void 907 ordinary(p, ch) 908 register struct parse *p; 909 register int ch; 910 { 911 register cat_t *cap = p->g->categories; 912 913 if ((p->g->cflags®_ICASE) && isalpha(ch) && othercase(ch) != ch) 914 bothcases(p, ch); 915 else { 916 EMIT(OCHAR, (unsigned char)ch); 917 if (cap[ch] == 0) 918 cap[ch] = p->g->ncategories++; 919 } 920 } 921 922 /* 923 - nonnewline - emit REG_NEWLINE version of OANY 924 == static void nonnewline(register struct parse *p); 925 * 926 * Boy, is this implementation ever a kludge... 927 */ 928 static void 929 nonnewline(p) 930 register struct parse *p; 931 { 932 register char *oldnext = p->next; 933 register char *oldend = p->end; 934 char bracket[4]; 935 936 p->next = bracket; 937 p->end = bracket+3; 938 bracket[0] = '^'; 939 bracket[1] = '\n'; 940 bracket[2] = ']'; 941 bracket[3] = '\0'; 942 p_bracket(p); 943 assert(p->next == bracket+3); 944 p->next = oldnext; 945 p->end = oldend; 946 } 947 948 /* 949 - repeat - generate code for a bounded repetition, recursively if needed 950 == static void repeat(register struct parse *p, sopno start, int from, int to); 951 */ 952 static void 953 repeat(p, start, from, to) 954 register struct parse *p; 955 sopno start; /* operand from here to end of strip */ 956 int from; /* repeated from this number */ 957 int to; /* to this number of times (maybe INFINITY) */ 958 { 959 register sopno finish = HERE(); 960 # define N 2 961 # define INF 3 962 # define REP(f, t) ((f)*8 + (t)) 963 # define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) 964 register sopno copy; 965 966 if (p->error != 0) /* head off possible runaway recursion */ 967 return; 968 969 assert(from <= to); 970 971 switch (REP(MAP(from), MAP(to))) { 972 case REP(0, 0): /* must be user doing this */ 973 DROP(finish-start); /* drop the operand */ 974 break; 975 case REP(0, 1): /* as x{1,1}? */ 976 case REP(0, N): /* as x{1,n}? */ 977 case REP(0, INF): /* as x{1,}? */ 978 INSERT(OQUEST_, start); /* offset is wrong... */ 979 repeat(p, start+1, 1, to); 980 AHEAD(start); /* ... fix it */ 981 ASTERN(O_QUEST, start); 982 break; 983 case REP(1, 1): /* trivial case */ 984 /* done */ 985 break; 986 case REP(1, N): /* as x?x{1,n-1} */ 987 INSERT(OQUEST_, start); 988 ASTERN(O_QUEST, start); 989 copy = dupl(p, start+1, finish+1); 990 assert(copy == finish+2); 991 repeat(p, copy, 1, to-1); 992 break; 993 case REP(1, INF): /* as x+ */ 994 INSERT(OPLUS_, start); 995 ASTERN(O_PLUS, start); 996 break; 997 case REP(N, N): /* as xx{m-1,n-1} */ 998 copy = dupl(p, start, finish); 999 repeat(p, copy, from-1, to-1); 1000 break; 1001 case REP(N, INF): /* as xx{n-1,INF} */ 1002 copy = dupl(p, start, finish); 1003 repeat(p, copy, from-1, to); 1004 break; 1005 default: /* "can't happen" */ 1006 SETERROR(REG_ASSERT); /* just in case */ 1007 break; 1008 } 1009 } 1010 1011 /* 1012 - seterr - set an error condition 1013 == static int seterr(register struct parse *p, int e); 1014 */ 1015 static int /* useless but makes type checking happy */ 1016 seterr(p, e) 1017 register struct parse *p; 1018 int e; 1019 { 1020 if (p->error == 0) /* keep earliest error condition */ 1021 p->error = e; 1022 p->next = nuls; /* try to bring things to a halt */ 1023 p->end = nuls; 1024 return(0); /* make the return value well-defined */ 1025 } 1026 1027 /* 1028 - allocset - allocate a set of characters for [] 1029 == static cset *allocset(register struct parse *p); 1030 */ 1031 static cset * 1032 allocset(p) 1033 register struct parse *p; 1034 { 1035 register int no = p->g->ncsets++; 1036 register size_t nc; 1037 register size_t nbytes; 1038 register cset *cs; 1039 register size_t css = (size_t)p->g->csetsize; 1040 1041 if (no >= p->ncsalloc) { /* need another column of space */ 1042 p->ncsalloc += CHAR_BIT; 1043 nc = p->ncsalloc; 1044 assert(nc % CHAR_BIT == 0); 1045 nbytes = nc / CHAR_BIT * css; 1046 if (p->g->sets == NULL) 1047 p->g->sets = (cset *)malloc(nc * sizeof(cset)); 1048 else 1049 p->g->sets = (cset *)realloc((char *)p->g->sets, 1050 nc * sizeof(cset)); 1051 if (p->g->setbits == NULL) 1052 p->g->setbits = (uchar *)malloc(nbytes); 1053 else 1054 p->g->setbits = (uchar *)realloc((char *)p->g->setbits, 1055 nbytes); 1056 if (p->g->sets != NULL && p->g->setbits != NULL) 1057 (void) memset((char *)p->g->setbits + (nbytes - css), 1058 0, css); 1059 else { 1060 no = 0; 1061 SETERROR(REG_ESPACE); 1062 /* caller's responsibility not to do set ops */ 1063 } 1064 } 1065 1066 assert(p->g->sets != NULL); /* xxx */ 1067 cs = &p->g->sets[no]; 1068 cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); 1069 cs->mask = 1 << ((no) % CHAR_BIT); 1070 cs->hash = 0; 1071 cs->smultis = 0; 1072 cs->multis = NULL; 1073 1074 return(cs); 1075 } 1076 1077 /* 1078 - freeset - free a now-unused set 1079 == static void freeset(register struct parse *p, register cset *cs); 1080 */ 1081 static void 1082 freeset(p, cs) 1083 register struct parse *p; 1084 register cset *cs; 1085 { 1086 register int i; 1087 register cset *top = &p->g->sets[p->g->ncsets]; 1088 register size_t css = (size_t)p->g->csetsize; 1089 1090 for (i = 0; i < css; i++) 1091 CHsub(cs, i); 1092 if (cs == top-1) /* recover only the easy case */ 1093 p->g->ncsets--; 1094 } 1095 1096 /* 1097 - freezeset - final processing on a set of characters 1098 == static int freezeset(register struct parse *p, register cset *cs); 1099 * 1100 * The main task here is merging identical sets. This is usually a waste 1101 * of time (although the hash code minimizes the overhead), but can win 1102 * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash 1103 * is done using addition rather than xor -- all ASCII [aA] sets xor to 1104 * the same value! 1105 */ 1106 static int /* set number */ 1107 freezeset(p, cs) 1108 register struct parse *p; 1109 register cset *cs; 1110 { 1111 register uchar h = cs->hash; 1112 register int i; 1113 register cset *top = &p->g->sets[p->g->ncsets]; 1114 register cset *cs2; 1115 register size_t css = (size_t)p->g->csetsize; 1116 1117 /* look for an earlier one which is the same */ 1118 for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) 1119 if (cs2->hash == h && cs2 != cs) { 1120 /* maybe */ 1121 for (i = 0; i < css; i++) 1122 if (!!CHIN(cs2, i) != !!CHIN(cs, i)) 1123 break; /* no */ 1124 if (i == css) 1125 break; /* yes */ 1126 } 1127 1128 if (cs2 < top) { /* found one */ 1129 freeset(p, cs); 1130 cs = cs2; 1131 } 1132 1133 return((int)(cs - p->g->sets)); 1134 } 1135 1136 /* 1137 - firstch - return first character in a set (which must have at least one) 1138 == static int firstch(register struct parse *p, register cset *cs); 1139 */ 1140 static int /* character; there is no "none" value */ 1141 firstch(p, cs) 1142 register struct parse *p; 1143 register cset *cs; 1144 { 1145 register int i; 1146 register size_t css = (size_t)p->g->csetsize; 1147 1148 for (i = 0; i < css; i++) 1149 if (CHIN(cs, i)) 1150 return((char)i); 1151 assert(never); 1152 return(0); /* arbitrary */ 1153 } 1154 1155 /* 1156 - nch - number of characters in a set 1157 == static int nch(register struct parse *p, register cset *cs); 1158 */ 1159 static int 1160 nch(p, cs) 1161 register struct parse *p; 1162 register cset *cs; 1163 { 1164 register int i; 1165 register size_t css = (size_t)p->g->csetsize; 1166 register int n = 0; 1167 1168 for (i = 0; i < css; i++) 1169 if (CHIN(cs, i)) 1170 n++; 1171 return(n); 1172 } 1173 1174 /* 1175 - mcadd - add a collating element to a cset 1176 == static void mcadd(register struct parse *p, register cset *cs, \ 1177 == register char *cp); 1178 */ 1179 static void 1180 mcadd(p, cs, cp) 1181 register struct parse *p; 1182 register cset *cs; 1183 register char *cp; 1184 { 1185 register size_t oldend = cs->smultis; 1186 1187 cs->smultis += strlen(cp) + 1; 1188 if (cs->multis == NULL) 1189 cs->multis = malloc(cs->smultis); 1190 else 1191 cs->multis = realloc(cs->multis, cs->smultis); 1192 if (cs->multis == NULL) { 1193 SETERROR(REG_ESPACE); 1194 return; 1195 } 1196 1197 (void) strcpy(cs->multis + oldend - 1, cp); 1198 cs->multis[cs->smultis - 1] = '\0'; 1199 } 1200 1201 /* 1202 - mcsub - subtract a collating element from a cset 1203 == static void mcsub(register cset *cs, register char *cp); 1204 */ 1205 static void 1206 mcsub(cs, cp) 1207 register cset *cs; 1208 register char *cp; 1209 { 1210 register char *fp = mcfind(cs, cp); 1211 register size_t len = strlen(fp); 1212 1213 assert(fp != NULL); 1214 (void) memmove(fp, fp + len + 1, 1215 cs->smultis - (fp + len + 1 - cs->multis)); 1216 cs->smultis -= len; 1217 1218 if (cs->smultis == 0) { 1219 free(cs->multis); 1220 cs->multis = NULL; 1221 return; 1222 } 1223 1224 cs->multis = realloc(cs->multis, cs->smultis); 1225 assert(cs->multis != NULL); 1226 } 1227 1228 /* 1229 - mcin - is a collating element in a cset? 1230 == static int mcin(register cset *cs, register char *cp); 1231 */ 1232 static int 1233 mcin(cs, cp) 1234 register cset *cs; 1235 register char *cp; 1236 { 1237 return(mcfind(cs, cp) != NULL); 1238 } 1239 1240 /* 1241 - mcfind - find a collating element in a cset 1242 == static char *mcfind(register cset *cs, register char *cp); 1243 */ 1244 static char * 1245 mcfind(cs, cp) 1246 register cset *cs; 1247 register char *cp; 1248 { 1249 register char *p; 1250 1251 if (cs->multis == NULL) 1252 return(NULL); 1253 for (p = cs->multis; *p != '\0'; p += strlen(p) + 1) 1254 if (strcmp(cp, p) == 0) 1255 return(p); 1256 return(NULL); 1257 } 1258 1259 /* 1260 - mcinvert - invert the list of collating elements in a cset 1261 == static void mcinvert(register cset *cs); 1262 * 1263 * This would have to know the set of possibilities. Implementation 1264 * is deferred. 1265 */ 1266 static void 1267 mcinvert(cs) 1268 register cset *cs; 1269 { 1270 assert(cs->multis == NULL); /* xxx */ 1271 } 1272 1273 /* 1274 - mccase - add case counterparts of the list of collating elements in a cset 1275 == static void mccase(register cset *cs); 1276 * 1277 * This would have to know the set of possibilities. Implementation 1278 * is deferred. 1279 */ 1280 static void 1281 mccase(cs) 1282 register cset *cs; 1283 { 1284 assert(cs->multis == NULL); /* xxx */ 1285 } 1286 1287 /* 1288 - isinsets - is this character in any sets? 1289 == static int isinsets(register struct re_guts *g, int c); 1290 */ 1291 static int /* predicate */ 1292 isinsets(g, c) 1293 register struct re_guts *g; 1294 int c; 1295 { 1296 register uchar *col; 1297 register int i; 1298 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 1299 register unsigned uc = (unsigned char)c; 1300 1301 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 1302 if (col[uc] != 0) 1303 return(1); 1304 return(0); 1305 } 1306 1307 /* 1308 - samesets - are these two characters in exactly the same sets? 1309 == static int samesets(register struct re_guts *g, int c1, int c2); 1310 */ 1311 static int /* predicate */ 1312 samesets(g, c1, c2) 1313 register struct re_guts *g; 1314 int c1; 1315 int c2; 1316 { 1317 register uchar *col; 1318 register int i; 1319 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 1320 register unsigned uc1 = (unsigned char)c1; 1321 register unsigned uc2 = (unsigned char)c2; 1322 1323 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 1324 if (col[uc1] != col[uc2]) 1325 return(0); 1326 return(1); 1327 } 1328 1329 /* 1330 - categorize - sort out character categories 1331 == static void categorize(struct parse *p, register struct re_guts *g); 1332 */ 1333 static void 1334 categorize(p, g) 1335 struct parse *p; 1336 register struct re_guts *g; 1337 { 1338 register cat_t *cats = g->categories; 1339 register int c; 1340 register int c2; 1341 register cat_t cat; 1342 1343 /* avoid making error situations worse */ 1344 if (p->error != 0) 1345 return; 1346 1347 for (c = CHAR_MIN; c <= CHAR_MAX; c++) 1348 if (cats[c] == 0 && isinsets(g, c)) { 1349 cat = g->ncategories++; 1350 cats[c] = cat; 1351 for (c2 = c+1; c2 <= CHAR_MAX; c2++) 1352 if (cats[c2] == 0 && samesets(g, c, c2)) 1353 cats[c2] = cat; 1354 } 1355 } 1356 1357 /* 1358 - dupl - emit a duplicate of a bunch of sops 1359 == static sopno dupl(register struct parse *p, sopno start, sopno finish); 1360 */ 1361 static sopno /* start of duplicate */ 1362 dupl(p, start, finish) 1363 register struct parse *p; 1364 sopno start; /* from here */ 1365 sopno finish; /* to this less one */ 1366 { 1367 register sopno ret = HERE(); 1368 register sopno len = finish - start; 1369 1370 assert(finish >= start); 1371 if (len == 0) 1372 return(ret); 1373 enlarge(p, p->ssize + len); /* this many unexpected additions */ 1374 assert(p->ssize >= p->slen + len); 1375 (void) memcpy((char *)(p->strip + p->slen), 1376 (char *)(p->strip + start), (size_t)len*sizeof(sop)); 1377 p->slen += len; 1378 return(ret); 1379 } 1380 1381 /* 1382 - doemit - emit a strip operator 1383 == static void doemit(register struct parse *p, sop op, size_t opnd); 1384 * 1385 * It might seem better to implement this as a macro with a function as 1386 * hard-case backup, but it's just too big and messy unless there are 1387 * some changes to the data structures. Maybe later. 1388 */ 1389 static void 1390 doemit(p, op, opnd) 1391 register struct parse *p; 1392 sop op; 1393 size_t opnd; 1394 { 1395 /* avoid making error situations worse */ 1396 if (p->error != 0) 1397 return; 1398 1399 /* deal with oversize operands ("can't happen", more or less) */ 1400 assert(opnd < 1<<OPSHIFT); 1401 1402 /* deal with undersized strip */ 1403 if (p->slen >= p->ssize) 1404 enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ 1405 assert(p->slen < p->ssize); 1406 1407 /* finally, it's all reduced to the easy case */ 1408 p->strip[p->slen++] = SOP(op, opnd); 1409 } 1410 1411 /* 1412 - doinsert - insert a sop into the strip 1413 == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos); 1414 */ 1415 static void 1416 doinsert(p, op, opnd, pos) 1417 register struct parse *p; 1418 sop op; 1419 size_t opnd; 1420 sopno pos; 1421 { 1422 register sopno sn; 1423 register sop s; 1424 register int i; 1425 1426 /* avoid making error situations worse */ 1427 if (p->error != 0) 1428 return; 1429 1430 sn = HERE(); 1431 EMIT(op, opnd); /* do checks, ensure space */ 1432 assert(HERE() == sn+1); 1433 s = p->strip[sn]; 1434 1435 /* adjust paren pointers */ 1436 assert(pos > 0); 1437 for (i = 1; i < NPAREN; i++) { 1438 if (p->pbegin[i] >= pos) { 1439 p->pbegin[i]++; 1440 } 1441 if (p->pend[i] >= pos) { 1442 p->pend[i]++; 1443 } 1444 } 1445 1446 memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], 1447 (HERE()-pos-1)*sizeof(sop)); 1448 p->strip[pos] = s; 1449 } 1450 1451 /* 1452 - dofwd - complete a forward reference 1453 == static void dofwd(register struct parse *p, sopno pos, sop value); 1454 */ 1455 static void 1456 dofwd(p, pos, value) 1457 register struct parse *p; 1458 register sopno pos; 1459 sop value; 1460 { 1461 /* avoid making error situations worse */ 1462 if (p->error != 0) 1463 return; 1464 1465 assert(value < 1<<OPSHIFT); 1466 p->strip[pos] = OP(p->strip[pos]) | value; 1467 } 1468 1469 /* 1470 - enlarge - enlarge the strip 1471 == static void enlarge(register struct parse *p, sopno size); 1472 */ 1473 static void 1474 enlarge(p, size) 1475 register struct parse *p; 1476 register sopno size; 1477 { 1478 register sop *sp; 1479 1480 if (p->ssize >= size) 1481 return; 1482 1483 sp = (sop *)realloc(p->strip, size*sizeof(sop)); 1484 if (sp == NULL) { 1485 SETERROR(REG_ESPACE); 1486 return; 1487 } 1488 p->strip = sp; 1489 p->ssize = size; 1490 } 1491 1492 /* 1493 - stripsnug - compact the strip 1494 == static void stripsnug(register struct parse *p, register struct re_guts *g); 1495 */ 1496 static void 1497 stripsnug(p, g) 1498 register struct parse *p; 1499 register struct re_guts *g; 1500 { 1501 g->nstates = p->slen; 1502 g->strip = (sop *)realloc((sop *)p->strip, p->slen * sizeof(sop)); 1503 if (g->strip == NULL) { 1504 SETERROR(REG_ESPACE); 1505 g->strip = p->strip; 1506 } 1507 } 1508 1509 /* 1510 - findmust - fill in must and mlen with longest mandatory literal string 1511 == static void findmust(register struct parse *p, register struct re_guts *g); 1512 * 1513 * This algorithm could do fancy things like analyzing the operands of | 1514 * for common subsequences. Someday. This code is simple and finds most 1515 * of the interesting cases. 1516 * 1517 * Note that must and mlen got initialized during setup. 1518 */ 1519 static void 1520 findmust(p, g) 1521 struct parse *p; 1522 register struct re_guts *g; 1523 { 1524 register sop *scan; 1525 sop *start; 1526 register sop *newstart; 1527 register sopno newlen; 1528 register sop s; 1529 register char *cp; 1530 register sopno i; 1531 1532 /* avoid making error situations worse */ 1533 if (p->error != 0) 1534 return; 1535 1536 /* find the longest OCHAR sequence in strip */ 1537 newlen = 0; 1538 scan = g->strip + 1; 1539 do { 1540 s = *scan++; 1541 switch (OP(s)) { 1542 case OCHAR: /* sequence member */ 1543 if (newlen == 0) /* new sequence */ 1544 newstart = scan - 1; 1545 newlen++; 1546 break; 1547 case OPLUS_: /* things that don't break one */ 1548 case OLPAREN: 1549 case ORPAREN: 1550 break; 1551 case OQUEST_: /* things that must be skipped */ 1552 case OCH_: 1553 scan--; 1554 do { 1555 scan += OPND(s); 1556 s = *scan; 1557 /* assert() interferes w debug printouts */ 1558 if (OP(s) != O_QUEST && OP(s) != O_CH && 1559 OP(s) != OOR2) { 1560 g->iflags |= BAD; 1561 return; 1562 } 1563 } while (OP(s) != O_QUEST && OP(s) != O_CH); 1564 /* fallthrough */ 1565 default: /* things that break a sequence */ 1566 if (newlen > g->mlen) { /* ends one */ 1567 start = newstart; 1568 g->mlen = newlen; 1569 } 1570 newlen = 0; 1571 break; 1572 } 1573 } while (OP(s) != OEND); 1574 1575 if (g->mlen == 0) /* there isn't one */ 1576 return; 1577 1578 /* turn it into a character string */ 1579 g->must = malloc((size_t)g->mlen + 1); 1580 if (g->must == NULL) { /* argh; just forget it */ 1581 g->mlen = 0; 1582 return; 1583 } 1584 cp = g->must; 1585 scan = start; 1586 for (i = g->mlen; i > 0; i--) { 1587 while (OP(s = *scan++) != OCHAR) 1588 continue; 1589 *cp++ = (char)OPND(s); 1590 } 1591 *cp++ = '\0'; /* just on general principles */ 1592 } 1593 1594 /* 1595 - pluscount - count + nesting 1596 == static sopno pluscount(register struct parse *p, register struct re_guts *g); 1597 */ 1598 static sopno /* nesting depth */ 1599 pluscount(p, g) 1600 struct parse *p; 1601 register struct re_guts *g; 1602 { 1603 register sop *scan; 1604 register sop s; 1605 register sopno plusnest = 0; 1606 register sopno maxnest = 0; 1607 1608 if (p->error != 0) 1609 return(0); /* there may not be an OEND */ 1610 1611 scan = g->strip + 1; 1612 do { 1613 s = *scan++; 1614 switch (OP(s)) { 1615 case OPLUS_: 1616 plusnest++; 1617 break; 1618 case O_PLUS: 1619 if (plusnest > maxnest) 1620 maxnest = plusnest; 1621 plusnest--; 1622 break; 1623 } 1624 } while (OP(s) != OEND); 1625 if (plusnest != 0) 1626 g->iflags |= BAD; 1627 return(maxnest); 1628 } 1629