1 /* 2 * egrep -- print lines containing (or not containing) a regular expression 3 * 4 * status returns: 5 * 0 - ok, and some matches 6 * 1 - ok, but no matches 7 * 2 - some error 8 */ 9 %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST 10 %left OR 11 %left CHAR DOT CCL NCCL '(' 12 %left CAT 13 %left STAR PLUS QUEST 14 15 %{ 16 /*- 17 * Copyright (c) 1991, 1993 18 * The Regents of the University of California. All rights reserved. 19 * 20 * %sccs.include.proprietary.c% 21 */ 22 23 #ifndef lint 24 static char copyright[] = 25 "@(#) Copyright (c) 1991, 1993\n\ 26 The Regents of the University of California. All rights reserved.\n"; 27 #endif /* not lint */ 28 29 #ifndef lint 30 static char sccsid[] = "@(#)old.egrep.y 8.1 (Berkeley) 06/06/93"; 31 #endif /* not lint */ 32 33 #include <stdio.h> 34 #include <sys/types.h> 35 #include <sys/stat.h> 36 #include <ctype.h> 37 38 #define BLKSIZE 8192 39 #define MAXLIN 350 40 #define MAXPOS 4000 41 #define NCHARS 128 42 #define NSTATES 128 43 #define FINAL -1 44 char gotofn[NSTATES][NCHARS]; 45 char cmap[256]; 46 int state[NSTATES]; 47 char out[NSTATES]; 48 int line = 1; 49 int name[MAXLIN]; 50 int left[MAXLIN]; 51 int right[MAXLIN]; 52 int parent[MAXLIN]; 53 int foll[MAXLIN]; 54 int positions[MAXPOS]; 55 char chars[MAXLIN]; 56 int nxtpos; 57 int nxtchar = 0; 58 int tmpstat[MAXLIN]; 59 int initstat[MAXLIN]; 60 int xstate; 61 int count; 62 int icount; 63 char *input; 64 FILE *exprfile; 65 66 long lnum; 67 int bflag; 68 int cflag; 69 int fflag; 70 int iflag; 71 int lflag; 72 int nflag; 73 int hflag = 1; 74 int oflag; 75 int sflag; 76 int vflag; 77 int retcode = 0; 78 int nfile; 79 int blkno; 80 long tln; 81 int nsucc; 82 83 int f; 84 char *fname; 85 %} 86 87 %% 88 s: t 89 ={ unary(FINAL, $1); 90 line--; 91 } 92 ; 93 t: b r 94 ={ $$ = node(CAT, $1, $2); } 95 | OR b r OR 96 ={ $$ = node(CAT, $2, $3); } 97 | OR b r 98 ={ $$ = node(CAT, $2, $3); } 99 | b r OR 100 ={ $$ = node(CAT, $1, $2); } 101 ; 102 b: 103 ={ $$ = enter(DOT); 104 $$ = unary(STAR, $$); } 105 ; 106 r: CHAR 107 ={ $$ = enter($1); } 108 | DOT 109 ={ $$ = enter(DOT); } 110 | CCL 111 ={ $$ = cclenter(CCL); } 112 | NCCL 113 ={ $$ = cclenter(NCCL); } 114 ; 115 116 r: r OR r 117 ={ $$ = node(OR, $1, $3); } 118 | r r %prec CAT 119 ={ $$ = node(CAT, $1, $2); } 120 | r STAR 121 ={ $$ = unary(STAR, $1); } 122 | r PLUS 123 ={ $$ = unary(PLUS, $1); } 124 | r QUEST 125 ={ $$ = unary(QUEST, $1); } 126 | '(' r ')' 127 ={ $$ = $2; } 128 | error 129 ; 130 131 %% 132 yyerror(s) { 133 fprintf(stderr, "egrep: %s\n", s); 134 exit(2); 135 } 136 137 yylex() { 138 extern int yylval; 139 int cclcnt, x; 140 register char c, d; 141 switch(c = nextch()) { 142 case '$': 143 case '^': c = '\n'; 144 goto defchar; 145 case '|': return (OR); 146 case '*': return (STAR); 147 case '+': return (PLUS); 148 case '?': return (QUEST); 149 case '(': return (c); 150 case ')': return (c); 151 case '.': return (DOT); 152 case '\0': return (0); 153 case '\n': return (OR); 154 case '[': 155 x = CCL; 156 cclcnt = 0; 157 count = nxtchar++; 158 if ((c = nextch()) == '^') { 159 x = NCCL; 160 c = nextch(); 161 } 162 do { 163 if (c == '\0') synerror(); 164 if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) { 165 if ((d = nextch()) != 0) { 166 c = chars[nxtchar-1]; 167 while (c < d) { 168 if (nxtchar >= MAXLIN) overflo(); 169 chars[nxtchar++] = ++c; 170 cclcnt++; 171 } 172 continue; 173 } 174 } 175 if (nxtchar >= MAXLIN) overflo(); 176 chars[nxtchar++] = c; 177 cclcnt++; 178 } while ((c = nextch()) != ']'); 179 chars[count] = cclcnt; 180 return (x); 181 case '\\': 182 if ((c = nextch()) == '\0') synerror(); 183 defchar: 184 default: yylval = c; return (CHAR); 185 } 186 } 187 nextch() { 188 register int c; 189 if (fflag) { 190 if ((c = getc(exprfile)) == EOF) { 191 fclose(exprfile); 192 return(0); 193 } 194 } 195 else c = *input++; 196 return(c); 197 } 198 199 synerror() { 200 fprintf(stderr, "egrep: syntax error\n"); 201 exit(2); 202 } 203 204 enter(x) int x; { 205 if(line >= MAXLIN) overflo(); 206 name[line] = x; 207 left[line] = 0; 208 right[line] = 0; 209 return(line++); 210 } 211 212 cclenter(x) int x; { 213 register linno; 214 linno = enter(x); 215 right[linno] = count; 216 return (linno); 217 } 218 219 node(x, l, r) { 220 if(line >= MAXLIN) overflo(); 221 name[line] = x; 222 left[line] = l; 223 right[line] = r; 224 parent[l] = line; 225 parent[r] = line; 226 return(line++); 227 } 228 229 unary(x, d) { 230 if(line >= MAXLIN) overflo(); 231 name[line] = x; 232 left[line] = d; 233 right[line] = 0; 234 parent[d] = line; 235 return(line++); 236 } 237 overflo() { 238 fprintf(stderr, "egrep: regular expression too long\n"); 239 exit(2); 240 } 241 242 cfoll(v) { 243 register i; 244 if (left[v] == 0) { 245 count = 0; 246 for (i=1; i<=line; i++) tmpstat[i] = 0; 247 follow(v); 248 add(foll, v); 249 } 250 else if (right[v] == 0) cfoll(left[v]); 251 else { 252 cfoll(left[v]); 253 cfoll(right[v]); 254 } 255 } 256 cgotofn() { 257 register c, i, k; 258 int n, s; 259 char symbol[NCHARS]; 260 int j, nc, pc, pos; 261 int curpos, num; 262 int number, newpos; 263 count = 0; 264 for (n=3; n<=line; n++) tmpstat[n] = 0; 265 if (cstate(line-1)==0) { 266 tmpstat[line] = 1; 267 count++; 268 out[0] = 1; 269 } 270 for (n=3; n<=line; n++) initstat[n] = tmpstat[n]; 271 count--; /*leave out position 1 */ 272 icount = count; 273 tmpstat[1] = 0; 274 add(state, 0); 275 n = 0; 276 for (s=0; s<=n; s++) { 277 if (out[s] == 1) continue; 278 for (i=0; i<NCHARS; i++) symbol[i] = 0; 279 num = positions[state[s]]; 280 count = icount; 281 for (i=3; i<=line; i++) tmpstat[i] = initstat[i]; 282 pos = state[s] + 1; 283 for (i=0; i<num; i++) { 284 curpos = positions[pos]; 285 if ((c = name[curpos]) >= 0) { 286 if (c < NCHARS) symbol[c] = 1; 287 else if (c == DOT) { 288 for (k=0; k<NCHARS; k++) 289 if (k!='\n') symbol[k] = 1; 290 } 291 else if (c == CCL) { 292 nc = chars[right[curpos]]; 293 pc = right[curpos] + 1; 294 for (k=0; k<nc; k++) symbol[chars[pc++]] = 1; 295 } 296 else if (c == NCCL) { 297 nc = chars[right[curpos]]; 298 for (j = 0; j < NCHARS; j++) { 299 pc = right[curpos] + 1; 300 for (k = 0; k < nc; k++) 301 if (j==chars[pc++]) goto cont; 302 if (j!='\n') symbol[j] = 1; 303 cont:; 304 } 305 } 306 else printf("something's funny\n"); 307 } 308 pos++; 309 } 310 for (c=0; c<NCHARS; c++) { 311 if (symbol[c] == 1) { /* nextstate(s,c) */ 312 count = icount; 313 for (i=3; i <= line; i++) tmpstat[i] = initstat[i]; 314 pos = state[s] + 1; 315 for (i=0; i<num; i++) { 316 curpos = positions[pos]; 317 if ((k = name[curpos]) >= 0) 318 if ( 319 (k == c) 320 | (k == DOT) 321 | (k == CCL && member(c, right[curpos], 1)) 322 | (k == NCCL && member(c, right[curpos], 0)) 323 ) { 324 number = positions[foll[curpos]]; 325 newpos = foll[curpos] + 1; 326 for (k=0; k<number; k++) { 327 if (tmpstat[positions[newpos]] != 1) { 328 tmpstat[positions[newpos]] = 1; 329 count++; 330 } 331 newpos++; 332 } 333 } 334 pos++; 335 } /* end nextstate */ 336 if (notin(n)) { 337 if (n >= NSTATES) overflo(); 338 add(state, ++n); 339 if (tmpstat[line] == 1) out[n] = 1; 340 gotofn[s][c] = n; 341 } 342 else { 343 gotofn[s][c] = xstate; 344 } 345 } 346 } 347 } 348 } 349 350 cstate(v) { 351 register b; 352 if (left[v] == 0) { 353 if (tmpstat[v] != 1) { 354 tmpstat[v] = 1; 355 count++; 356 } 357 return(1); 358 } 359 else if (right[v] == 0) { 360 if (cstate(left[v]) == 0) return (0); 361 else if (name[v] == PLUS) return (1); 362 else return (0); 363 } 364 else if (name[v] == CAT) { 365 if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0); 366 else return (1); 367 } 368 else { /* name[v] == OR */ 369 b = cstate(right[v]); 370 if (cstate(left[v]) == 0 || b == 0) return (0); 371 else return (1); 372 } 373 } 374 375 376 member(symb, set, torf) { 377 register i, num, pos; 378 num = chars[set]; 379 pos = set + 1; 380 for (i=0; i<num; i++) 381 if (symb == chars[pos++]) return (torf); 382 return (!torf); 383 } 384 385 notin(n) { 386 register i, j, pos; 387 for (i=0; i<=n; i++) { 388 if (positions[state[i]] == count) { 389 pos = state[i] + 1; 390 for (j=0; j < count; j++) 391 if (tmpstat[positions[pos++]] != 1) goto nxt; 392 xstate = i; 393 return (0); 394 } 395 nxt: ; 396 } 397 return (1); 398 } 399 400 add(array, n) int *array; { 401 register i; 402 if (nxtpos + count > MAXPOS) overflo(); 403 array[n] = nxtpos; 404 positions[nxtpos++] = count; 405 for (i=3; i <= line; i++) { 406 if (tmpstat[i] == 1) { 407 positions[nxtpos++] = i; 408 } 409 } 410 } 411 412 follow(v) int v; { 413 int p; 414 if (v == line) return; 415 p = parent[v]; 416 switch(name[p]) { 417 case STAR: 418 case PLUS: cstate(v); 419 follow(p); 420 return; 421 422 case OR: 423 case QUEST: follow(p); 424 return; 425 426 case CAT: if (v == left[p]) { 427 if (cstate(right[p]) == 0) { 428 follow(p); 429 return; 430 } 431 } 432 else follow(p); 433 return; 434 case FINAL: if (tmpstat[line] != 1) { 435 tmpstat[line] = 1; 436 count++; 437 } 438 return; 439 } 440 } 441 442 443 main(argc, argv) 444 char **argv; 445 { 446 register int i; 447 448 while (--argc > 0 && (++argv)[0][0]=='-') 449 switch (argv[0][1]) { 450 451 case 's': 452 sflag++; 453 continue; 454 455 case 'h': 456 hflag = 0; 457 continue; 458 459 case 'o': 460 oflag++; 461 continue; 462 463 case 'b': 464 bflag++; 465 continue; 466 467 case 'c': 468 cflag++; 469 continue; 470 471 case 'e': 472 argc--; 473 argv++; 474 goto out; 475 476 case 'f': 477 fflag++; 478 continue; 479 480 case 'i': 481 iflag++; 482 for ( i = 'A'; i <= 'Z'; i++ ) 483 cmap[i] = (char) tolower ( i ); 484 continue; 485 486 case 'l': 487 lflag++; 488 continue; 489 490 case 'n': 491 nflag++; 492 continue; 493 494 case 'v': 495 vflag++; 496 continue; 497 498 default: 499 fprintf(stderr, "egrep: unknown flag\n"); 500 continue; 501 } 502 out: 503 if (argc<=0) 504 exit(2); 505 506 for (i = 0; i < 256; ++i) 507 cmap[i] = (char)i; 508 509 if (fflag) { 510 fname = *argv; 511 exprfile = fopen(fname, "r"); 512 if (exprfile == (FILE *)NULL) { 513 fprintf(stderr, "egrep: can't open %s\n", fname); 514 exit(2); 515 } 516 } 517 else input = *argv; 518 if ( iflag ) { 519 register char *s; 520 for ( s = input; *s != '\0'; s++ ) 521 if ( isupper ( (int)(*s) ) ) 522 *s = (char) tolower ( (int)(*s) ); 523 } 524 argc--; 525 argv++; 526 527 yyparse(); 528 529 cfoll(line-1); 530 cgotofn(); 531 nfile = argc; 532 if (argc<=0) { 533 if (lflag) exit(1); 534 execute(0); 535 } 536 else while (--argc >= 0) { 537 execute(*argv); 538 argv++; 539 } 540 exit(retcode != 0 ? retcode : nsucc == 0); 541 } 542 543 execute(file) 544 char *file; 545 { 546 register char *p; 547 register cstat; 548 register ccount; 549 register char *cmapr = cmap; 550 static char *buf; 551 static int blksize; 552 struct stat stb; 553 char *nlp; 554 int istat; 555 if (file) { 556 if ((f = open(file, 0)) < 0) { 557 fprintf(stderr, "egrep: can't open %s\n", file); 558 retcode = 2; 559 return; 560 } 561 } 562 else f = 0; 563 if (buf == NULL) { 564 if (fstat(f, &stb) > 0 && stb.st_blksize > 0) 565 blksize = stb.st_blksize; 566 else 567 blksize = BLKSIZE; 568 buf = (char *)malloc(2*blksize); 569 if (buf == NULL) { 570 fprintf(stderr, "egrep: no memory for %s\n", file); 571 retcode = 2; 572 return; 573 } 574 } 575 ccount = 0; 576 lnum = 1; 577 tln = 0; 578 blkno = 0; 579 p = buf; 580 nlp = p; 581 if ((ccount = read(f,p,blksize))<=0) goto done; 582 istat = cstat = gotofn[0]['\n']; 583 if (out[cstat]) goto found; 584 for (;;) { 585 cstat = gotofn[cstat][(unsigned char)cmapr[*(unsigned char *)p]]; 586 if (out[cstat]) { 587 found: for(;;) { 588 if (*p++ == '\n') { 589 if (vflag == 0) { 590 succeed: nsucc = 1; 591 if (cflag) tln++; 592 else if (sflag) 593 ; /* ugh */ 594 else if (lflag) { 595 printf("%s\n", file); 596 close(f); 597 return; 598 } 599 else { 600 if (nfile > 1 && hflag || oflag) printf("%s:", file); 601 if (bflag) printf("%d:", blkno); 602 if (nflag) printf("%ld:", lnum); 603 if (p <= nlp) { 604 while (nlp < &buf[2*blksize]) putchar(*nlp++); 605 nlp = buf; 606 } 607 while (nlp < p) putchar(*nlp++); 608 } 609 } 610 lnum++; 611 nlp = p; 612 if ((out[(cstat=istat)]) == 0) goto brk2; 613 } 614 cfound: 615 if (--ccount <= 0) { 616 if (p <= &buf[blksize]) { 617 if ((ccount = read(f, p, blksize)) <= 0) goto done; 618 } 619 else if (p == &buf[2*blksize]) { 620 p = buf; 621 if ((ccount = read(f, p, blksize)) <= 0) goto done; 622 } 623 else { 624 if ((ccount = read(f, p, &buf[2*blksize]-p)) <= 0) goto done; 625 } 626 blkno += ccount / 512; 627 } 628 } 629 } 630 if (*p++ == '\n') { 631 if (vflag) goto succeed; 632 else { 633 lnum++; 634 nlp = p; 635 if (out[(cstat=istat)]) goto cfound; 636 } 637 } 638 brk2: 639 if (--ccount <= 0) { 640 if (p <= &buf[blksize]) { 641 if ((ccount = read(f, p, blksize)) <= 0) break; 642 } 643 else if (p == &buf[2*blksize]) { 644 p = buf; 645 if ((ccount = read(f, p, blksize)) <= 0) break; 646 } 647 else { 648 if ((ccount = read(f, p, &buf[2*blksize] - p)) <= 0) break; 649 } 650 blkno += ccount / 512; 651 } 652 } 653 done: close(f); 654 if (cflag) { 655 if (nfile > 1) 656 printf("%s:", file); 657 printf("%ld\n", tln); 658 } 659 } 660