1 /* 2 * egrep -- print lines containing (or not containing) a regular expression 3 * 4 * status returns: 5 * 0 - ok, and some matches 6 * 1 - ok, but no matches 7 * 2 - some error 8 */ 9 %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST 10 %left OR 11 %left CHAR DOT CCL NCCL '(' 12 %left CAT 13 %left STAR PLUS QUEST 14 15 %{ 16 static char *sccsid = "@(#)old.egrep.y 4.6 (Berkeley) 10/07/87"; 17 #include <stdio.h> 18 #include <sys/types.h> 19 #include <sys/stat.h> 20 #include <ctype.h> 21 22 #define BLKSIZE 8192 23 #define MAXLIN 350 24 #define MAXPOS 4000 25 #define NCHARS 128 26 #define NSTATES 128 27 #define FINAL -1 28 char gotofn[NSTATES][NCHARS]; 29 char cmap[256]; 30 int state[NSTATES]; 31 char out[NSTATES]; 32 int line = 1; 33 int name[MAXLIN]; 34 int left[MAXLIN]; 35 int right[MAXLIN]; 36 int parent[MAXLIN]; 37 int foll[MAXLIN]; 38 int positions[MAXPOS]; 39 char chars[MAXLIN]; 40 int nxtpos; 41 int nxtchar = 0; 42 int tmpstat[MAXLIN]; 43 int initstat[MAXLIN]; 44 int xstate; 45 int count; 46 int icount; 47 char *input; 48 FILE *exprfile; 49 50 long lnum; 51 int bflag; 52 int cflag; 53 int fflag; 54 int iflag; 55 int lflag; 56 int nflag; 57 int hflag = 1; 58 int oflag; 59 int sflag; 60 int vflag; 61 int retcode = 0; 62 int nfile; 63 int blkno; 64 long tln; 65 int nsucc; 66 67 int f; 68 char *fname; 69 %} 70 71 %% 72 s: t 73 ={ unary(FINAL, $1); 74 line--; 75 } 76 ; 77 t: b r 78 ={ $$ = node(CAT, $1, $2); } 79 | OR b r OR 80 ={ $$ = node(CAT, $2, $3); } 81 | OR b r 82 ={ $$ = node(CAT, $2, $3); } 83 | b r OR 84 ={ $$ = node(CAT, $1, $2); } 85 ; 86 b: 87 ={ $$ = enter(DOT); 88 $$ = unary(STAR, $$); } 89 ; 90 r: CHAR 91 ={ $$ = enter($1); } 92 | DOT 93 ={ $$ = enter(DOT); } 94 | CCL 95 ={ $$ = cclenter(CCL); } 96 | NCCL 97 ={ $$ = cclenter(NCCL); } 98 ; 99 100 r: r OR r 101 ={ $$ = node(OR, $1, $3); } 102 | r r %prec CAT 103 ={ $$ = node(CAT, $1, $2); } 104 | r STAR 105 ={ $$ = unary(STAR, $1); } 106 | r PLUS 107 ={ $$ = unary(PLUS, $1); } 108 | r QUEST 109 ={ $$ = unary(QUEST, $1); } 110 | '(' r ')' 111 ={ $$ = $2; } 112 | error 113 ; 114 115 %% 116 yyerror(s) { 117 fprintf(stderr, "egrep: %s\n", s); 118 exit(2); 119 } 120 121 yylex() { 122 extern int yylval; 123 int cclcnt, x; 124 register char c, d; 125 switch(c = nextch()) { 126 case '$': 127 case '^': c = '\n'; 128 goto defchar; 129 case '|': return (OR); 130 case '*': return (STAR); 131 case '+': return (PLUS); 132 case '?': return (QUEST); 133 case '(': return (c); 134 case ')': return (c); 135 case '.': return (DOT); 136 case '\0': return (0); 137 case '\n': return (OR); 138 case '[': 139 x = CCL; 140 cclcnt = 0; 141 count = nxtchar++; 142 if ((c = nextch()) == '^') { 143 x = NCCL; 144 c = nextch(); 145 } 146 do { 147 if (c == '\0') synerror(); 148 if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) { 149 if ((d = nextch()) != 0) { 150 c = chars[nxtchar-1]; 151 while (c < d) { 152 if (nxtchar >= MAXLIN) overflo(); 153 chars[nxtchar++] = ++c; 154 cclcnt++; 155 } 156 continue; 157 } 158 } 159 if (nxtchar >= MAXLIN) overflo(); 160 chars[nxtchar++] = c; 161 cclcnt++; 162 } while ((c = nextch()) != ']'); 163 chars[count] = cclcnt; 164 return (x); 165 case '\\': 166 if ((c = nextch()) == '\0') synerror(); 167 defchar: 168 default: yylval = c; return (CHAR); 169 } 170 } 171 nextch() { 172 register int c; 173 if (fflag) { 174 if ((c = getc(exprfile)) == EOF) { 175 fclose(exprfile); 176 return(0); 177 } 178 } 179 else c = *input++; 180 return(c); 181 } 182 183 synerror() { 184 fprintf(stderr, "egrep: syntax error\n"); 185 exit(2); 186 } 187 188 enter(x) int x; { 189 if(line >= MAXLIN) overflo(); 190 name[line] = x; 191 left[line] = 0; 192 right[line] = 0; 193 return(line++); 194 } 195 196 cclenter(x) int x; { 197 register linno; 198 linno = enter(x); 199 right[linno] = count; 200 return (linno); 201 } 202 203 node(x, l, r) { 204 if(line >= MAXLIN) overflo(); 205 name[line] = x; 206 left[line] = l; 207 right[line] = r; 208 parent[l] = line; 209 parent[r] = line; 210 return(line++); 211 } 212 213 unary(x, d) { 214 if(line >= MAXLIN) overflo(); 215 name[line] = x; 216 left[line] = d; 217 right[line] = 0; 218 parent[d] = line; 219 return(line++); 220 } 221 overflo() { 222 fprintf(stderr, "egrep: regular expression too long\n"); 223 exit(2); 224 } 225 226 cfoll(v) { 227 register i; 228 if (left[v] == 0) { 229 count = 0; 230 for (i=1; i<=line; i++) tmpstat[i] = 0; 231 follow(v); 232 add(foll, v); 233 } 234 else if (right[v] == 0) cfoll(left[v]); 235 else { 236 cfoll(left[v]); 237 cfoll(right[v]); 238 } 239 } 240 cgotofn() { 241 register c, i, k; 242 int n, s; 243 char symbol[NCHARS]; 244 int j, nc, pc, pos; 245 int curpos, num; 246 int number, newpos; 247 count = 0; 248 for (n=3; n<=line; n++) tmpstat[n] = 0; 249 if (cstate(line-1)==0) { 250 tmpstat[line] = 1; 251 count++; 252 out[0] = 1; 253 } 254 for (n=3; n<=line; n++) initstat[n] = tmpstat[n]; 255 count--; /*leave out position 1 */ 256 icount = count; 257 tmpstat[1] = 0; 258 add(state, 0); 259 n = 0; 260 for (s=0; s<=n; s++) { 261 if (out[s] == 1) continue; 262 for (i=0; i<NCHARS; i++) symbol[i] = 0; 263 num = positions[state[s]]; 264 count = icount; 265 for (i=3; i<=line; i++) tmpstat[i] = initstat[i]; 266 pos = state[s] + 1; 267 for (i=0; i<num; i++) { 268 curpos = positions[pos]; 269 if ((c = name[curpos]) >= 0) { 270 if (c < NCHARS) symbol[c] = 1; 271 else if (c == DOT) { 272 for (k=0; k<NCHARS; k++) 273 if (k!='\n') symbol[k] = 1; 274 } 275 else if (c == CCL) { 276 nc = chars[right[curpos]]; 277 pc = right[curpos] + 1; 278 for (k=0; k<nc; k++) symbol[chars[pc++]] = 1; 279 } 280 else if (c == NCCL) { 281 nc = chars[right[curpos]]; 282 for (j = 0; j < NCHARS; j++) { 283 pc = right[curpos] + 1; 284 for (k = 0; k < nc; k++) 285 if (j==chars[pc++]) goto cont; 286 if (j!='\n') symbol[j] = 1; 287 cont:; 288 } 289 } 290 else printf("something's funny\n"); 291 } 292 pos++; 293 } 294 for (c=0; c<NCHARS; c++) { 295 if (symbol[c] == 1) { /* nextstate(s,c) */ 296 count = icount; 297 for (i=3; i <= line; i++) tmpstat[i] = initstat[i]; 298 pos = state[s] + 1; 299 for (i=0; i<num; i++) { 300 curpos = positions[pos]; 301 if ((k = name[curpos]) >= 0) 302 if ( 303 (k == c) 304 | (k == DOT) 305 | (k == CCL && member(c, right[curpos], 1)) 306 | (k == NCCL && member(c, right[curpos], 0)) 307 ) { 308 number = positions[foll[curpos]]; 309 newpos = foll[curpos] + 1; 310 for (k=0; k<number; k++) { 311 if (tmpstat[positions[newpos]] != 1) { 312 tmpstat[positions[newpos]] = 1; 313 count++; 314 } 315 newpos++; 316 } 317 } 318 pos++; 319 } /* end nextstate */ 320 if (notin(n)) { 321 if (n >= NSTATES) overflo(); 322 add(state, ++n); 323 if (tmpstat[line] == 1) out[n] = 1; 324 gotofn[s][c] = n; 325 } 326 else { 327 gotofn[s][c] = xstate; 328 } 329 } 330 } 331 } 332 } 333 334 cstate(v) { 335 register b; 336 if (left[v] == 0) { 337 if (tmpstat[v] != 1) { 338 tmpstat[v] = 1; 339 count++; 340 } 341 return(1); 342 } 343 else if (right[v] == 0) { 344 if (cstate(left[v]) == 0) return (0); 345 else if (name[v] == PLUS) return (1); 346 else return (0); 347 } 348 else if (name[v] == CAT) { 349 if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0); 350 else return (1); 351 } 352 else { /* name[v] == OR */ 353 b = cstate(right[v]); 354 if (cstate(left[v]) == 0 || b == 0) return (0); 355 else return (1); 356 } 357 } 358 359 360 member(symb, set, torf) { 361 register i, num, pos; 362 num = chars[set]; 363 pos = set + 1; 364 for (i=0; i<num; i++) 365 if (symb == chars[pos++]) return (torf); 366 return (!torf); 367 } 368 369 notin(n) { 370 register i, j, pos; 371 for (i=0; i<=n; i++) { 372 if (positions[state[i]] == count) { 373 pos = state[i] + 1; 374 for (j=0; j < count; j++) 375 if (tmpstat[positions[pos++]] != 1) goto nxt; 376 xstate = i; 377 return (0); 378 } 379 nxt: ; 380 } 381 return (1); 382 } 383 384 add(array, n) int *array; { 385 register i; 386 if (nxtpos + count > MAXPOS) overflo(); 387 array[n] = nxtpos; 388 positions[nxtpos++] = count; 389 for (i=3; i <= line; i++) { 390 if (tmpstat[i] == 1) { 391 positions[nxtpos++] = i; 392 } 393 } 394 } 395 396 follow(v) int v; { 397 int p; 398 if (v == line) return; 399 p = parent[v]; 400 switch(name[p]) { 401 case STAR: 402 case PLUS: cstate(v); 403 follow(p); 404 return; 405 406 case OR: 407 case QUEST: follow(p); 408 return; 409 410 case CAT: if (v == left[p]) { 411 if (cstate(right[p]) == 0) { 412 follow(p); 413 return; 414 } 415 } 416 else follow(p); 417 return; 418 case FINAL: if (tmpstat[line] != 1) { 419 tmpstat[line] = 1; 420 count++; 421 } 422 return; 423 } 424 } 425 426 427 main(argc, argv) 428 char **argv; 429 { 430 register int i; 431 432 while (--argc > 0 && (++argv)[0][0]=='-') 433 switch (argv[0][1]) { 434 435 case 's': 436 sflag++; 437 continue; 438 439 case 'h': 440 hflag = 0; 441 continue; 442 443 case 'o': 444 oflag++; 445 continue; 446 447 case 'b': 448 bflag++; 449 continue; 450 451 case 'c': 452 cflag++; 453 continue; 454 455 case 'e': 456 argc--; 457 argv++; 458 goto out; 459 460 case 'f': 461 fflag++; 462 continue; 463 464 case 'i': 465 iflag++; 466 for ( i = 'A'; i <= 'Z'; i++ ) 467 cmap[i] = (char) tolower ( i ); 468 continue; 469 470 case 'l': 471 lflag++; 472 continue; 473 474 case 'n': 475 nflag++; 476 continue; 477 478 case 'v': 479 vflag++; 480 continue; 481 482 default: 483 fprintf(stderr, "egrep: unknown flag\n"); 484 continue; 485 } 486 out: 487 if (argc<=0) 488 exit(2); 489 490 for (i = 0; i < 256; ++i) 491 cmap[i] = (char)i; 492 493 if (fflag) { 494 fname = *argv; 495 exprfile = fopen(fname, "r"); 496 if (exprfile == (FILE *)NULL) { 497 fprintf(stderr, "egrep: can't open %s\n", fname); 498 exit(2); 499 } 500 } 501 else input = *argv; 502 if ( iflag ) { 503 register char *s; 504 for ( s = input; *s != '\0'; s++ ) 505 if ( isupper ( (int)(*s) ) ) 506 *s = (char) tolower ( (int)(*s) ); 507 } 508 argc--; 509 argv++; 510 511 yyparse(); 512 513 cfoll(line-1); 514 cgotofn(); 515 nfile = argc; 516 if (argc<=0) { 517 if (lflag) exit(1); 518 execute(0); 519 } 520 else while (--argc >= 0) { 521 execute(*argv); 522 argv++; 523 } 524 exit(retcode != 0 ? retcode : nsucc == 0); 525 } 526 527 execute(file) 528 char *file; 529 { 530 register char *p; 531 register cstat; 532 register ccount; 533 register char *cmapr = cmap; 534 static char *buf; 535 static int blksize; 536 struct stat stb; 537 char *nlp; 538 int istat; 539 if (file) { 540 if ((f = open(file, 0)) < 0) { 541 fprintf(stderr, "egrep: can't open %s\n", file); 542 retcode = 2; 543 return; 544 } 545 } 546 else f = 0; 547 if (buf == NULL) { 548 if (fstat(f, &stb) > 0 && stb.st_blksize > 0) 549 blksize = stb.st_blksize; 550 else 551 blksize = BLKSIZE; 552 buf = (char *)malloc(2*blksize); 553 if (buf == NULL) { 554 fprintf(stderr, "egrep: no memory for %s\n", file); 555 retcode = 2; 556 return; 557 } 558 } 559 ccount = 0; 560 lnum = 1; 561 tln = 0; 562 blkno = 0; 563 p = buf; 564 nlp = p; 565 if ((ccount = read(f,p,blksize))<=0) goto done; 566 istat = cstat = gotofn[0]['\n']; 567 if (out[cstat]) goto found; 568 for (;;) { 569 cstat = gotofn[cstat][(unsigned char)cmapr[*(unsigned char *)p]]; 570 if (out[cstat]) { 571 found: for(;;) { 572 if (*p++ == '\n') { 573 if (vflag == 0) { 574 succeed: nsucc = 1; 575 if (cflag) tln++; 576 else if (sflag) 577 ; /* ugh */ 578 else if (lflag) { 579 printf("%s\n", file); 580 close(f); 581 return; 582 } 583 else { 584 if (nfile > 1 && hflag || oflag) printf("%s:", file); 585 if (bflag) printf("%d:", blkno); 586 if (nflag) printf("%ld:", lnum); 587 if (p <= nlp) { 588 while (nlp < &buf[2*blksize]) putchar(*nlp++); 589 nlp = buf; 590 } 591 while (nlp < p) putchar(*nlp++); 592 } 593 } 594 lnum++; 595 nlp = p; 596 if ((out[(cstat=istat)]) == 0) goto brk2; 597 } 598 cfound: 599 if (--ccount <= 0) { 600 if (p <= &buf[blksize]) { 601 if ((ccount = read(f, p, blksize)) <= 0) goto done; 602 } 603 else if (p == &buf[2*blksize]) { 604 p = buf; 605 if ((ccount = read(f, p, blksize)) <= 0) goto done; 606 } 607 else { 608 if ((ccount = read(f, p, &buf[2*blksize]-p)) <= 0) goto done; 609 } 610 blkno += ccount / 512; 611 } 612 } 613 } 614 if (*p++ == '\n') { 615 if (vflag) goto succeed; 616 else { 617 lnum++; 618 nlp = p; 619 if (out[(cstat=istat)]) goto cfound; 620 } 621 } 622 brk2: 623 if (--ccount <= 0) { 624 if (p <= &buf[blksize]) { 625 if ((ccount = read(f, p, blksize)) <= 0) break; 626 } 627 else if (p == &buf[2*blksize]) { 628 p = buf; 629 if ((ccount = read(f, p, blksize)) <= 0) break; 630 } 631 else { 632 if ((ccount = read(f, p, &buf[2*blksize] - p)) <= 0) break; 633 } 634 blkno += ccount / 512; 635 } 636 } 637 done: close(f); 638 if (cflag) { 639 if (nfile > 1) 640 printf("%s:", file); 641 printf("%ld\n", tln); 642 } 643 } 644