1 /* 2 * egrep -- print lines containing (or not containing) a regular expression 3 * 4 * status returns: 5 * 0 - ok, and some matches 6 * 1 - ok, but no matches 7 * 2 - some error 8 */ 9 %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST 10 %left OR 11 %left CHAR DOT CCL NCCL '(' 12 %left CAT 13 %left STAR PLUS QUEST 14 15 %{ 16 static char *sccsid = "@(#)old.egrep.y 4.3 (Berkeley) 03/13/83"; 17 #include <stdio.h> 18 19 #define MAXLIN 350 20 #define MAXPOS 4000 21 #define NCHARS 128 22 #define NSTATES 128 23 #define FINAL -1 24 char gotofn[NSTATES][NCHARS]; 25 int state[NSTATES]; 26 char out[NSTATES]; 27 int line = 1; 28 int name[MAXLIN]; 29 int left[MAXLIN]; 30 int right[MAXLIN]; 31 int parent[MAXLIN]; 32 int foll[MAXLIN]; 33 int positions[MAXPOS]; 34 char chars[MAXLIN]; 35 int nxtpos; 36 int nxtchar = 0; 37 int tmpstat[MAXLIN]; 38 int initstat[MAXLIN]; 39 int xstate; 40 int count; 41 int icount; 42 char *input; 43 FILE *exprfile; 44 45 long lnum; 46 int bflag; 47 int cflag; 48 int fflag; 49 int lflag; 50 int nflag; 51 int hflag = 1; 52 int sflag; 53 int vflag; 54 int retcode = 0; 55 int nfile; 56 int blkno; 57 long tln; 58 int nsucc; 59 60 int f; 61 char *fname; 62 %} 63 64 %% 65 s: t 66 ={ unary(FINAL, $1); 67 line--; 68 } 69 ; 70 t: b r 71 ={ $$ = node(CAT, $1, $2); } 72 | OR b r OR 73 ={ $$ = node(CAT, $2, $3); } 74 | OR b r 75 ={ $$ = node(CAT, $2, $3); } 76 | b r OR 77 ={ $$ = node(CAT, $1, $2); } 78 ; 79 b: 80 ={ $$ = enter(DOT); 81 $$ = unary(STAR, $$); } 82 ; 83 r: CHAR 84 ={ $$ = enter($1); } 85 | DOT 86 ={ $$ = enter(DOT); } 87 | CCL 88 ={ $$ = cclenter(CCL); } 89 | NCCL 90 ={ $$ = cclenter(NCCL); } 91 ; 92 93 r: r OR r 94 ={ $$ = node(OR, $1, $3); } 95 | r r %prec CAT 96 ={ $$ = node(CAT, $1, $2); } 97 | r STAR 98 ={ $$ = unary(STAR, $1); } 99 | r PLUS 100 ={ $$ = unary(PLUS, $1); } 101 | r QUEST 102 ={ $$ = unary(QUEST, $1); } 103 | '(' r ')' 104 ={ $$ = $2; } 105 | error 106 ; 107 108 %% 109 yyerror(s) { 110 fprintf(stderr, "egrep: %s\n", s); 111 exit(2); 112 } 113 114 yylex() { 115 extern int yylval; 116 int cclcnt, x; 117 register char c, d; 118 switch(c = nextch()) { 119 case '$': 120 case '^': c = '\n'; 121 goto defchar; 122 case '|': return (OR); 123 case '*': return (STAR); 124 case '+': return (PLUS); 125 case '?': return (QUEST); 126 case '(': return (c); 127 case ')': return (c); 128 case '.': return (DOT); 129 case '\0': return (0); 130 case '\n': return (OR); 131 case '[': 132 x = CCL; 133 cclcnt = 0; 134 count = nxtchar++; 135 if ((c = nextch()) == '^') { 136 x = NCCL; 137 c = nextch(); 138 } 139 do { 140 if (c == '\0') synerror(); 141 if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) { 142 if ((d = nextch()) != 0) { 143 c = chars[nxtchar-1]; 144 while (c < d) { 145 if (nxtchar >= MAXLIN) overflo(); 146 chars[nxtchar++] = ++c; 147 cclcnt++; 148 } 149 continue; 150 } 151 } 152 if (nxtchar >= MAXLIN) overflo(); 153 chars[nxtchar++] = c; 154 cclcnt++; 155 } while ((c = nextch()) != ']'); 156 chars[count] = cclcnt; 157 return (x); 158 case '\\': 159 if ((c = nextch()) == '\0') synerror(); 160 defchar: 161 default: yylval = c; return (CHAR); 162 } 163 } 164 nextch() { 165 register char c; 166 if (fflag) { 167 if ((c = getc(exprfile)) == EOF) { 168 fclose(exprfile); 169 return(0); 170 } 171 } 172 else c = *input++; 173 return(c); 174 } 175 176 synerror() { 177 fprintf(stderr, "egrep: syntax error\n"); 178 exit(2); 179 } 180 181 enter(x) int x; { 182 if(line >= MAXLIN) overflo(); 183 name[line] = x; 184 left[line] = 0; 185 right[line] = 0; 186 return(line++); 187 } 188 189 cclenter(x) int x; { 190 register linno; 191 linno = enter(x); 192 right[linno] = count; 193 return (linno); 194 } 195 196 node(x, l, r) { 197 if(line >= MAXLIN) overflo(); 198 name[line] = x; 199 left[line] = l; 200 right[line] = r; 201 parent[l] = line; 202 parent[r] = line; 203 return(line++); 204 } 205 206 unary(x, d) { 207 if(line >= MAXLIN) overflo(); 208 name[line] = x; 209 left[line] = d; 210 right[line] = 0; 211 parent[d] = line; 212 return(line++); 213 } 214 overflo() { 215 fprintf(stderr, "egrep: regular expression too long\n"); 216 exit(2); 217 } 218 219 cfoll(v) { 220 register i; 221 if (left[v] == 0) { 222 count = 0; 223 for (i=1; i<=line; i++) tmpstat[i] = 0; 224 follow(v); 225 add(foll, v); 226 } 227 else if (right[v] == 0) cfoll(left[v]); 228 else { 229 cfoll(left[v]); 230 cfoll(right[v]); 231 } 232 } 233 cgotofn() { 234 register c, i, k; 235 int n, s; 236 char symbol[NCHARS]; 237 int j, nc, pc, pos; 238 int curpos, num; 239 int number, newpos; 240 count = 0; 241 for (n=3; n<=line; n++) tmpstat[n] = 0; 242 if (cstate(line-1)==0) { 243 tmpstat[line] = 1; 244 count++; 245 out[0] = 1; 246 } 247 for (n=3; n<=line; n++) initstat[n] = tmpstat[n]; 248 count--; /*leave out position 1 */ 249 icount = count; 250 tmpstat[1] = 0; 251 add(state, 0); 252 n = 0; 253 for (s=0; s<=n; s++) { 254 if (out[s] == 1) continue; 255 for (i=0; i<NCHARS; i++) symbol[i] = 0; 256 num = positions[state[s]]; 257 count = icount; 258 for (i=3; i<=line; i++) tmpstat[i] = initstat[i]; 259 pos = state[s] + 1; 260 for (i=0; i<num; i++) { 261 curpos = positions[pos]; 262 if ((c = name[curpos]) >= 0) { 263 if (c < NCHARS) symbol[c] = 1; 264 else if (c == DOT) { 265 for (k=0; k<NCHARS; k++) 266 if (k!='\n') symbol[k] = 1; 267 } 268 else if (c == CCL) { 269 nc = chars[right[curpos]]; 270 pc = right[curpos] + 1; 271 for (k=0; k<nc; k++) symbol[chars[pc++]] = 1; 272 } 273 else if (c == NCCL) { 274 nc = chars[right[curpos]]; 275 for (j = 0; j < NCHARS; j++) { 276 pc = right[curpos] + 1; 277 for (k = 0; k < nc; k++) 278 if (j==chars[pc++]) goto cont; 279 if (j!='\n') symbol[j] = 1; 280 cont:; 281 } 282 } 283 else printf("something's funny\n"); 284 } 285 pos++; 286 } 287 for (c=0; c<NCHARS; c++) { 288 if (symbol[c] == 1) { /* nextstate(s,c) */ 289 count = icount; 290 for (i=3; i <= line; i++) tmpstat[i] = initstat[i]; 291 pos = state[s] + 1; 292 for (i=0; i<num; i++) { 293 curpos = positions[pos]; 294 if ((k = name[curpos]) >= 0) 295 if ( 296 (k == c) 297 | (k == DOT) 298 | (k == CCL && member(c, right[curpos], 1)) 299 | (k == NCCL && member(c, right[curpos], 0)) 300 ) { 301 number = positions[foll[curpos]]; 302 newpos = foll[curpos] + 1; 303 for (k=0; k<number; k++) { 304 if (tmpstat[positions[newpos]] != 1) { 305 tmpstat[positions[newpos]] = 1; 306 count++; 307 } 308 newpos++; 309 } 310 } 311 pos++; 312 } /* end nextstate */ 313 if (notin(n)) { 314 if (n >= NSTATES) overflo(); 315 add(state, ++n); 316 if (tmpstat[line] == 1) out[n] = 1; 317 gotofn[s][c] = n; 318 } 319 else { 320 gotofn[s][c] = xstate; 321 } 322 } 323 } 324 } 325 } 326 327 cstate(v) { 328 register b; 329 if (left[v] == 0) { 330 if (tmpstat[v] != 1) { 331 tmpstat[v] = 1; 332 count++; 333 } 334 return(1); 335 } 336 else if (right[v] == 0) { 337 if (cstate(left[v]) == 0) return (0); 338 else if (name[v] == PLUS) return (1); 339 else return (0); 340 } 341 else if (name[v] == CAT) { 342 if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0); 343 else return (1); 344 } 345 else { /* name[v] == OR */ 346 b = cstate(right[v]); 347 if (cstate(left[v]) == 0 || b == 0) return (0); 348 else return (1); 349 } 350 } 351 352 353 member(symb, set, torf) { 354 register i, num, pos; 355 num = chars[set]; 356 pos = set + 1; 357 for (i=0; i<num; i++) 358 if (symb == chars[pos++]) return (torf); 359 return (!torf); 360 } 361 362 notin(n) { 363 register i, j, pos; 364 for (i=0; i<=n; i++) { 365 if (positions[state[i]] == count) { 366 pos = state[i] + 1; 367 for (j=0; j < count; j++) 368 if (tmpstat[positions[pos++]] != 1) goto nxt; 369 xstate = i; 370 return (0); 371 } 372 nxt: ; 373 } 374 return (1); 375 } 376 377 add(array, n) int *array; { 378 register i; 379 if (nxtpos + count > MAXPOS) overflo(); 380 array[n] = nxtpos; 381 positions[nxtpos++] = count; 382 for (i=3; i <= line; i++) { 383 if (tmpstat[i] == 1) { 384 positions[nxtpos++] = i; 385 } 386 } 387 } 388 389 follow(v) int v; { 390 int p; 391 if (v == line) return; 392 p = parent[v]; 393 switch(name[p]) { 394 case STAR: 395 case PLUS: cstate(v); 396 follow(p); 397 return; 398 399 case OR: 400 case QUEST: follow(p); 401 return; 402 403 case CAT: if (v == left[p]) { 404 if (cstate(right[p]) == 0) { 405 follow(p); 406 return; 407 } 408 } 409 else follow(p); 410 return; 411 case FINAL: if (tmpstat[line] != 1) { 412 tmpstat[line] = 1; 413 count++; 414 } 415 return; 416 } 417 } 418 419 420 main(argc, argv) 421 char **argv; 422 { 423 while (--argc > 0 && (++argv)[0][0]=='-') 424 switch (argv[0][1]) { 425 426 case 's': 427 sflag++; 428 continue; 429 430 case 'h': 431 hflag = 0; 432 continue; 433 434 case 'b': 435 bflag++; 436 continue; 437 438 case 'c': 439 cflag++; 440 continue; 441 442 case 'e': 443 argc--; 444 argv++; 445 goto out; 446 447 case 'f': 448 fflag++; 449 continue; 450 451 case 'l': 452 lflag++; 453 continue; 454 455 case 'n': 456 nflag++; 457 continue; 458 459 case 'v': 460 vflag++; 461 continue; 462 463 default: 464 fprintf(stderr, "egrep: unknown flag\n"); 465 continue; 466 } 467 out: 468 if (argc<=0) 469 exit(2); 470 if (fflag) { 471 fname = *argv; 472 exprfile = fopen(fname, "r"); 473 if (exprfile == (FILE *)NULL) { 474 fprintf(stderr, "egrep: can't open %s\n", fname); 475 exit(2); 476 } 477 } 478 else input = *argv; 479 argc--; 480 argv++; 481 482 yyparse(); 483 484 cfoll(line-1); 485 cgotofn(); 486 nfile = argc; 487 if (argc<=0) { 488 if (lflag) exit(1); 489 execute(0); 490 } 491 else while (--argc >= 0) { 492 execute(*argv); 493 argv++; 494 } 495 exit(retcode != 0 ? retcode : nsucc == 0); 496 } 497 498 execute(file) 499 char *file; 500 { 501 register char *p; 502 register cstat; 503 register ccount; 504 char buf[1024]; 505 char *nlp; 506 int istat; 507 if (file) { 508 if ((f = open(file, 0)) < 0) { 509 fprintf(stderr, "egrep: can't open %s\n", file); 510 retcode = 2; 511 return; 512 } 513 } 514 else f = 0; 515 ccount = 0; 516 lnum = 1; 517 tln = 0; 518 blkno = 0; 519 p = buf; 520 nlp = p; 521 if ((ccount = read(f,p,512))<=0) goto done; 522 istat = cstat = gotofn[0]['\n']; 523 if (out[cstat]) goto found; 524 for (;;) { 525 cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */ 526 if (out[cstat]) { 527 found: for(;;) { 528 if (*p++ == '\n') { 529 if (vflag == 0) { 530 succeed: nsucc = 1; 531 if (cflag) tln++; 532 else if (sflag) 533 ; /* ugh */ 534 else if (lflag) { 535 printf("%s\n", file); 536 close(f); 537 return; 538 } 539 else { 540 if (nfile > 1 && hflag) printf("%s:", file); 541 if (bflag) printf("%d:", blkno); 542 if (nflag) printf("%ld:", lnum); 543 if (p <= nlp) { 544 while (nlp < &buf[1024]) putchar(*nlp++); 545 nlp = buf; 546 } 547 while (nlp < p) putchar(*nlp++); 548 } 549 } 550 lnum++; 551 nlp = p; 552 if ((out[(cstat=istat)]) == 0) goto brk2; 553 } 554 cfound: 555 if (--ccount <= 0) { 556 if (p <= &buf[512]) { 557 if ((ccount = read(f, p, 512)) <= 0) goto done; 558 } 559 else if (p == &buf[1024]) { 560 p = buf; 561 if ((ccount = read(f, p, 512)) <= 0) goto done; 562 } 563 else { 564 if ((ccount = read(f, p, &buf[1024]-p)) <= 0) goto done; 565 } 566 blkno++; 567 } 568 } 569 } 570 if (*p++ == '\n') { 571 if (vflag) goto succeed; 572 else { 573 lnum++; 574 nlp = p; 575 if (out[(cstat=istat)]) goto cfound; 576 } 577 } 578 brk2: 579 if (--ccount <= 0) { 580 if (p <= &buf[512]) { 581 if ((ccount = read(f, p, 512)) <= 0) break; 582 } 583 else if (p == &buf[1024]) { 584 p = buf; 585 if ((ccount = read(f, p, 512)) <= 0) break; 586 } 587 else { 588 if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; 589 } 590 blkno++; 591 } 592 } 593 done: close(f); 594 if (cflag) { 595 if (nfile > 1) 596 printf("%s:", file); 597 printf("%ld\n", tln); 598 } 599 } 600