1 /* 2 * egrep -- print lines containing (or not containing) a regular expression 3 * 4 * status returns: 5 * 0 - ok, and some matches 6 * 1 - ok, but no matches 7 * 2 - some error 8 */ 9 %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST 10 %left OR 11 %left CHAR DOT CCL NCCL '(' 12 %left CAT 13 %left STAR PLUS QUEST 14 15 %{ 16 static char *sccsid = "@(#)old.egrep.y 4.2 (Berkeley) 11/08/82"; 17 #include <stdio.h> 18 19 #define MAXLIN 350 20 #define MAXPOS 4000 21 #define NCHARS 128 22 #define NSTATES 128 23 #define FINAL -1 24 char gotofn[NSTATES][NCHARS]; 25 int state[NSTATES]; 26 char out[NSTATES]; 27 int line = 1; 28 int name[MAXLIN]; 29 int left[MAXLIN]; 30 int right[MAXLIN]; 31 int parent[MAXLIN]; 32 int foll[MAXLIN]; 33 int positions[MAXPOS]; 34 char chars[MAXLIN]; 35 int nxtpos; 36 int nxtchar = 0; 37 int tmpstat[MAXLIN]; 38 int initstat[MAXLIN]; 39 int xstate; 40 int count; 41 int icount; 42 char *input; 43 FILE *exprfile; 44 45 long lnum; 46 int bflag; 47 int cflag; 48 int fflag; 49 int lflag; 50 int nflag; 51 int hflag = 1; 52 int sflag; 53 int vflag; 54 int nfile; 55 int blkno; 56 long tln; 57 int nsucc; 58 59 int f; 60 char *fname; 61 %} 62 63 %% 64 s: t 65 ={ unary(FINAL, $1); 66 line--; 67 } 68 ; 69 t: b r 70 ={ $$ = node(CAT, $1, $2); } 71 | OR b r OR 72 ={ $$ = node(CAT, $2, $3); } 73 | OR b r 74 ={ $$ = node(CAT, $2, $3); } 75 | b r OR 76 ={ $$ = node(CAT, $1, $2); } 77 ; 78 b: 79 ={ $$ = enter(DOT); 80 $$ = unary(STAR, $$); } 81 ; 82 r: CHAR 83 ={ $$ = enter($1); } 84 | DOT 85 ={ $$ = enter(DOT); } 86 | CCL 87 ={ $$ = cclenter(CCL); } 88 | NCCL 89 ={ $$ = cclenter(NCCL); } 90 ; 91 92 r: r OR r 93 ={ $$ = node(OR, $1, $3); } 94 | r r %prec CAT 95 ={ $$ = node(CAT, $1, $2); } 96 | r STAR 97 ={ $$ = unary(STAR, $1); } 98 | r PLUS 99 ={ $$ = unary(PLUS, $1); } 100 | r QUEST 101 ={ $$ = unary(QUEST, $1); } 102 | '(' r ')' 103 ={ $$ = $2; } 104 | error 105 ; 106 107 %% 108 yyerror(s) { 109 fprintf(stderr, "egrep: %s\n", s); 110 exit(2); 111 } 112 113 yylex() { 114 extern int yylval; 115 int cclcnt, x; 116 register char c, d; 117 switch(c = nextch()) { 118 case '$': 119 case '^': c = '\n'; 120 goto defchar; 121 case '|': return (OR); 122 case '*': return (STAR); 123 case '+': return (PLUS); 124 case '?': return (QUEST); 125 case '(': return (c); 126 case ')': return (c); 127 case '.': return (DOT); 128 case '\0': return (0); 129 case '\n': return (OR); 130 case '[': 131 x = CCL; 132 cclcnt = 0; 133 count = nxtchar++; 134 if ((c = nextch()) == '^') { 135 x = NCCL; 136 c = nextch(); 137 } 138 do { 139 if (c == '\0') synerror(); 140 if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) { 141 if ((d = nextch()) != 0) { 142 c = chars[nxtchar-1]; 143 while (c < d) { 144 if (nxtchar >= MAXLIN) overflo(); 145 chars[nxtchar++] = ++c; 146 cclcnt++; 147 } 148 continue; 149 } 150 } 151 if (nxtchar >= MAXLIN) overflo(); 152 chars[nxtchar++] = c; 153 cclcnt++; 154 } while ((c = nextch()) != ']'); 155 chars[count] = cclcnt; 156 return (x); 157 case '\\': 158 if ((c = nextch()) == '\0') synerror(); 159 defchar: 160 default: yylval = c; return (CHAR); 161 } 162 } 163 nextch() { 164 register char c; 165 if (fflag) { 166 if ((c = getc(exprfile)) == EOF) { 167 fclose(exprfile); 168 return(0); 169 } 170 } 171 else c = *input++; 172 return(c); 173 } 174 175 synerror() { 176 fprintf(stderr, "egrep: syntax error\n"); 177 exit(2); 178 } 179 180 enter(x) int x; { 181 if(line >= MAXLIN) overflo(); 182 name[line] = x; 183 left[line] = 0; 184 right[line] = 0; 185 return(line++); 186 } 187 188 cclenter(x) int x; { 189 register linno; 190 linno = enter(x); 191 right[linno] = count; 192 return (linno); 193 } 194 195 node(x, l, r) { 196 if(line >= MAXLIN) overflo(); 197 name[line] = x; 198 left[line] = l; 199 right[line] = r; 200 parent[l] = line; 201 parent[r] = line; 202 return(line++); 203 } 204 205 unary(x, d) { 206 if(line >= MAXLIN) overflo(); 207 name[line] = x; 208 left[line] = d; 209 right[line] = 0; 210 parent[d] = line; 211 return(line++); 212 } 213 overflo() { 214 fprintf(stderr, "egrep: regular expression too long\n"); 215 exit(2); 216 } 217 218 cfoll(v) { 219 register i; 220 if (left[v] == 0) { 221 count = 0; 222 for (i=1; i<=line; i++) tmpstat[i] = 0; 223 follow(v); 224 add(foll, v); 225 } 226 else if (right[v] == 0) cfoll(left[v]); 227 else { 228 cfoll(left[v]); 229 cfoll(right[v]); 230 } 231 } 232 cgotofn() { 233 register c, i, k; 234 int n, s; 235 char symbol[NCHARS]; 236 int j, nc, pc, pos; 237 int curpos, num; 238 int number, newpos; 239 count = 0; 240 for (n=3; n<=line; n++) tmpstat[n] = 0; 241 if (cstate(line-1)==0) { 242 tmpstat[line] = 1; 243 count++; 244 out[0] = 1; 245 } 246 for (n=3; n<=line; n++) initstat[n] = tmpstat[n]; 247 count--; /*leave out position 1 */ 248 icount = count; 249 tmpstat[1] = 0; 250 add(state, 0); 251 n = 0; 252 for (s=0; s<=n; s++) { 253 if (out[s] == 1) continue; 254 for (i=0; i<NCHARS; i++) symbol[i] = 0; 255 num = positions[state[s]]; 256 count = icount; 257 for (i=3; i<=line; i++) tmpstat[i] = initstat[i]; 258 pos = state[s] + 1; 259 for (i=0; i<num; i++) { 260 curpos = positions[pos]; 261 if ((c = name[curpos]) >= 0) { 262 if (c < NCHARS) symbol[c] = 1; 263 else if (c == DOT) { 264 for (k=0; k<NCHARS; k++) 265 if (k!='\n') symbol[k] = 1; 266 } 267 else if (c == CCL) { 268 nc = chars[right[curpos]]; 269 pc = right[curpos] + 1; 270 for (k=0; k<nc; k++) symbol[chars[pc++]] = 1; 271 } 272 else if (c == NCCL) { 273 nc = chars[right[curpos]]; 274 for (j = 0; j < NCHARS; j++) { 275 pc = right[curpos] + 1; 276 for (k = 0; k < nc; k++) 277 if (j==chars[pc++]) goto cont; 278 if (j!='\n') symbol[j] = 1; 279 cont:; 280 } 281 } 282 else printf("something's funny\n"); 283 } 284 pos++; 285 } 286 for (c=0; c<NCHARS; c++) { 287 if (symbol[c] == 1) { /* nextstate(s,c) */ 288 count = icount; 289 for (i=3; i <= line; i++) tmpstat[i] = initstat[i]; 290 pos = state[s] + 1; 291 for (i=0; i<num; i++) { 292 curpos = positions[pos]; 293 if ((k = name[curpos]) >= 0) 294 if ( 295 (k == c) 296 | (k == DOT) 297 | (k == CCL && member(c, right[curpos], 1)) 298 | (k == NCCL && member(c, right[curpos], 0)) 299 ) { 300 number = positions[foll[curpos]]; 301 newpos = foll[curpos] + 1; 302 for (k=0; k<number; k++) { 303 if (tmpstat[positions[newpos]] != 1) { 304 tmpstat[positions[newpos]] = 1; 305 count++; 306 } 307 newpos++; 308 } 309 } 310 pos++; 311 } /* end nextstate */ 312 if (notin(n)) { 313 if (n >= NSTATES) overflo(); 314 add(state, ++n); 315 if (tmpstat[line] == 1) out[n] = 1; 316 gotofn[s][c] = n; 317 } 318 else { 319 gotofn[s][c] = xstate; 320 } 321 } 322 } 323 } 324 } 325 326 cstate(v) { 327 register b; 328 if (left[v] == 0) { 329 if (tmpstat[v] != 1) { 330 tmpstat[v] = 1; 331 count++; 332 } 333 return(1); 334 } 335 else if (right[v] == 0) { 336 if (cstate(left[v]) == 0) return (0); 337 else if (name[v] == PLUS) return (1); 338 else return (0); 339 } 340 else if (name[v] == CAT) { 341 if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0); 342 else return (1); 343 } 344 else { /* name[v] == OR */ 345 b = cstate(right[v]); 346 if (cstate(left[v]) == 0 || b == 0) return (0); 347 else return (1); 348 } 349 } 350 351 352 member(symb, set, torf) { 353 register i, num, pos; 354 num = chars[set]; 355 pos = set + 1; 356 for (i=0; i<num; i++) 357 if (symb == chars[pos++]) return (torf); 358 return (!torf); 359 } 360 361 notin(n) { 362 register i, j, pos; 363 for (i=0; i<=n; i++) { 364 if (positions[state[i]] == count) { 365 pos = state[i] + 1; 366 for (j=0; j < count; j++) 367 if (tmpstat[positions[pos++]] != 1) goto nxt; 368 xstate = i; 369 return (0); 370 } 371 nxt: ; 372 } 373 return (1); 374 } 375 376 add(array, n) int *array; { 377 register i; 378 if (nxtpos + count > MAXPOS) overflo(); 379 array[n] = nxtpos; 380 positions[nxtpos++] = count; 381 for (i=3; i <= line; i++) { 382 if (tmpstat[i] == 1) { 383 positions[nxtpos++] = i; 384 } 385 } 386 } 387 388 follow(v) int v; { 389 int p; 390 if (v == line) return; 391 p = parent[v]; 392 switch(name[p]) { 393 case STAR: 394 case PLUS: cstate(v); 395 follow(p); 396 return; 397 398 case OR: 399 case QUEST: follow(p); 400 return; 401 402 case CAT: if (v == left[p]) { 403 if (cstate(right[p]) == 0) { 404 follow(p); 405 return; 406 } 407 } 408 else follow(p); 409 return; 410 case FINAL: if (tmpstat[line] != 1) { 411 tmpstat[line] = 1; 412 count++; 413 } 414 return; 415 } 416 } 417 418 419 main(argc, argv) 420 char **argv; 421 { 422 while (--argc > 0 && (++argv)[0][0]=='-') 423 switch (argv[0][1]) { 424 425 case 's': 426 sflag++; 427 continue; 428 429 case 'h': 430 hflag = 0; 431 continue; 432 433 case 'b': 434 bflag++; 435 continue; 436 437 case 'c': 438 cflag++; 439 continue; 440 441 case 'e': 442 argc--; 443 argv++; 444 goto out; 445 446 case 'f': 447 fflag++; 448 continue; 449 450 case 'l': 451 lflag++; 452 continue; 453 454 case 'n': 455 nflag++; 456 continue; 457 458 case 'v': 459 vflag++; 460 continue; 461 462 default: 463 fprintf(stderr, "egrep: unknown flag\n"); 464 continue; 465 } 466 out: 467 if (argc<=0) 468 exit(2); 469 if (fflag) { 470 fname = *argv; 471 exprfile = fopen(fname, "r"); 472 if (exprfile == (FILE *)NULL) { 473 fprintf(stderr, "egrep: can't open %s\n", fname); 474 exit(2); 475 } 476 } 477 else input = *argv; 478 argc--; 479 argv++; 480 481 yyparse(); 482 483 cfoll(line-1); 484 cgotofn(); 485 nfile = argc; 486 if (argc<=0) { 487 if (lflag) exit(1); 488 execute(0); 489 } 490 else while (--argc >= 0) { 491 execute(*argv); 492 argv++; 493 } 494 exit(nsucc == 0); 495 } 496 497 execute(file) 498 char *file; 499 { 500 register char *p; 501 register cstat; 502 register ccount; 503 char buf[1024]; 504 char *nlp; 505 int istat; 506 if (file) { 507 if ((f = open(file, 0)) < 0) { 508 fprintf(stderr, "egrep: can't open %s\n", file); 509 exit(2); 510 } 511 } 512 else f = 0; 513 ccount = 0; 514 lnum = 1; 515 tln = 0; 516 blkno = 0; 517 p = buf; 518 nlp = p; 519 if ((ccount = read(f,p,512))<=0) goto done; 520 istat = cstat = gotofn[0]['\n']; 521 if (out[cstat]) goto found; 522 for (;;) { 523 cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */ 524 if (out[cstat]) { 525 found: for(;;) { 526 if (*p++ == '\n') { 527 if (vflag == 0) { 528 succeed: nsucc = 1; 529 if (cflag) tln++; 530 else if (sflag) 531 ; /* ugh */ 532 else if (lflag) { 533 printf("%s\n", file); 534 close(f); 535 return; 536 } 537 else { 538 if (nfile > 1 && hflag) printf("%s:", file); 539 if (bflag) printf("%d:", blkno); 540 if (nflag) printf("%ld:", lnum); 541 if (p <= nlp) { 542 while (nlp < &buf[1024]) putchar(*nlp++); 543 nlp = buf; 544 } 545 while (nlp < p) putchar(*nlp++); 546 } 547 } 548 lnum++; 549 nlp = p; 550 if ((out[(cstat=istat)]) == 0) goto brk2; 551 } 552 cfound: 553 if (--ccount <= 0) { 554 if (p <= &buf[512]) { 555 if ((ccount = read(f, p, 512)) <= 0) goto done; 556 } 557 else if (p == &buf[1024]) { 558 p = buf; 559 if ((ccount = read(f, p, 512)) <= 0) goto done; 560 } 561 else { 562 if ((ccount = read(f, p, &buf[1024]-p)) <= 0) goto done; 563 } 564 blkno++; 565 } 566 } 567 } 568 if (*p++ == '\n') { 569 if (vflag) goto succeed; 570 else { 571 lnum++; 572 nlp = p; 573 if (out[(cstat=istat)]) goto cfound; 574 } 575 } 576 brk2: 577 if (--ccount <= 0) { 578 if (p <= &buf[512]) { 579 if ((ccount = read(f, p, 512)) <= 0) break; 580 } 581 else if (p == &buf[1024]) { 582 p = buf; 583 if ((ccount = read(f, p, 512)) <= 0) break; 584 } 585 else { 586 if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; 587 } 588 blkno++; 589 } 590 } 591 done: close(f); 592 if (cflag) { 593 if (nfile > 1) 594 printf("%s:", file); 595 printf("%ld\n", tln); 596 } 597 } 598