1 /* 2 * egrep -- print lines containing (or not containing) a regular expression 3 * 4 * status returns: 5 * 0 - ok, and some matches 6 * 1 - ok, but no matches 7 * 2 - some error 8 */ 9 %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST 10 %left OR 11 %left CHAR DOT CCL NCCL '(' 12 %left CAT 13 %left STAR PLUS QUEST 14 15 %{ 16 static char *sccsid = "@(#)old.egrep.y 4.1 (Berkeley) 10/01/80"; 17 #include <stdio.h> 18 19 #define MAXLIN 350 20 #define MAXPOS 4000 21 #define NCHARS 128 22 #define NSTATES 128 23 #define FINAL -1 24 char gotofn[NSTATES][NCHARS]; 25 int state[NSTATES]; 26 char out[NSTATES]; 27 int line = 1; 28 int name[MAXLIN]; 29 int left[MAXLIN]; 30 int right[MAXLIN]; 31 int parent[MAXLIN]; 32 int foll[MAXLIN]; 33 int positions[MAXPOS]; 34 char chars[MAXLIN]; 35 int nxtpos; 36 int nxtchar = 0; 37 int tmpstat[MAXLIN]; 38 int initstat[MAXLIN]; 39 int xstate; 40 int count; 41 int icount; 42 char *input; 43 44 long lnum; 45 int bflag; 46 int cflag; 47 int fflag; 48 int lflag; 49 int nflag; 50 int hflag = 1; 51 int sflag; 52 int vflag; 53 int nfile; 54 int blkno; 55 long tln; 56 int nsucc; 57 58 int f; 59 int fname; 60 %} 61 62 %% 63 s: t 64 ={ unary(FINAL, $1); 65 line--; 66 } 67 ; 68 t: b r 69 ={ $$ = node(CAT, $1, $2); } 70 | OR b r OR 71 ={ $$ = node(CAT, $2, $3); } 72 | OR b r 73 ={ $$ = node(CAT, $2, $3); } 74 | b r OR 75 ={ $$ = node(CAT, $1, $2); } 76 ; 77 b: 78 ={ $$ = enter(DOT); 79 $$ = unary(STAR, $$); } 80 ; 81 r: CHAR 82 ={ $$ = enter($1); } 83 | DOT 84 ={ $$ = enter(DOT); } 85 | CCL 86 ={ $$ = cclenter(CCL); } 87 | NCCL 88 ={ $$ = cclenter(NCCL); } 89 ; 90 91 r: r OR r 92 ={ $$ = node(OR, $1, $3); } 93 | r r %prec CAT 94 ={ $$ = node(CAT, $1, $2); } 95 | r STAR 96 ={ $$ = unary(STAR, $1); } 97 | r PLUS 98 ={ $$ = unary(PLUS, $1); } 99 | r QUEST 100 ={ $$ = unary(QUEST, $1); } 101 | '(' r ')' 102 ={ $$ = $2; } 103 | error 104 ; 105 106 %% 107 yyerror(s) { 108 fprintf(stderr, "egrep: %s\n", s); 109 exit(2); 110 } 111 112 yylex() { 113 extern int yylval; 114 int cclcnt, x; 115 register char c, d; 116 switch(c = nextch()) { 117 case '$': 118 case '^': c = '\n'; 119 goto defchar; 120 case '|': return (OR); 121 case '*': return (STAR); 122 case '+': return (PLUS); 123 case '?': return (QUEST); 124 case '(': return (c); 125 case ')': return (c); 126 case '.': return (DOT); 127 case '\0': return (0); 128 case '\n': return (OR); 129 case '[': 130 x = CCL; 131 cclcnt = 0; 132 count = nxtchar++; 133 if ((c = nextch()) == '^') { 134 x = NCCL; 135 c = nextch(); 136 } 137 do { 138 if (c == '\0') synerror(); 139 if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) { 140 if ((d = nextch()) != 0) { 141 c = chars[nxtchar-1]; 142 while (c < d) { 143 if (nxtchar >= MAXLIN) overflo(); 144 chars[nxtchar++] = ++c; 145 cclcnt++; 146 } 147 continue; 148 } 149 } 150 if (nxtchar >= MAXLIN) overflo(); 151 chars[nxtchar++] = c; 152 cclcnt++; 153 } while ((c = nextch()) != ']'); 154 chars[count] = cclcnt; 155 return (x); 156 case '\\': 157 if ((c = nextch()) == '\0') synerror(); 158 defchar: 159 default: yylval = c; return (CHAR); 160 } 161 } 162 nextch() { 163 register char c; 164 if (fflag) { 165 if ((c = getc(stdin)) == EOF) return(0); 166 } 167 else c = *input++; 168 return(c); 169 } 170 171 synerror() { 172 fprintf(stderr, "egrep: syntax error\n"); 173 exit(2); 174 } 175 176 enter(x) int x; { 177 if(line >= MAXLIN) overflo(); 178 name[line] = x; 179 left[line] = 0; 180 right[line] = 0; 181 return(line++); 182 } 183 184 cclenter(x) int x; { 185 register linno; 186 linno = enter(x); 187 right[linno] = count; 188 return (linno); 189 } 190 191 node(x, l, r) { 192 if(line >= MAXLIN) overflo(); 193 name[line] = x; 194 left[line] = l; 195 right[line] = r; 196 parent[l] = line; 197 parent[r] = line; 198 return(line++); 199 } 200 201 unary(x, d) { 202 if(line >= MAXLIN) overflo(); 203 name[line] = x; 204 left[line] = d; 205 right[line] = 0; 206 parent[d] = line; 207 return(line++); 208 } 209 overflo() { 210 fprintf(stderr, "egrep: regular expression too long\n"); 211 exit(2); 212 } 213 214 cfoll(v) { 215 register i; 216 if (left[v] == 0) { 217 count = 0; 218 for (i=1; i<=line; i++) tmpstat[i] = 0; 219 follow(v); 220 add(foll, v); 221 } 222 else if (right[v] == 0) cfoll(left[v]); 223 else { 224 cfoll(left[v]); 225 cfoll(right[v]); 226 } 227 } 228 cgotofn() { 229 register c, i, k; 230 int n, s; 231 char symbol[NCHARS]; 232 int j, nc, pc, pos; 233 int curpos, num; 234 int number, newpos; 235 count = 0; 236 for (n=3; n<=line; n++) tmpstat[n] = 0; 237 if (cstate(line-1)==0) { 238 tmpstat[line] = 1; 239 count++; 240 out[0] = 1; 241 } 242 for (n=3; n<=line; n++) initstat[n] = tmpstat[n]; 243 count--; /*leave out position 1 */ 244 icount = count; 245 tmpstat[1] = 0; 246 add(state, 0); 247 n = 0; 248 for (s=0; s<=n; s++) { 249 if (out[s] == 1) continue; 250 for (i=0; i<NCHARS; i++) symbol[i] = 0; 251 num = positions[state[s]]; 252 count = icount; 253 for (i=3; i<=line; i++) tmpstat[i] = initstat[i]; 254 pos = state[s] + 1; 255 for (i=0; i<num; i++) { 256 curpos = positions[pos]; 257 if ((c = name[curpos]) >= 0) { 258 if (c < NCHARS) symbol[c] = 1; 259 else if (c == DOT) { 260 for (k=0; k<NCHARS; k++) 261 if (k!='\n') symbol[k] = 1; 262 } 263 else if (c == CCL) { 264 nc = chars[right[curpos]]; 265 pc = right[curpos] + 1; 266 for (k=0; k<nc; k++) symbol[chars[pc++]] = 1; 267 } 268 else if (c == NCCL) { 269 nc = chars[right[curpos]]; 270 for (j = 0; j < NCHARS; j++) { 271 pc = right[curpos] + 1; 272 for (k = 0; k < nc; k++) 273 if (j==chars[pc++]) goto cont; 274 if (j!='\n') symbol[j] = 1; 275 cont:; 276 } 277 } 278 else printf("something's funny\n"); 279 } 280 pos++; 281 } 282 for (c=0; c<NCHARS; c++) { 283 if (symbol[c] == 1) { /* nextstate(s,c) */ 284 count = icount; 285 for (i=3; i <= line; i++) tmpstat[i] = initstat[i]; 286 pos = state[s] + 1; 287 for (i=0; i<num; i++) { 288 curpos = positions[pos]; 289 if ((k = name[curpos]) >= 0) 290 if ( 291 (k == c) 292 | (k == DOT) 293 | (k == CCL && member(c, right[curpos], 1)) 294 | (k == NCCL && member(c, right[curpos], 0)) 295 ) { 296 number = positions[foll[curpos]]; 297 newpos = foll[curpos] + 1; 298 for (k=0; k<number; k++) { 299 if (tmpstat[positions[newpos]] != 1) { 300 tmpstat[positions[newpos]] = 1; 301 count++; 302 } 303 newpos++; 304 } 305 } 306 pos++; 307 } /* end nextstate */ 308 if (notin(n)) { 309 if (n >= NSTATES) overflo(); 310 add(state, ++n); 311 if (tmpstat[line] == 1) out[n] = 1; 312 gotofn[s][c] = n; 313 } 314 else { 315 gotofn[s][c] = xstate; 316 } 317 } 318 } 319 } 320 } 321 322 cstate(v) { 323 register b; 324 if (left[v] == 0) { 325 if (tmpstat[v] != 1) { 326 tmpstat[v] = 1; 327 count++; 328 } 329 return(1); 330 } 331 else if (right[v] == 0) { 332 if (cstate(left[v]) == 0) return (0); 333 else if (name[v] == PLUS) return (1); 334 else return (0); 335 } 336 else if (name[v] == CAT) { 337 if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0); 338 else return (1); 339 } 340 else { /* name[v] == OR */ 341 b = cstate(right[v]); 342 if (cstate(left[v]) == 0 || b == 0) return (0); 343 else return (1); 344 } 345 } 346 347 348 member(symb, set, torf) { 349 register i, num, pos; 350 num = chars[set]; 351 pos = set + 1; 352 for (i=0; i<num; i++) 353 if (symb == chars[pos++]) return (torf); 354 return (!torf); 355 } 356 357 notin(n) { 358 register i, j, pos; 359 for (i=0; i<=n; i++) { 360 if (positions[state[i]] == count) { 361 pos = state[i] + 1; 362 for (j=0; j < count; j++) 363 if (tmpstat[positions[pos++]] != 1) goto nxt; 364 xstate = i; 365 return (0); 366 } 367 nxt: ; 368 } 369 return (1); 370 } 371 372 add(array, n) int *array; { 373 register i; 374 if (nxtpos + count > MAXPOS) overflo(); 375 array[n] = nxtpos; 376 positions[nxtpos++] = count; 377 for (i=3; i <= line; i++) { 378 if (tmpstat[i] == 1) { 379 positions[nxtpos++] = i; 380 } 381 } 382 } 383 384 follow(v) int v; { 385 int p; 386 if (v == line) return; 387 p = parent[v]; 388 switch(name[p]) { 389 case STAR: 390 case PLUS: cstate(v); 391 follow(p); 392 return; 393 394 case OR: 395 case QUEST: follow(p); 396 return; 397 398 case CAT: if (v == left[p]) { 399 if (cstate(right[p]) == 0) { 400 follow(p); 401 return; 402 } 403 } 404 else follow(p); 405 return; 406 case FINAL: if (tmpstat[line] != 1) { 407 tmpstat[line] = 1; 408 count++; 409 } 410 return; 411 } 412 } 413 414 415 main(argc, argv) 416 char **argv; 417 { 418 while (--argc > 0 && (++argv)[0][0]=='-') 419 switch (argv[0][1]) { 420 421 case 's': 422 sflag++; 423 continue; 424 425 case 'h': 426 hflag = 0; 427 continue; 428 429 case 'b': 430 bflag++; 431 continue; 432 433 case 'c': 434 cflag++; 435 continue; 436 437 case 'e': 438 argc--; 439 argv++; 440 goto out; 441 442 case 'f': 443 fflag++; 444 continue; 445 446 case 'l': 447 lflag++; 448 continue; 449 450 case 'n': 451 nflag++; 452 continue; 453 454 case 'v': 455 vflag++; 456 continue; 457 458 default: 459 fprintf(stderr, "egrep: unknown flag\n"); 460 continue; 461 } 462 out: 463 if (argc<=0) 464 exit(2); 465 if (fflag) { 466 if (freopen(fname = *argv, "r", stdin) == NULL) { 467 fprintf(stderr, "egrep: can't open %s\n", fname); 468 exit(2); 469 } 470 } 471 else input = *argv; 472 argc--; 473 argv++; 474 475 yyparse(); 476 477 cfoll(line-1); 478 cgotofn(); 479 nfile = argc; 480 if (argc<=0) { 481 if (lflag) exit(1); 482 execute(0); 483 } 484 else while (--argc >= 0) { 485 execute(*argv); 486 argv++; 487 } 488 exit(nsucc == 0); 489 } 490 491 execute(file) 492 char *file; 493 { 494 register char *p; 495 register cstat; 496 register ccount; 497 char buf[1024]; 498 char *nlp; 499 int istat; 500 if (file) { 501 if ((f = open(file, 0)) < 0) { 502 fprintf(stderr, "egrep: can't open %s\n", file); 503 exit(2); 504 } 505 } 506 else f = 0; 507 ccount = 0; 508 lnum = 1; 509 tln = 0; 510 blkno = 0; 511 p = buf; 512 nlp = p; 513 if ((ccount = read(f,p,512))<=0) goto done; 514 istat = cstat = gotofn[0]['\n']; 515 if (out[cstat]) goto found; 516 for (;;) { 517 cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */ 518 if (out[cstat]) { 519 found: for(;;) { 520 if (*p++ == '\n') { 521 if (vflag == 0) { 522 succeed: nsucc = 1; 523 if (cflag) tln++; 524 else if (sflag) 525 ; /* ugh */ 526 else if (lflag) { 527 printf("%s\n", file); 528 close(f); 529 return; 530 } 531 else { 532 if (nfile > 1 && hflag) printf("%s:", file); 533 if (bflag) printf("%d:", blkno); 534 if (nflag) printf("%ld:", lnum); 535 if (p <= nlp) { 536 while (nlp < &buf[1024]) putchar(*nlp++); 537 nlp = buf; 538 } 539 while (nlp < p) putchar(*nlp++); 540 } 541 } 542 lnum++; 543 nlp = p; 544 if ((out[(cstat=istat)]) == 0) goto brk2; 545 } 546 cfound: 547 if (--ccount <= 0) { 548 if (p <= &buf[512]) { 549 if ((ccount = read(f, p, 512)) <= 0) goto done; 550 } 551 else if (p == &buf[1024]) { 552 p = buf; 553 if ((ccount = read(f, p, 512)) <= 0) goto done; 554 } 555 else { 556 if ((ccount = read(f, p, &buf[1024]-p)) <= 0) goto done; 557 } 558 blkno++; 559 } 560 } 561 } 562 if (*p++ == '\n') { 563 if (vflag) goto succeed; 564 else { 565 lnum++; 566 nlp = p; 567 if (out[(cstat=istat)]) goto cfound; 568 } 569 } 570 brk2: 571 if (--ccount <= 0) { 572 if (p <= &buf[512]) { 573 if ((ccount = read(f, p, 512)) <= 0) break; 574 } 575 else if (p == &buf[1024]) { 576 p = buf; 577 if ((ccount = read(f, p, 512)) <= 0) break; 578 } 579 else { 580 if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; 581 } 582 blkno++; 583 } 584 } 585 done: close(f); 586 if (cflag) { 587 if (nfile > 1) 588 printf("%s:", file); 589 printf("%ld\n", tln); 590 } 591 } 592