1 /*
2 * egrep -- print lines containing (or not containing) a regular expression
3 *
4 * status returns:
5 * 0 - ok, and some matches
6 * 1 - ok, but no matches
7 * 2 - some error
8 */
9 %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST
10 %left OR
11 %left CHAR DOT CCL NCCL '('
12 %left CAT
13 %left STAR PLUS QUEST
14
15 %{
16 /*-
17 * Copyright (c) 1991, 1993
18 * The Regents of the University of California. All rights reserved.
19 *
20 * %sccs.include.proprietary.c%
21 */
22
23 #ifndef lint
24 static char copyright[] =
25 "@(#) Copyright (c) 1991, 1993\n\
26 The Regents of the University of California. All rights reserved.\n";
27 #endif /* not lint */
28
29 #ifndef lint
30 static char sccsid[] = "@(#)old.egrep.y 8.1 (Berkeley) 06/06/93";
31 #endif /* not lint */
32
33 #include <stdio.h>
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <ctype.h>
37
38 #define BLKSIZE 8192
39 #define MAXLIN 350
40 #define MAXPOS 4000
41 #define NCHARS 128
42 #define NSTATES 128
43 #define FINAL -1
44 char gotofn[NSTATES][NCHARS];
45 char cmap[256];
46 int state[NSTATES];
47 char out[NSTATES];
48 int line = 1;
49 int name[MAXLIN];
50 int left[MAXLIN];
51 int right[MAXLIN];
52 int parent[MAXLIN];
53 int foll[MAXLIN];
54 int positions[MAXPOS];
55 char chars[MAXLIN];
56 int nxtpos;
57 int nxtchar = 0;
58 int tmpstat[MAXLIN];
59 int initstat[MAXLIN];
60 int xstate;
61 int count;
62 int icount;
63 char *input;
64 FILE *exprfile;
65
66 long lnum;
67 int bflag;
68 int cflag;
69 int fflag;
70 int iflag;
71 int lflag;
72 int nflag;
73 int hflag = 1;
74 int oflag;
75 int sflag;
76 int vflag;
77 int retcode = 0;
78 int nfile;
79 int blkno;
80 long tln;
81 int nsucc;
82
83 int f;
84 char *fname;
85 %}
86
87 %%
88 s: t
89 ={ unary(FINAL, $1);
90 line--;
91 }
92 ;
93 t: b r
94 ={ $$ = node(CAT, $1, $2); }
95 | OR b r OR
96 ={ $$ = node(CAT, $2, $3); }
97 | OR b r
98 ={ $$ = node(CAT, $2, $3); }
99 | b r OR
100 ={ $$ = node(CAT, $1, $2); }
101 ;
102 b:
103 ={ $$ = enter(DOT);
104 $$ = unary(STAR, $$); }
105 ;
106 r: CHAR
107 ={ $$ = enter($1); }
108 | DOT
109 ={ $$ = enter(DOT); }
110 | CCL
111 ={ $$ = cclenter(CCL); }
112 | NCCL
113 ={ $$ = cclenter(NCCL); }
114 ;
115
116 r: r OR r
117 ={ $$ = node(OR, $1, $3); }
118 | r r %prec CAT
119 ={ $$ = node(CAT, $1, $2); }
120 | r STAR
121 ={ $$ = unary(STAR, $1); }
122 | r PLUS
123 ={ $$ = unary(PLUS, $1); }
124 | r QUEST
125 ={ $$ = unary(QUEST, $1); }
126 | '(' r ')'
127 ={ $$ = $2; }
128 | error
129 ;
130
131 %%
132 yyerror(s) {
133 fprintf(stderr, "egrep: %s\n", s);
134 exit(2);
135 }
136
yylex()137 yylex() {
138 extern int yylval;
139 int cclcnt, x;
140 register char c, d;
141 switch(c = nextch()) {
142 case '$':
143 case '^': c = '\n';
144 goto defchar;
145 case '|': return (OR);
146 case '*': return (STAR);
147 case '+': return (PLUS);
148 case '?': return (QUEST);
149 case '(': return (c);
150 case ')': return (c);
151 case '.': return (DOT);
152 case '\0': return (0);
153 case '\n': return (OR);
154 case '[':
155 x = CCL;
156 cclcnt = 0;
157 count = nxtchar++;
158 if ((c = nextch()) == '^') {
159 x = NCCL;
160 c = nextch();
161 }
162 do {
163 if (c == '\0') synerror();
164 if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) {
165 if ((d = nextch()) != 0) {
166 c = chars[nxtchar-1];
167 while (c < d) {
168 if (nxtchar >= MAXLIN) overflo();
169 chars[nxtchar++] = ++c;
170 cclcnt++;
171 }
172 continue;
173 }
174 }
175 if (nxtchar >= MAXLIN) overflo();
176 chars[nxtchar++] = c;
177 cclcnt++;
178 } while ((c = nextch()) != ']');
179 chars[count] = cclcnt;
180 return (x);
181 case '\\':
182 if ((c = nextch()) == '\0') synerror();
183 defchar:
184 default: yylval = c; return (CHAR);
185 }
186 }
nextch()187 nextch() {
188 register int c;
189 if (fflag) {
190 if ((c = getc(exprfile)) == EOF) {
191 fclose(exprfile);
192 return(0);
193 }
194 }
195 else c = *input++;
196 return(c);
197 }
198
synerror()199 synerror() {
200 fprintf(stderr, "egrep: syntax error\n");
201 exit(2);
202 }
203
enter(x)204 enter(x) int x; {
205 if(line >= MAXLIN) overflo();
206 name[line] = x;
207 left[line] = 0;
208 right[line] = 0;
209 return(line++);
210 }
211
cclenter(x)212 cclenter(x) int x; {
213 register linno;
214 linno = enter(x);
215 right[linno] = count;
216 return (linno);
217 }
218
node(x,l,r)219 node(x, l, r) {
220 if(line >= MAXLIN) overflo();
221 name[line] = x;
222 left[line] = l;
223 right[line] = r;
224 parent[l] = line;
225 parent[r] = line;
226 return(line++);
227 }
228
unary(x,d)229 unary(x, d) {
230 if(line >= MAXLIN) overflo();
231 name[line] = x;
232 left[line] = d;
233 right[line] = 0;
234 parent[d] = line;
235 return(line++);
236 }
overflo()237 overflo() {
238 fprintf(stderr, "egrep: regular expression too long\n");
239 exit(2);
240 }
241
cfoll(v)242 cfoll(v) {
243 register i;
244 if (left[v] == 0) {
245 count = 0;
246 for (i=1; i<=line; i++) tmpstat[i] = 0;
247 follow(v);
248 add(foll, v);
249 }
250 else if (right[v] == 0) cfoll(left[v]);
251 else {
252 cfoll(left[v]);
253 cfoll(right[v]);
254 }
255 }
cgotofn()256 cgotofn() {
257 register c, i, k;
258 int n, s;
259 char symbol[NCHARS];
260 int j, nc, pc, pos;
261 int curpos, num;
262 int number, newpos;
263 count = 0;
264 for (n=3; n<=line; n++) tmpstat[n] = 0;
265 if (cstate(line-1)==0) {
266 tmpstat[line] = 1;
267 count++;
268 out[0] = 1;
269 }
270 for (n=3; n<=line; n++) initstat[n] = tmpstat[n];
271 count--; /*leave out position 1 */
272 icount = count;
273 tmpstat[1] = 0;
274 add(state, 0);
275 n = 0;
276 for (s=0; s<=n; s++) {
277 if (out[s] == 1) continue;
278 for (i=0; i<NCHARS; i++) symbol[i] = 0;
279 num = positions[state[s]];
280 count = icount;
281 for (i=3; i<=line; i++) tmpstat[i] = initstat[i];
282 pos = state[s] + 1;
283 for (i=0; i<num; i++) {
284 curpos = positions[pos];
285 if ((c = name[curpos]) >= 0) {
286 if (c < NCHARS) symbol[c] = 1;
287 else if (c == DOT) {
288 for (k=0; k<NCHARS; k++)
289 if (k!='\n') symbol[k] = 1;
290 }
291 else if (c == CCL) {
292 nc = chars[right[curpos]];
293 pc = right[curpos] + 1;
294 for (k=0; k<nc; k++) symbol[chars[pc++]] = 1;
295 }
296 else if (c == NCCL) {
297 nc = chars[right[curpos]];
298 for (j = 0; j < NCHARS; j++) {
299 pc = right[curpos] + 1;
300 for (k = 0; k < nc; k++)
301 if (j==chars[pc++]) goto cont;
302 if (j!='\n') symbol[j] = 1;
303 cont:;
304 }
305 }
306 else printf("something's funny\n");
307 }
308 pos++;
309 }
310 for (c=0; c<NCHARS; c++) {
311 if (symbol[c] == 1) { /* nextstate(s,c) */
312 count = icount;
313 for (i=3; i <= line; i++) tmpstat[i] = initstat[i];
314 pos = state[s] + 1;
315 for (i=0; i<num; i++) {
316 curpos = positions[pos];
317 if ((k = name[curpos]) >= 0)
318 if (
319 (k == c)
320 | (k == DOT)
321 | (k == CCL && member(c, right[curpos], 1))
322 | (k == NCCL && member(c, right[curpos], 0))
323 ) {
324 number = positions[foll[curpos]];
325 newpos = foll[curpos] + 1;
326 for (k=0; k<number; k++) {
327 if (tmpstat[positions[newpos]] != 1) {
328 tmpstat[positions[newpos]] = 1;
329 count++;
330 }
331 newpos++;
332 }
333 }
334 pos++;
335 } /* end nextstate */
336 if (notin(n)) {
337 if (n >= NSTATES) overflo();
338 add(state, ++n);
339 if (tmpstat[line] == 1) out[n] = 1;
340 gotofn[s][c] = n;
341 }
342 else {
343 gotofn[s][c] = xstate;
344 }
345 }
346 }
347 }
348 }
349
cstate(v)350 cstate(v) {
351 register b;
352 if (left[v] == 0) {
353 if (tmpstat[v] != 1) {
354 tmpstat[v] = 1;
355 count++;
356 }
357 return(1);
358 }
359 else if (right[v] == 0) {
360 if (cstate(left[v]) == 0) return (0);
361 else if (name[v] == PLUS) return (1);
362 else return (0);
363 }
364 else if (name[v] == CAT) {
365 if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0);
366 else return (1);
367 }
368 else { /* name[v] == OR */
369 b = cstate(right[v]);
370 if (cstate(left[v]) == 0 || b == 0) return (0);
371 else return (1);
372 }
373 }
374
375
member(symb,set,torf)376 member(symb, set, torf) {
377 register i, num, pos;
378 num = chars[set];
379 pos = set + 1;
380 for (i=0; i<num; i++)
381 if (symb == chars[pos++]) return (torf);
382 return (!torf);
383 }
384
notin(n)385 notin(n) {
386 register i, j, pos;
387 for (i=0; i<=n; i++) {
388 if (positions[state[i]] == count) {
389 pos = state[i] + 1;
390 for (j=0; j < count; j++)
391 if (tmpstat[positions[pos++]] != 1) goto nxt;
392 xstate = i;
393 return (0);
394 }
395 nxt: ;
396 }
397 return (1);
398 }
399
add(array,n)400 add(array, n) int *array; {
401 register i;
402 if (nxtpos + count > MAXPOS) overflo();
403 array[n] = nxtpos;
404 positions[nxtpos++] = count;
405 for (i=3; i <= line; i++) {
406 if (tmpstat[i] == 1) {
407 positions[nxtpos++] = i;
408 }
409 }
410 }
411
follow(v)412 follow(v) int v; {
413 int p;
414 if (v == line) return;
415 p = parent[v];
416 switch(name[p]) {
417 case STAR:
418 case PLUS: cstate(v);
419 follow(p);
420 return;
421
422 case OR:
423 case QUEST: follow(p);
424 return;
425
426 case CAT: if (v == left[p]) {
427 if (cstate(right[p]) == 0) {
428 follow(p);
429 return;
430 }
431 }
432 else follow(p);
433 return;
434 case FINAL: if (tmpstat[line] != 1) {
435 tmpstat[line] = 1;
436 count++;
437 }
438 return;
439 }
440 }
441
442
main(argc,argv)443 main(argc, argv)
444 char **argv;
445 {
446 register int i;
447
448 while (--argc > 0 && (++argv)[0][0]=='-')
449 switch (argv[0][1]) {
450
451 case 's':
452 sflag++;
453 continue;
454
455 case 'h':
456 hflag = 0;
457 continue;
458
459 case 'o':
460 oflag++;
461 continue;
462
463 case 'b':
464 bflag++;
465 continue;
466
467 case 'c':
468 cflag++;
469 continue;
470
471 case 'e':
472 argc--;
473 argv++;
474 goto out;
475
476 case 'f':
477 fflag++;
478 continue;
479
480 case 'i':
481 iflag++;
482 for ( i = 'A'; i <= 'Z'; i++ )
483 cmap[i] = (char) tolower ( i );
484 continue;
485
486 case 'l':
487 lflag++;
488 continue;
489
490 case 'n':
491 nflag++;
492 continue;
493
494 case 'v':
495 vflag++;
496 continue;
497
498 default:
499 fprintf(stderr, "egrep: unknown flag\n");
500 continue;
501 }
502 out:
503 if (argc<=0)
504 exit(2);
505
506 for (i = 0; i < 256; ++i)
507 cmap[i] = (char)i;
508
509 if (fflag) {
510 fname = *argv;
511 exprfile = fopen(fname, "r");
512 if (exprfile == (FILE *)NULL) {
513 fprintf(stderr, "egrep: can't open %s\n", fname);
514 exit(2);
515 }
516 }
517 else input = *argv;
518 if ( iflag ) {
519 register char *s;
520 for ( s = input; *s != '\0'; s++ )
521 if ( isupper ( (int)(*s) ) )
522 *s = (char) tolower ( (int)(*s) );
523 }
524 argc--;
525 argv++;
526
527 yyparse();
528
529 cfoll(line-1);
530 cgotofn();
531 nfile = argc;
532 if (argc<=0) {
533 if (lflag) exit(1);
534 execute(0);
535 }
536 else while (--argc >= 0) {
537 execute(*argv);
538 argv++;
539 }
540 exit(retcode != 0 ? retcode : nsucc == 0);
541 }
542
execute(file)543 execute(file)
544 char *file;
545 {
546 register char *p;
547 register cstat;
548 register ccount;
549 register char *cmapr = cmap;
550 static char *buf;
551 static int blksize;
552 struct stat stb;
553 char *nlp;
554 int istat;
555 if (file) {
556 if ((f = open(file, 0)) < 0) {
557 fprintf(stderr, "egrep: can't open %s\n", file);
558 retcode = 2;
559 return;
560 }
561 }
562 else f = 0;
563 if (buf == NULL) {
564 if (fstat(f, &stb) > 0 && stb.st_blksize > 0)
565 blksize = stb.st_blksize;
566 else
567 blksize = BLKSIZE;
568 buf = (char *)malloc(2*blksize);
569 if (buf == NULL) {
570 fprintf(stderr, "egrep: no memory for %s\n", file);
571 retcode = 2;
572 return;
573 }
574 }
575 ccount = 0;
576 lnum = 1;
577 tln = 0;
578 blkno = 0;
579 p = buf;
580 nlp = p;
581 if ((ccount = read(f,p,blksize))<=0) goto done;
582 istat = cstat = gotofn[0]['\n'];
583 if (out[cstat]) goto found;
584 for (;;) {
585 cstat = gotofn[cstat][(unsigned char)cmapr[*(unsigned char *)p]];
586 if (out[cstat]) {
587 found: for(;;) {
588 if (*p++ == '\n') {
589 if (vflag == 0) {
590 succeed: nsucc = 1;
591 if (cflag) tln++;
592 else if (sflag)
593 ; /* ugh */
594 else if (lflag) {
595 printf("%s\n", file);
596 close(f);
597 return;
598 }
599 else {
600 if (nfile > 1 && hflag || oflag) printf("%s:", file);
601 if (bflag) printf("%d:", blkno);
602 if (nflag) printf("%ld:", lnum);
603 if (p <= nlp) {
604 while (nlp < &buf[2*blksize]) putchar(*nlp++);
605 nlp = buf;
606 }
607 while (nlp < p) putchar(*nlp++);
608 }
609 }
610 lnum++;
611 nlp = p;
612 if ((out[(cstat=istat)]) == 0) goto brk2;
613 }
614 cfound:
615 if (--ccount <= 0) {
616 if (p <= &buf[blksize]) {
617 if ((ccount = read(f, p, blksize)) <= 0) goto done;
618 }
619 else if (p == &buf[2*blksize]) {
620 p = buf;
621 if ((ccount = read(f, p, blksize)) <= 0) goto done;
622 }
623 else {
624 if ((ccount = read(f, p, &buf[2*blksize]-p)) <= 0) goto done;
625 }
626 blkno += ccount / 512;
627 }
628 }
629 }
630 if (*p++ == '\n') {
631 if (vflag) goto succeed;
632 else {
633 lnum++;
634 nlp = p;
635 if (out[(cstat=istat)]) goto cfound;
636 }
637 }
638 brk2:
639 if (--ccount <= 0) {
640 if (p <= &buf[blksize]) {
641 if ((ccount = read(f, p, blksize)) <= 0) break;
642 }
643 else if (p == &buf[2*blksize]) {
644 p = buf;
645 if ((ccount = read(f, p, blksize)) <= 0) break;
646 }
647 else {
648 if ((ccount = read(f, p, &buf[2*blksize] - p)) <= 0) break;
649 }
650 blkno += ccount / 512;
651 }
652 }
653 done: close(f);
654 if (cflag) {
655 if (nfile > 1)
656 printf("%s:", file);
657 printf("%ld\n", tln);
658 }
659 }
660