1 /*
2 * Derived from Unix 32V /usr/src/cmd/egrep.y
3 *
4 * Changes by Gunnar Ritter, Freiburg i. Br., Germany, April 2001.
5 */
6 /*
7 * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * Redistributions of source code and documentation must retain the
13 * above copyright notice, this list of conditions and the following
14 * disclaimer.
15 * Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed or owned by Caldera
21 * International, Inc.
22 * Neither the name of Caldera International, Inc. nor the names of
23 * other contributors may be used to endorse or promote products
24 * derived from this software without specific prior written permission.
25 *
26 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
27 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
28 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
29 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE
31 * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
34 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
35 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
36 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
37 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 */
39
40 %{
41 #if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4
42 #define USED __attribute__ ((used))
43 #elif defined __GNUC__
44 #define USED __attribute__ ((unused))
45 #else
46 #define USED
47 #endif
48 static const char sccsid[] USED = "@(#)egrep.sl 2.22 (gritter) 5/29/05";
49 %}
50
51 /*
52 * egrep -- print lines containing (or not containing) a regular expression
53 *
54 * status returns:
55 * 0 - ok, and some matches
56 * 1 - ok, but no matches
57 * 2 - some error
58 */
59
60 %token CHAR MCHAR DOT CCL NCCL OR CAT STAR PLUS QUEST
61 %left OR
62 %left CHAR MCHAR DOT CCL NCCL '('
63 %left CAT
64 %left STAR PLUS QUEST
65
66 %{
67 #include <sys/types.h>
68 #include <unistd.h>
69 #include <stdio.h>
70 #include <string.h>
71 #include <ctype.h>
72 #include <regex.h>
73 #include <limits.h>
74 #include <stdlib.h>
75 #include "grep.h"
76 #include "alloc.h"
77
78 #include <mbtowi.h>
79
80 #define NCHARS 256
81 #define NSTATES 64
82 #define FINAL -1
83 static unsigned MAXLIN;
84 static unsigned MAXPOS;
85 static unsigned MAXCHARS;
86 static unsigned char gotofn[NSTATES][NCHARS];
87 static int lastn;
88 static int state[NSTATES];
89 static char out[NSTATES];
90 static int line = 1;
91 static int *name;
92 static int *left;
93 static int *right;
94 static int *parent;
95 static int *foll;
96 static int *positions;
97 static wchar_t *chars;
98 static int nxtpos;
99 static int initpos;
100 static int nxtchar = 0;
101 static int *tmpstat;
102 static int *initstat;
103 static int xstate;
104 static int count;
105 static int icount;
106
107 static void yyerror(const char *);
108 static void *erealloc(void *, size_t);
109 static void more_chars(unsigned);
110 static void more_lines(unsigned);
111 static void more_positions(unsigned);
112 static int yylex(void);
113 static void synerror(void);
114 static int enter(int);
115 static int cclenter(int);
116 static int menter(int);
117 static int node(int, int, int);
118 static int unary(int, int);
119 static void cfoll(int);
120 static int cgotofn(int, int);
121 static void igotofn(void);
122 static int cstate(int);
123 static int member(int, int, int);
124 static int notin(int);
125 static void add(int *, int);
126 static void follow(int);
127 static void eg_build(void);
128 static int eg_matchw(const char *, size_t);
129 static int eg_match(const char *, size_t);
130 static int eg_range(struct iblok *, char *);
131 static int eg_rangew(struct iblok *, char *);
132 %}
133
134 %%
135 s: t
136 { unary(FINAL, $1);
137 line--;
138 }
139 ;
140 t: b r
141 { $$ = node(CAT, $1, $2); }
142 | OR b r OR
143 { $$ = node(CAT, $2, $3); }
144 | OR b r
145 { $$ = node(CAT, $2, $3); }
146 | b r OR
147 { $$ = node(CAT, $1, $2); }
148 ;
149 b:
150 { $$ = enter(DOT);
151 $$ = unary(STAR, $$); }
152 ;
153 r: CHAR
154 { $$ = enter($1); }
155 | MCHAR
156 { $$ = menter($1); }
157 | DOT
158 { $$ = enter(DOT); }
159 | CCL
160 { $$ = cclenter(CCL); }
161 | NCCL
162 { $$ = cclenter(NCCL); }
163 ;
164
165 r: r OR r
166 { $$ = node(OR, $1, $3); }
167 | r r %prec CAT
168 { $$ = node(CAT, $1, $2); }
169 | r STAR
170 { $$ = unary(STAR, $1); }
171 | r PLUS
172 { $$ = unary(PLUS, $1); }
173 | r QUEST
174 { $$ = unary(QUEST, $1); }
175 | '(' r ')'
176 { $$ = $2; }
177 | error
178 ;
179
180 %%
181 static void
182 yyerror(const char *s) {
183 fprintf(stderr, "%s: %s\n", progname, s);
184 exit(2);
185 }
186
187 static void *
erealloc(void * vp,size_t size)188 erealloc(void *vp, size_t size)
189 {
190 if ((vp = realloc(vp, size)) == NULL) {
191 write(2, progname, strlen(progname));
192 write(2, ": regular expression too long\n", 30);
193 _exit(2);
194 }
195 return (vp);
196 }
197
198 static void
more_chars(unsigned need)199 more_chars(unsigned need)
200 {
201 unsigned omaxchars = MAXCHARS;
202 unsigned incr;
203 incr = need - MAXCHARS + 32;
204 MAXCHARS += incr;
205 chars = erealloc(chars, MAXCHARS * sizeof *chars);
206 memset(&chars[omaxchars], 0, incr * sizeof *chars);
207 }
208
209 static void
more_lines(unsigned need)210 more_lines(unsigned need)
211 {
212 unsigned omaxlin = MAXLIN;
213 unsigned incr;
214 incr = need - MAXLIN + 32;
215 MAXLIN += incr;
216 name = erealloc(name, MAXLIN * sizeof *name);
217 memset(&name[omaxlin], 0, incr * sizeof *name);
218 left = erealloc(left, MAXLIN * sizeof *left);
219 memset(&left[omaxlin], 0, incr * sizeof *left);
220 right = erealloc(right, MAXLIN * sizeof *right);
221 memset(&right[omaxlin], 0, incr * sizeof *right);
222 parent = erealloc(parent, MAXLIN * sizeof *parent);
223 memset(&parent[omaxlin], 0, incr * sizeof *parent);
224 foll = erealloc(foll, MAXLIN * sizeof *foll);
225 memset(&foll[omaxlin], 0, incr * sizeof *foll);
226 tmpstat = erealloc(tmpstat, MAXLIN * sizeof *tmpstat);
227 memset(&tmpstat[omaxlin], 0, incr * sizeof *tmpstat);
228 initstat = erealloc(initstat, MAXLIN * sizeof *initstat);
229 memset(&initstat[omaxlin], 0, incr * sizeof *initstat);
230 }
231
232 static void
more_positions(unsigned need)233 more_positions(unsigned need)
234 {
235 unsigned omaxpos = MAXPOS;
236 unsigned incr;
237 incr = need - MAXPOS + 48;
238 MAXPOS += incr;
239 positions = erealloc(positions, MAXPOS * sizeof *positions);
240 memset(&positions[omaxpos], 0, incr * sizeof *positions);
241 }
242
243 static int
yylex(void)244 yylex(void) {
245 int cclcnt, x;
246 register int c;
247 switch(c = nextch()) {
248 case '$':
249 case '^': c = '\n';
250 goto defchar;
251 case '|': return (OR);
252 case '*': return (STAR);
253 case '+': return (PLUS);
254 case '?': return (QUEST);
255 case '(': return (c);
256 case ')': return (c);
257 case '.': return (DOT);
258 case EOF: return (0);
259 case '\0': return (0);
260 case '\n': return (OR);
261 case '[':
262 x = CCL;
263 cclcnt = 0;
264 count = nxtchar++;
265 if ((c = nextch()) == '^') {
266 x = NCCL;
267 c = nextch();
268 }
269 do {
270 if (c == '\0' || c == EOF) synerror();
271 if (nxtchar >= MAXCHARS) more_chars(nxtchar);
272 chars[nxtchar++] = c;
273 cclcnt++;
274 } while ((c = nextch()) != ']');
275 chars[count] = cclcnt;
276 return (x);
277 case '\\':
278 if ((c = nextch()) == EOF) synerror();
279 defchar:
280 default:
281 if (mbcode && c & ~(wchar_t)0177) {
282 if (nxtchar >= MAXCHARS) more_chars(nxtchar);
283 chars[nxtchar] = c;
284 yylval = nxtchar++;
285 return (MCHAR);
286 }
287 yylval = c; return (CHAR);
288 }
289 }
290
291 static void
synerror(void)292 synerror(void) {
293 yyerror("syntax error");
294 }
295
296 static int
enter(int x)297 enter(int x) {
298 if(line >= MAXLIN) more_lines(line);
299 name[line] = x;
300 left[line] = 0;
301 right[line] = 0;
302 return(line++);
303 }
304
305 static int
cclenter(int x)306 cclenter(int x) {
307 register int linno;
308 linno = enter(x);
309 right[linno] = count;
310 return (linno);
311 }
312
313 static int
menter(int x)314 menter(int x) {
315 register int linno;
316 linno = enter(MCHAR);
317 right[linno] = x;
318 return (linno);
319 }
320
321 static int
node(int x,int l,int r)322 node(int x, int l, int r) {
323 if(line >= MAXLIN) more_lines(line);
324 name[line] = x;
325 left[line] = l;
326 right[line] = r;
327 parent[l] = line;
328 parent[r] = line;
329 return(line++);
330 }
331
332 static int
unary(int x,int d)333 unary(int x, int d) {
334 if(line >= MAXLIN) more_lines(line);
335 name[line] = x;
336 left[line] = d;
337 right[line] = 0;
338 parent[d] = line;
339 return(line++);
340 }
341
342 static void
cfoll(int v)343 cfoll(int v) {
344 register int i;
345 if (left[v] == 0) {
346 count = 0;
347 for (i=1; i<=line; i++) tmpstat[i] = 0;
348 follow(v);
349 add(foll, v);
350 }
351 else if (right[v] == 0) cfoll(left[v]);
352 else {
353 cfoll(left[v]);
354 cfoll(right[v]);
355 }
356 }
357
358 static int
cgotofn(int s,int c)359 cgotofn(int s, int c)
360 {
361 register int cc, i, k;
362 int n, pos;
363 int st;
364 int curpos, num;
365 int number, newpos;
366 n = lastn;
367 cc = iflag ? mbcode && c & ~(wchar_t)0177 ? towlower(c):tolower(c) : c;
368 num = positions[state[s]];
369 count = icount;
370 for (i=3; i <= line; i++) tmpstat[i] = initstat[i];
371 pos = state[s] + 1;
372 for (i=0; i<num; i++) {
373 curpos = positions[pos];
374 if ((k = name[curpos]) >= 0)
375 if (
376 ((cc & ~(wchar_t)(NCHARS-1)) == 0 && k == cc)
377 || (k == MCHAR && cc == chars[right[curpos]])
378 || (cc != '\n' && cc != WEOF && ((k == DOT)
379 || (k == CCL && member(cc, right[curpos], 1))
380 || (k == NCCL && member(cc, right[curpos], 0))))
381 ) {
382 if (foll[curpos] < MAXPOS)
383 number = positions[foll[curpos]];
384 else
385 number = 0;
386 newpos = foll[curpos] + 1;
387 for (k=0; k<number; k++) {
388 if (tmpstat[positions[newpos]] != 1) {
389 tmpstat[positions[newpos]] = 1;
390 count++;
391 }
392 newpos++;
393 }
394 }
395 pos++;
396 }
397 if (notin(n)) {
398 if (++n >= NSTATES) {
399 n = gotofn[0]['\n'];
400 memset(gotofn, 0, sizeof gotofn);
401 gotofn[0]['\n'] = n;
402 memset(&out[n], 0, sizeof out - n);
403 nxtpos = initpos;
404 st = n + 1;
405 }
406 else {
407 st = n + 1;
408 if ((c & ~(wchar_t)(NCHARS-1)) == 0)
409 gotofn[s][c] = st;
410 }
411 add(state, n);
412 if (tmpstat[line] == 1) out[n] = 1;
413 }
414 else {
415 st = xstate + 1;
416 if ((c & ~(wchar_t)(NCHARS-1)) == 0)
417 gotofn[s][c] = st;
418 }
419 lastn = n;
420 return (st);
421 }
422
423 static void
igotofn(void)424 igotofn(void)
425 {
426 int n;
427 count = 0;
428 for (n=3; n<=line; n++) tmpstat[n] = 0;
429 if (cstate(line-1)==0) {
430 tmpstat[line] = 1;
431 count++;
432 out[0] = 1;
433 }
434 for (n=3; n<=line; n++) initstat[n] = tmpstat[n];
435 count--; /*leave out position 1 */
436 icount = count;
437 tmpstat[1] = 0;
438 add(state, 0);
439 cgotofn(0, '\n');
440 initpos = nxtpos;
441 }
442
443 static int
cstate(int v)444 cstate(int v) {
445 register int b;
446 if (left[v] == 0) {
447 if (tmpstat[v] != 1) {
448 tmpstat[v] = 1;
449 count++;
450 }
451 return(1);
452 }
453 else if (right[v] == 0) {
454 if (cstate(left[v]) == 0) return (0);
455 else if (name[v] == PLUS) return (1);
456 else return (0);
457 }
458 else if (name[v] == CAT) {
459 if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0);
460 else return (1);
461 }
462 else { /* name[v] == OR */
463 b = cstate(right[v]);
464 if (cstate(left[v]) == 0 || b == 0) return (0);
465 else return (1);
466 }
467 }
468
469 static int
member(int symb,int set,int torf)470 member(int symb, int set, int torf) {
471 register int num, pos, lastc = WEOF;
472 num = chars[set];
473 pos = set + 1;
474 num += pos;
475 while (pos < num) {
476 if (chars[pos] == '-' && lastc != WEOF) {
477 if (++pos == num) /* System V oddity: '[a-]' */
478 return (!torf); /* matches 'a' but not '-' */
479 if (symb > lastc && symb < chars[pos])
480 return (torf);
481 }
482 if (symb == chars[pos]) return (torf);
483 lastc = chars[pos++];
484 }
485 return (!torf);
486 }
487
488 static int
notin(int n)489 notin(int n) {
490 register int i, j, pos;
491 for (i=0; i<=n; i++) {
492 if (positions[state[i]] == count) {
493 pos = state[i] + 1;
494 for (j=0; j < count; j++)
495 if (tmpstat[positions[pos++]] != 1) goto nxt;
496 xstate = i;
497 return (0);
498 }
499 nxt: ;
500 }
501 return (1);
502 }
503
504 static void
add(int * array,int n)505 add(int *array, int n) {
506 register int i;
507 unsigned need;
508 if ((need = nxtpos + count) >= MAXPOS) more_positions(need);
509 array[n] = nxtpos;
510 positions[nxtpos++] = count;
511 for (i=3; i <= line; i++) {
512 if (tmpstat[i] == 1) {
513 if (nxtpos >= MAXPOS) more_positions(nxtpos);
514 positions[nxtpos++] = i;
515 }
516 }
517 }
518
519 static void
follow(int v)520 follow(int v) {
521 int p;
522 if (v == line) return;
523 p = parent[v];
524 switch(name[p]) {
525 case STAR:
526 case PLUS: cstate(v);
527 follow(p);
528 return;
529
530 case OR:
531 case QUEST: follow(p);
532 return;
533
534 case CAT: if (v == left[p]) {
535 if (cstate(right[p]) == 0) {
536 follow(p);
537 return;
538 }
539 }
540 else follow(p);
541 return;
542 case FINAL: if (tmpstat[line] != 1) {
543 tmpstat[line] = 1;
544 count++;
545 }
546 return;
547 }
548 }
549
550 static void
eg_build(void)551 eg_build(void)
552 {
553 extern int yyparse(void);
554
555 more_chars(0);
556 more_lines(0);
557 more_positions(0);
558 yyparse();
559 cfoll(line-1);
560 igotofn();
561 range = mbcode ? eg_rangew : eg_range;
562 }
563
564 static int
eg_matchw(const char * str,size_t sz)565 eg_matchw(const char *str, size_t sz)
566 {
567 register const char *p;
568 register int cstat, nstat;
569 wint_t wc;
570 int n;
571
572 p = str;
573 cstat = gotofn[0]['\n']-1;
574 while (out[cstat] == 0 && p < &str[sz]) {
575 if (*p & 0200) {
576 if ((n = mbtowi(&wc, p, &str[sz] - p)) < 0) {
577 p++;
578 wc = WEOF;
579 }
580 else p += n;
581 }
582 else wc = *p++;
583 if ((wc & ~(wchar_t)(NCHARS-1)) != 0 ||
584 (nstat = gotofn[cstat][wc]) == 0)
585 nstat = cgotofn(cstat, wc);
586 cstat = nstat-1;
587 }
588 if (out[cstat] == 0 && p == &str[sz])
589 cstat = gotofn[cstat]['\n']-1;
590 return (out[cstat]);
591 }
592
593 static int
eg_match(const char * str,size_t sz)594 eg_match(const char *str, size_t sz)
595 {
596 register const char *p;
597 register int cstat, nstat;
598
599 p = str;
600 cstat = gotofn[0]['\n']-1;
601 while (out[cstat] == 0 && p < &str[sz]) {
602 if ((nstat = gotofn[cstat][*p++ & 0377]) == 0)
603 nstat = cgotofn(cstat, p[-1] & 0377);
604 cstat = nstat-1;
605 }
606 if (out[cstat] == 0 && p == &str[sz])
607 cstat = gotofn[cstat]['\n']-1;
608 return (out[cstat]);
609 }
610
611 static int
eg_range(struct iblok * ip,char * last)612 eg_range(struct iblok *ip, char *last)
613 {
614 register char *p;
615 register int cstat, nstat;
616 int istat;
617
618 p = ip->ib_cur;
619 lineno++;
620 istat = cstat = gotofn[0]['\n']-1;
621 if (out[cstat]) goto found;
622 for (;;) {
623 if ((nstat = gotofn[cstat][*p & 0377]) == 0)
624 nstat = cgotofn(cstat, *p & 0377);
625 cstat = nstat-1;
626 if (out[cstat]) {
627 found: for (;;) {
628 if (vflag == 0) {
629 succeed: outline(ip, last, p - ip->ib_cur);
630 if (qflag || lflag)
631 return (1);
632 }
633 else {
634 ip->ib_cur = p;
635 while (*ip->ib_cur++ != '\n');
636 }
637 if ((p = ip->ib_cur) > last)
638 return (0);
639 lineno++;
640 if ((out[(cstat=istat)]) == 0) goto brk2;
641 }
642 }
643 if (*p++ == '\n') {
644 if (vflag) {
645 p--;
646 goto succeed;
647 }
648 if ((ip->ib_cur = p) > last)
649 return (0);
650 lineno++;
651 if (out[(cstat=istat)]) goto found;
652 }
653 brk2: ;
654 }
655 }
656
657 static int
eg_rangew(struct iblok * ip,char * last)658 eg_rangew(struct iblok *ip, char *last)
659 {
660 register char *p;
661 register int cstat, nstat;
662 int istat;
663 wint_t wc;
664 int n;
665
666 p = ip->ib_cur;
667 lineno++;
668 istat = cstat = gotofn[0]['\n']-1;
669 if (out[cstat]) goto found;
670 for (;;) {
671 if (*p & 0200) {
672 if ((n = mbtowi(&wc, p, last + 1 - p)) < 0) {
673 n = 1;
674 wc = WEOF;
675 }
676 }
677 else {
678 wc = *p;
679 n = 1;
680 }
681 if ((wc & ~(wchar_t)(NCHARS-1)) != 0 ||
682 (nstat = gotofn[cstat][wc]) == 0)
683 nstat = cgotofn(cstat, wc);
684 cstat = nstat-1;
685 if (out[cstat]) {
686 found: for (;;) {
687 if (vflag == 0) {
688 succeed: outline(ip, last, p - ip->ib_cur);
689 if (qflag || lflag)
690 return (1);
691 }
692 else {
693 ip->ib_cur = p;
694 while (*ip->ib_cur++ != '\n');
695 }
696 if ((p = ip->ib_cur) > last)
697 return (0);
698 lineno++;
699 if ((out[(cstat=istat)]) == 0) goto brk2;
700 }
701 }
702 p += n;
703 if (wc == '\n') {
704 if (vflag) {
705 p--;
706 goto succeed;
707 }
708 if ((ip->ib_cur = p) > last)
709 return (0);
710 lineno++;
711 if (out[(cstat=istat)]) goto found;
712 }
713 brk2: ;
714 }
715 }
716
717 void
misop(void)718 misop(void)
719 {
720 synerror();
721 }
722
723 void
init(void)724 init(void)
725 {
726 Eflag = 1;
727 eg_select();
728 options = "bce:f:hilnrRvyz";
729 }
730
731 void
eg_select(void)732 eg_select(void)
733 {
734 build = eg_build;
735 match = mbcode ? eg_matchw : eg_match;
736 matchflags &= ~(MF_NULTERM|MF_LOCONV);
737 }
738
739 /*
740 * Some dummy routines for sharing source with other grep flavors.
741 */
742 void
ac_select(void)743 ac_select(void)
744 {
745 }
746
747 void
st_select(void)748 st_select(void)
749 {
750 }
751
752 void
rc_select(void)753 rc_select(void)
754 {
755 }
756
757 char *usagemsg =
758 "usage: %s [ -bchilnv ] [ -e exp ] [ -f file ] [ strings ] [ file ] ...\n";
759 char *stdinmsg;
760