1 /* Copyright (c) 1991 Sun Wu and Udi Manber.  All Rights Reserved. */
2 /* multipattern matcher */
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <ctype.h>
6 #include <string.h>
7 #include <unistd.h>
8 #define MAXPAT  256
9 #define MAXLINE 1024
10 #define MAXSYM  256
11 #define MAXMEMBER1 4096
12 #define MAXPATFILE 260000
13 #define BLOCKSIZE  8192
14 #define MAXHASH    8192
15 #define mm 	   8191
16 #define max_num    30000
17 #define W_DELIM	   128
18 #define L_DELIM    10
19 
20 void countline(unsigned char *text, int len);
21 void monkey1( register unsigned char *text, int start, int end );
22 void m_short( register unsigned char *text, int start, int end );
23 void f_prep(int pat_index, unsigned char *Pattern);
24 
25 extern int COUNT, FNAME, SILENT, FILENAMEONLY, num_of_matched;
26 extern int INVERSE;
27 extern int WORDBOUND, WHOLELINE, NOUPPER;
28 extern unsigned char  CurrentFileName[], Progname[];
29 extern int total_line;
30 
31 int LONG  = 0;
32 int SHORT = 0;
33 int p_size=0;
34 unsigned char SHIFT1[MAXMEMBER1];
35 unsigned char tr[MAXSYM];
36 unsigned char tr1[MAXSYM];
37 struct pat_list {
38 	int  index;
39 	struct pat_list *next;
40 } *HASH[MAXHASH];
41 struct pat_list  *pt, *qt;
42 unsigned char buf[MAXPATFILE+BLOCKSIZE];
43 unsigned char pat_spool[MAXPATFILE+2*max_num+MAXPAT];
44 unsigned char *patt[max_num];
45 unsigned char pat_len[max_num];
46 
47 
prepf(fp)48 void prepf(fp)
49 int fp;
50 {
51     int length=0, i, p=1, pdx=0, num_pat;
52     unsigned char *pat_ptr=pat_spool, temp[10];
53     unsigned Mask = 15;
54     int num_read;
55 
56     while((num_read = read(fp, buf+length, BLOCKSIZE)) > 0) {
57 	length = length + num_read;
58 	if(length > MAXPATFILE) {
59 		fprintf(stderr, "%s: maximum pattern file size is %d\n", Progname, MAXPATFILE);
60 		exit(2);
61 	}
62     }
63     buf[length] = '\n';
64     i = 0; p=1;
65     while(i<length) {
66 	patt[p] = pat_ptr;
67 	if(WORDBOUND) *pat_ptr++ = W_DELIM;
68 	if(WHOLELINE) *pat_ptr++ = L_DELIM;
69 	while((*pat_ptr = buf[i++]) != '\n') pat_ptr++;
70 	if(WORDBOUND) *pat_ptr++ = W_DELIM;
71 	if(WHOLELINE) *pat_ptr++ = L_DELIM;           /* Can't be both on */
72 	*pat_ptr++ = 0;
73 	p++;
74     }
75     if(p>max_num) {
76 	fprintf(stderr, "%s: maximum number of patterns is %d\n", Progname, max_num);
77 	exit(2);
78     }
79     for(i=1; i<20; i++) *pat_ptr = i;  /* boundary safety zone */
80     for(i=0; i< MAXSYM; i++) tr[i] = i;
81     if(NOUPPER) {
82 	for(i='A'; i<= 'Z'; i++) tr[i] = i + 'a' - 'A';
83     }
84     if(WORDBOUND) {
85 	for(i=0; i<128; i++) if(!isalnum(i)) tr[i] = W_DELIM;
86     }
87     for(i=0; i< MAXSYM; i++) tr1[i] = tr[i]&Mask;
88     num_pat = p-1;
89     p_size = MAXPAT;
90     for(i=1 ; i <= num_pat; i++) {
91 	p = strlen(patt[i]);
92 	pat_len[i] = p;
93 	if(p!=0 && p < p_size) p_size = p;
94     }
95     if(p_size == 0) {
96 	fprintf(stderr, "%s: the pattern file is empty\n", Progname);
97 	exit(2);
98     }
99     if(length > 400 && p_size > 2) LONG = 1;
100     if(p_size == 1) SHORT = 1;
101     for(i=0; i<MAXMEMBER1; i++) SHIFT1[i] = p_size - 2;
102     for(i=0; i<MAXHASH; i++) {
103 	HASH[i] = 0;
104     }
105     for(i=1; i<= num_pat; i++) f_prep(i, patt[i]);
106 }
107 
108 
mgrep(fd)109 void mgrep(fd)
110 int fd;
111 {
112     register char r_newline = '\n';
113     unsigned char text[2*BLOCKSIZE+MAXLINE];
114     register int buf_end, num_read, i=0, j, start, end, residue = 0;
115 
116     text[MAXLINE-1] = '\n';  /* initial case */
117     start = MAXLINE-1;
118 
119     while( (num_read = read(fd, text+MAXLINE, BLOCKSIZE)) > 0)
120     {
121        if(INVERSE && COUNT) countline(text+MAXLINE, num_read);
122        buf_end = end = MAXLINE + num_read -1 ;
123        while(text[end]  != r_newline && end > MAXLINE) end--;
124        residue = buf_end - end  + 1 ;
125        text[start-1] = r_newline;
126        if(SHORT) m_short(text, start, end);
127        else      monkey1(text, start, end);
128        if(FILENAMEONLY && num_of_matched) {
129 		printf("%s\n", CurrentFileName);
130 		return;
131        }
132        start = MAXLINE - residue;
133        if(start < 0) {
134             start = 1;
135        }
136        strncpy(text+start, text+end, residue);
137     } /* end of while(num_read = ... */
138     text[MAXLINE] = '\n';
139     text[start-1] = '\n';
140     if(residue > 1) {
141         if(SHORT) m_short(text, start, end);
142         else      monkey1(text, start, end);
143     }
144     return;
145 } /* end mgrep */
146 
countline(text,len)147 void countline(text, len)
148 unsigned char *text; int len;
149 {
150 int i;
151 	for (i=0; i<len; i++) if(text[i] == '\n') total_line++;
152 }
153 
154 
monkey1(text,start,end)155 void monkey1( text, start, end  )
156 int start, end; register unsigned char *text;
157 {
158 register unsigned char *textend;
159 register unsigned hash, i;
160 register unsigned char shift;
161 register int  m1, j, Long=LONG;
162 int pat_index, m=p_size;
163 int MATCHED=0;
164 register unsigned char *qx;
165 register struct pat_list *p;
166 unsigned char *lastout;
167 int OUT=0;
168 
169 textend = text + end;
170 m1 = m - 1;
171 lastout = text+start+1;
172 text = text + start + m1 ;
173 while (text <= textend) {
174 	hash=tr1[*text];
175 	hash=(hash<<4)+(tr1[*(text-1)]);
176 	if(Long) hash=(hash<<4)+(tr1[*(text-2)]);
177 	shift = SHIFT1[hash];
178 	if(shift == 0) {
179 		hash=0;
180 		for(i=0;i<=m1;i++)  {
181 		    hash=(hash<<4)+(tr1[*(text-i)]);
182 		}
183 		hash=hash&mm;
184 		p = HASH[hash];
185 		while(p != 0) {
186 			pat_index = p->index;
187 			p = p->next;
188 			qx = text-m1;
189 			j = 0;
190 			while(tr[patt[pat_index][j]] == tr[*(qx++)]) j++;
191 	        	if (j > m1 ) {
192 			   if(pat_len[pat_index] <= j) {
193 				if(text > textend) return;
194                 		num_of_matched++;
195                 		if(FILENAMEONLY || SILENT)  return;
196 				MATCHED=1;
197 	        		if(COUNT) {
198 			  		while (*text != '\n') text++;
199 				}
200 				else {
201 				    if(!INVERSE) {
202 			  		if(FNAME) printf("%s: ",CurrentFileName);
203                           		while(*(--text) != '\n');
204                           		while(*(++text) != '\n') putchar(*text);
205 			  		printf("\n");
206 				    }
207 				    else {
208 			  		if(FNAME) printf("%s: ",CurrentFileName);
209                           		while(*(--text) != '\n');
210 					if(lastout < text) OUT=1;
211 					while(lastout < text) putchar(*lastout++);
212 					if(OUT) {
213 						putchar('\n');
214 						OUT=0;
215 					}
216                           		while(*(++text) != '\n');
217 					lastout=text+1;
218 				    }
219 				}
220 /*
221 				else {
222 			  		if(FNAME) printf("%s: ",CurrentFileName);
223                           		while(*(--text) != '\n');
224                           		while(*(++text) != '\n') putchar(*text);
225 			  		printf("\n");
226 				}
227 */
228 			   }
229                 	}
230 			if(MATCHED) break;
231 		}
232 		if(!MATCHED) shift = 1;
233 		else {
234 			MATCHED = 0;
235 			shift = m1;
236 		}
237         }
238 	text = text + shift;
239   }
240   if(INVERSE && !COUNT) while(lastout <= textend) putchar(*lastout++);
241 }
242 
m_short(text,start,end)243 void m_short( text, start, end  )
244 int start, end; register unsigned char *text;
245 {
246 register unsigned char *textend;
247 register unsigned i;
248 register int  j;
249 register struct pat_list *p, *q;
250 register int pat_index;
251 int MATCHED=0;
252 int OUT=0;
253 unsigned char *lastout;
254 unsigned char *qx;
255 
256 textend = text + end;
257 lastout = text + start + 1;
258 text = text + start - 1 ;
259 while (++text <= textend) {
260 		p = HASH[*text];
261 		while(p != 0) {
262 			pat_index = p->index;
263 			p = p->next;
264 			qx = text;
265 			j = 0;
266 			while(tr[patt[pat_index][j]] == tr[*(qx++)]) j++;
267 			if(pat_len[pat_index] <= j) {
268 				if(text >= textend) return;
269                 		num_of_matched++;
270                 		if(FILENAMEONLY || SILENT)  return;
271 	        		if(COUNT) {
272 			  		while (*text != '\n') text++;
273 				}
274 				else {
275 			  	    if(FNAME) printf("%s: ",CurrentFileName);
276 				    if(!INVERSE) {
277                           		while(*(--text) != '\n');
278                           		while(*(++text) != '\n') putchar(*text);
279 			  		printf("\n");
280 					MATCHED = 1;
281 				    }
282 				    else {
283                           		while(*(--text) != '\n');
284 					if(lastout < text) OUT=1;
285 					while(lastout < text) putchar(*lastout++);
286 					if(OUT) {
287 						putchar('\n');
288 						OUT=0;
289 					}
290                           		while(*(++text) != '\n');
291 					lastout=text+1;
292 					MATCHED = 1;
293 				    }
294 				}
295                 	}
296 			if(MATCHED) break;
297 		}
298 		MATCHED = 0;
299   } /* while */
300   if(INVERSE && !COUNT) while(lastout <= textend) putchar(*lastout++);
301 }
302 
f_prep(pat_index,Pattern)303 void f_prep(pat_index, Pattern)
304 unsigned char *Pattern ; int pat_index;
305 {
306 int i, j, m;
307 register unsigned hash, Mask=15;
308 	m = p_size;
309 	for (i = m-1; i>=(1+LONG); i--) {
310 		hash = (Pattern[i] & Mask);
311 		hash = (hash << 4) + (Pattern[i-1]& Mask);
312 		if(LONG) hash = (hash << 4) + (Pattern[i-2] & Mask);
313 		if(SHIFT1[hash] >= m-1-i) SHIFT1[hash] = m-1-i;
314 	}
315 	if(SHORT) Mask = 255;  /* 011111111 */
316 	hash = 0;
317 	for(i = m-1; i>=0; i--)  {
318 	    hash = (hash << 4) + (tr[Pattern[i]]&Mask);
319 	}
320 /*
321 	if(INVERSE) hash = Pattern[1];
322 */
323 	hash=hash&mm;
324 	qt = (struct pat_list *) malloc(sizeof(struct pat_list));
325 	qt->index = pat_index;
326 	pt = HASH[hash];
327 	qt->next = pt;
328 	HASH[hash] = qt;
329 }
330 
331 
332