1 /* Copyright (c) 1991 Sun Wu and Udi Manber. All Rights Reserved. */
2 /* multipattern matcher */
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <ctype.h>
6 #include <string.h>
7 #include <unistd.h>
8 #define MAXPAT 256
9 #define MAXLINE 1024
10 #define MAXSYM 256
11 #define MAXMEMBER1 4096
12 #define MAXPATFILE 260000
13 #define BLOCKSIZE 8192
14 #define MAXHASH 8192
15 #define mm 8191
16 #define max_num 30000
17 #define W_DELIM 128
18 #define L_DELIM 10
19
20 void countline(unsigned char *text, int len);
21 void monkey1( register unsigned char *text, int start, int end );
22 void m_short( register unsigned char *text, int start, int end );
23 void f_prep(int pat_index, unsigned char *Pattern);
24
25 extern int COUNT, FNAME, SILENT, FILENAMEONLY, num_of_matched;
26 extern int INVERSE;
27 extern int WORDBOUND, WHOLELINE, NOUPPER;
28 extern unsigned char CurrentFileName[], Progname[];
29 extern int total_line;
30
31 int LONG = 0;
32 int SHORT = 0;
33 int p_size=0;
34 unsigned char SHIFT1[MAXMEMBER1];
35 unsigned char tr[MAXSYM];
36 unsigned char tr1[MAXSYM];
37 struct pat_list {
38 int index;
39 struct pat_list *next;
40 } *HASH[MAXHASH];
41 struct pat_list *pt, *qt;
42 unsigned char buf[MAXPATFILE+BLOCKSIZE];
43 unsigned char pat_spool[MAXPATFILE+2*max_num+MAXPAT];
44 unsigned char *patt[max_num];
45 unsigned char pat_len[max_num];
46
47
prepf(fp)48 void prepf(fp)
49 int fp;
50 {
51 int length=0, i, p=1, pdx=0, num_pat;
52 unsigned char *pat_ptr=pat_spool, temp[10];
53 unsigned Mask = 15;
54 int num_read;
55
56 while((num_read = read(fp, buf+length, BLOCKSIZE)) > 0) {
57 length = length + num_read;
58 if(length > MAXPATFILE) {
59 fprintf(stderr, "%s: maximum pattern file size is %d\n", Progname, MAXPATFILE);
60 exit(2);
61 }
62 }
63 buf[length] = '\n';
64 i = 0; p=1;
65 while(i<length) {
66 patt[p] = pat_ptr;
67 if(WORDBOUND) *pat_ptr++ = W_DELIM;
68 if(WHOLELINE) *pat_ptr++ = L_DELIM;
69 while((*pat_ptr = buf[i++]) != '\n') pat_ptr++;
70 if(WORDBOUND) *pat_ptr++ = W_DELIM;
71 if(WHOLELINE) *pat_ptr++ = L_DELIM; /* Can't be both on */
72 *pat_ptr++ = 0;
73 p++;
74 }
75 if(p>max_num) {
76 fprintf(stderr, "%s: maximum number of patterns is %d\n", Progname, max_num);
77 exit(2);
78 }
79 for(i=1; i<20; i++) *pat_ptr = i; /* boundary safety zone */
80 for(i=0; i< MAXSYM; i++) tr[i] = i;
81 if(NOUPPER) {
82 for(i='A'; i<= 'Z'; i++) tr[i] = i + 'a' - 'A';
83 }
84 if(WORDBOUND) {
85 for(i=0; i<128; i++) if(!isalnum(i)) tr[i] = W_DELIM;
86 }
87 for(i=0; i< MAXSYM; i++) tr1[i] = tr[i]&Mask;
88 num_pat = p-1;
89 p_size = MAXPAT;
90 for(i=1 ; i <= num_pat; i++) {
91 p = strlen(patt[i]);
92 pat_len[i] = p;
93 if(p!=0 && p < p_size) p_size = p;
94 }
95 if(p_size == 0) {
96 fprintf(stderr, "%s: the pattern file is empty\n", Progname);
97 exit(2);
98 }
99 if(length > 400 && p_size > 2) LONG = 1;
100 if(p_size == 1) SHORT = 1;
101 for(i=0; i<MAXMEMBER1; i++) SHIFT1[i] = p_size - 2;
102 for(i=0; i<MAXHASH; i++) {
103 HASH[i] = 0;
104 }
105 for(i=1; i<= num_pat; i++) f_prep(i, patt[i]);
106 }
107
108
mgrep(fd)109 void mgrep(fd)
110 int fd;
111 {
112 register char r_newline = '\n';
113 unsigned char text[2*BLOCKSIZE+MAXLINE];
114 register int buf_end, num_read, i=0, j, start, end, residue = 0;
115
116 text[MAXLINE-1] = '\n'; /* initial case */
117 start = MAXLINE-1;
118
119 while( (num_read = read(fd, text+MAXLINE, BLOCKSIZE)) > 0)
120 {
121 if(INVERSE && COUNT) countline(text+MAXLINE, num_read);
122 buf_end = end = MAXLINE + num_read -1 ;
123 while(text[end] != r_newline && end > MAXLINE) end--;
124 residue = buf_end - end + 1 ;
125 text[start-1] = r_newline;
126 if(SHORT) m_short(text, start, end);
127 else monkey1(text, start, end);
128 if(FILENAMEONLY && num_of_matched) {
129 printf("%s\n", CurrentFileName);
130 return;
131 }
132 start = MAXLINE - residue;
133 if(start < 0) {
134 start = 1;
135 }
136 strncpy(text+start, text+end, residue);
137 } /* end of while(num_read = ... */
138 text[MAXLINE] = '\n';
139 text[start-1] = '\n';
140 if(residue > 1) {
141 if(SHORT) m_short(text, start, end);
142 else monkey1(text, start, end);
143 }
144 return;
145 } /* end mgrep */
146
countline(text,len)147 void countline(text, len)
148 unsigned char *text; int len;
149 {
150 int i;
151 for (i=0; i<len; i++) if(text[i] == '\n') total_line++;
152 }
153
154
monkey1(text,start,end)155 void monkey1( text, start, end )
156 int start, end; register unsigned char *text;
157 {
158 register unsigned char *textend;
159 register unsigned hash, i;
160 register unsigned char shift;
161 register int m1, j, Long=LONG;
162 int pat_index, m=p_size;
163 int MATCHED=0;
164 register unsigned char *qx;
165 register struct pat_list *p;
166 unsigned char *lastout;
167 int OUT=0;
168
169 textend = text + end;
170 m1 = m - 1;
171 lastout = text+start+1;
172 text = text + start + m1 ;
173 while (text <= textend) {
174 hash=tr1[*text];
175 hash=(hash<<4)+(tr1[*(text-1)]);
176 if(Long) hash=(hash<<4)+(tr1[*(text-2)]);
177 shift = SHIFT1[hash];
178 if(shift == 0) {
179 hash=0;
180 for(i=0;i<=m1;i++) {
181 hash=(hash<<4)+(tr1[*(text-i)]);
182 }
183 hash=hash&mm;
184 p = HASH[hash];
185 while(p != 0) {
186 pat_index = p->index;
187 p = p->next;
188 qx = text-m1;
189 j = 0;
190 while(tr[patt[pat_index][j]] == tr[*(qx++)]) j++;
191 if (j > m1 ) {
192 if(pat_len[pat_index] <= j) {
193 if(text > textend) return;
194 num_of_matched++;
195 if(FILENAMEONLY || SILENT) return;
196 MATCHED=1;
197 if(COUNT) {
198 while (*text != '\n') text++;
199 }
200 else {
201 if(!INVERSE) {
202 if(FNAME) printf("%s: ",CurrentFileName);
203 while(*(--text) != '\n');
204 while(*(++text) != '\n') putchar(*text);
205 printf("\n");
206 }
207 else {
208 if(FNAME) printf("%s: ",CurrentFileName);
209 while(*(--text) != '\n');
210 if(lastout < text) OUT=1;
211 while(lastout < text) putchar(*lastout++);
212 if(OUT) {
213 putchar('\n');
214 OUT=0;
215 }
216 while(*(++text) != '\n');
217 lastout=text+1;
218 }
219 }
220 /*
221 else {
222 if(FNAME) printf("%s: ",CurrentFileName);
223 while(*(--text) != '\n');
224 while(*(++text) != '\n') putchar(*text);
225 printf("\n");
226 }
227 */
228 }
229 }
230 if(MATCHED) break;
231 }
232 if(!MATCHED) shift = 1;
233 else {
234 MATCHED = 0;
235 shift = m1;
236 }
237 }
238 text = text + shift;
239 }
240 if(INVERSE && !COUNT) while(lastout <= textend) putchar(*lastout++);
241 }
242
m_short(text,start,end)243 void m_short( text, start, end )
244 int start, end; register unsigned char *text;
245 {
246 register unsigned char *textend;
247 register unsigned i;
248 register int j;
249 register struct pat_list *p, *q;
250 register int pat_index;
251 int MATCHED=0;
252 int OUT=0;
253 unsigned char *lastout;
254 unsigned char *qx;
255
256 textend = text + end;
257 lastout = text + start + 1;
258 text = text + start - 1 ;
259 while (++text <= textend) {
260 p = HASH[*text];
261 while(p != 0) {
262 pat_index = p->index;
263 p = p->next;
264 qx = text;
265 j = 0;
266 while(tr[patt[pat_index][j]] == tr[*(qx++)]) j++;
267 if(pat_len[pat_index] <= j) {
268 if(text >= textend) return;
269 num_of_matched++;
270 if(FILENAMEONLY || SILENT) return;
271 if(COUNT) {
272 while (*text != '\n') text++;
273 }
274 else {
275 if(FNAME) printf("%s: ",CurrentFileName);
276 if(!INVERSE) {
277 while(*(--text) != '\n');
278 while(*(++text) != '\n') putchar(*text);
279 printf("\n");
280 MATCHED = 1;
281 }
282 else {
283 while(*(--text) != '\n');
284 if(lastout < text) OUT=1;
285 while(lastout < text) putchar(*lastout++);
286 if(OUT) {
287 putchar('\n');
288 OUT=0;
289 }
290 while(*(++text) != '\n');
291 lastout=text+1;
292 MATCHED = 1;
293 }
294 }
295 }
296 if(MATCHED) break;
297 }
298 MATCHED = 0;
299 } /* while */
300 if(INVERSE && !COUNT) while(lastout <= textend) putchar(*lastout++);
301 }
302
f_prep(pat_index,Pattern)303 void f_prep(pat_index, Pattern)
304 unsigned char *Pattern ; int pat_index;
305 {
306 int i, j, m;
307 register unsigned hash, Mask=15;
308 m = p_size;
309 for (i = m-1; i>=(1+LONG); i--) {
310 hash = (Pattern[i] & Mask);
311 hash = (hash << 4) + (Pattern[i-1]& Mask);
312 if(LONG) hash = (hash << 4) + (Pattern[i-2] & Mask);
313 if(SHIFT1[hash] >= m-1-i) SHIFT1[hash] = m-1-i;
314 }
315 if(SHORT) Mask = 255; /* 011111111 */
316 hash = 0;
317 for(i = m-1; i>=0; i--) {
318 hash = (hash << 4) + (tr[Pattern[i]]&Mask);
319 }
320 /*
321 if(INVERSE) hash = Pattern[1];
322 */
323 hash=hash&mm;
324 qt = (struct pat_list *) malloc(sizeof(struct pat_list));
325 qt->index = pat_index;
326 pt = HASH[hash];
327 qt->next = pt;
328 HASH[hash] = qt;
329 }
330
331
332