1 /* This file is part of the Zebra server.
2 Copyright (C) 2004-2013 Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17
18 */
19
20
21 #if HAVE_CONFIG_H
22 #include <config.h>
23 #endif
24 #include <stdlib.h>
25 #include <string.h>
26 #include <stdio.h>
27 #include <ctype.h>
28 #include <assert.h>
29
30 #include <idzebra/util.h>
31 #include <yaz/yaz-util.h>
32 #include <dfa.h>
33 #include "imalloc.h"
34
35 char *prog;
36 static int show_line = 0;
37
38 typedef unsigned MatchWord;
39 #define WORD_BITS 32
40
41 typedef struct {
42 int n; /* no of MatchWord needed */
43 int range; /* max no. of errors */
44 MatchWord *Sc; /* Mask Sc */
45 } MatchContext;
46
47 #define INFBUF_SIZE 16384
48
49 #define INLINE
50
set_bit(MatchContext * mc,MatchWord * m,int ch,int state)51 static INLINE void set_bit (MatchContext *mc, MatchWord *m, int ch, int state)
52 {
53 int off = state & (WORD_BITS-1);
54 int wno = state / WORD_BITS;
55
56 m[mc->n * ch + wno] |= 1<<off;
57 }
58
reset_bit(MatchContext * mc,MatchWord * m,int ch,int state)59 static INLINE void reset_bit (MatchContext *mc, MatchWord *m, int ch,
60 int state)
61 {
62 int off = state & (WORD_BITS-1);
63 int wno = state / WORD_BITS;
64
65 m[mc->n * ch + wno] &= ~(1<<off);
66 }
67
get_bit(MatchContext * mc,MatchWord * m,int ch,int state)68 static INLINE MatchWord get_bit (MatchContext *mc, MatchWord *m, int ch,
69 int state)
70 {
71 int off = state & (WORD_BITS-1);
72 int wno = state / WORD_BITS;
73
74 return m[mc->n * ch + wno] & (1<<off);
75 }
76
mk_MatchContext(struct DFA * dfa,int range)77 static MatchContext *mk_MatchContext (struct DFA *dfa, int range)
78 {
79 MatchContext *mc = imalloc (sizeof(*mc));
80 int i;
81
82 mc->n = (dfa->no_states+WORD_BITS) / WORD_BITS;
83 mc->range = range;
84 mc->Sc = icalloc (sizeof(*mc->Sc) * 256 * mc->n);
85
86 for (i=0; i<dfa->no_states; i++)
87 {
88 int j;
89 struct DFA_state *state = dfa->states[i];
90
91 for (j=0; j<state->tran_no; j++)
92 {
93 int ch;
94 int ch0 = state->trans[j].ch[0];
95 int ch1 = state->trans[j].ch[1];
96 assert (ch0 >= 0 && ch1 >= 0);
97
98 for (ch = ch0; ch <= ch1; ch++)
99 set_bit (mc, mc->Sc, ch, i);
100 }
101 }
102 return mc;
103 }
104
105
mask_shift(MatchContext * mc,MatchWord * Rdst,MatchWord * Rsrc,struct DFA * dfa,int ch)106 static void mask_shift (MatchContext *mc, MatchWord *Rdst, MatchWord *Rsrc,
107 struct DFA *dfa, int ch)
108 {
109 int j, s = 0;
110 MatchWord *Rsrc_p = Rsrc, mask;
111
112 Rdst[0] = 1;
113 for (j = 1; j<mc->n; j++)
114 Rdst[j] = 0;
115 while (1)
116 {
117 mask = *Rsrc_p++;
118 for (j = 0; j<WORD_BITS/4; j++)
119 {
120 if (mask & 15)
121 {
122 if (mask & 1)
123 {
124 struct DFA_state *state = dfa->states[s];
125 int i = state->tran_no;
126 while (--i >= 0)
127 if (ch >= state->trans[i].ch[0] &&
128 ch <= state->trans[i].ch[1])
129 set_bit (mc, Rdst, 0, state->trans[i].to);
130 }
131 if (mask & 2)
132 {
133 struct DFA_state *state = dfa->states[s+1];
134 int i = state->tran_no;
135 while (--i >= 0)
136 if (ch >= state->trans[i].ch[0] &&
137 ch <= state->trans[i].ch[1])
138 set_bit (mc, Rdst, 0, state->trans[i].to);
139 }
140 if (mask & 4)
141 {
142 struct DFA_state *state = dfa->states[s+2];
143 int i = state->tran_no;
144 while (--i >= 0)
145 if (ch >= state->trans[i].ch[0] &&
146 ch <= state->trans[i].ch[1])
147 set_bit (mc, Rdst, 0, state->trans[i].to);
148 }
149 if (mask & 8)
150 {
151 struct DFA_state *state = dfa->states[s+3];
152 int i = state->tran_no;
153 while (--i >= 0)
154 if (ch >= state->trans[i].ch[0] &&
155 ch <= state->trans[i].ch[1])
156 set_bit (mc, Rdst, 0, state->trans[i].to);
157 }
158 }
159 s += 4;
160 if (s >= dfa->no_states)
161 return;
162 mask >>= 4;
163 }
164 }
165 }
166
shift(MatchContext * mc,MatchWord * Rdst,MatchWord * Rsrc,struct DFA * dfa)167 static void shift (MatchContext *mc, MatchWord *Rdst, MatchWord *Rsrc,
168 struct DFA *dfa)
169 {
170 int j, s = 0;
171 MatchWord *Rsrc_p = Rsrc, mask;
172 for (j = 0; j<mc->n; j++)
173 Rdst[j] = 0;
174 while (1)
175 {
176 mask = *Rsrc_p++;
177 for (j = 0; j<WORD_BITS/4; j++)
178 {
179 if (mask & 15)
180 {
181 if (mask & 1)
182 {
183 struct DFA_state *state = dfa->states[s];
184 int i = state->tran_no;
185 while (--i >= 0)
186 set_bit (mc, Rdst, 0, state->trans[i].to);
187 }
188 if (mask & 2)
189 {
190 struct DFA_state *state = dfa->states[s+1];
191 int i = state->tran_no;
192 while (--i >= 0)
193 set_bit (mc, Rdst, 0, state->trans[i].to);
194 }
195 if (mask & 4)
196 {
197 struct DFA_state *state = dfa->states[s+2];
198 int i = state->tran_no;
199 while (--i >= 0)
200 set_bit (mc, Rdst, 0, state->trans[i].to);
201 }
202 if (mask & 8)
203 {
204 struct DFA_state *state = dfa->states[s+3];
205 int i = state->tran_no;
206 while (--i >= 0)
207 set_bit (mc, Rdst, 0, state->trans[i].to);
208 }
209 }
210 s += 4;
211 if (s >= dfa->no_states)
212 return;
213 mask >>= 4;
214 }
215 }
216 }
217
or(MatchContext * mc,MatchWord * Rdst,MatchWord * Rsrc1,MatchWord * Rsrc2)218 static void or (MatchContext *mc, MatchWord *Rdst,
219 MatchWord *Rsrc1, MatchWord *Rsrc2)
220 {
221 int i;
222 for (i = 0; i<mc->n; i++)
223 Rdst[i] = Rsrc1[i] | Rsrc2[i];
224 }
225
226
go(MatchContext * mc,struct DFA * dfa,FILE * inf)227 static int go (MatchContext *mc, struct DFA *dfa, FILE *inf)
228 {
229 MatchWord *Rj, *Rj1, *Rj_a, *Rj_b, *Rj_c;
230 int s, d, ch;
231 int lineno = 1;
232 char *infbuf;
233 int inf_ptr = 1;
234 int no_match = 0;
235
236 infbuf = imalloc (INFBUF_SIZE);
237 infbuf[0] = '\n';
238 Rj = icalloc (mc->n * (mc->range+1) * sizeof(*Rj));
239 Rj1 = icalloc (mc->n * (mc->range+1) * sizeof(*Rj));
240 Rj_a = icalloc (mc->n * sizeof(*Rj));
241 Rj_b = icalloc (mc->n * sizeof(*Rj));
242 Rj_c = icalloc (mc->n * sizeof(*Rj));
243
244 set_bit (mc, Rj, 0, 0);
245 for (d = 1; d<=mc->range; d++)
246 {
247 int s;
248 memcpy (Rj + mc->n * d, Rj + mc->n * (d-1), mc->n * sizeof(*Rj));
249 for (s = 0; s<dfa->no_states; s++)
250 {
251 if (get_bit (mc, Rj, d-1, s))
252 {
253 struct DFA_state *state = dfa->states[s];
254 int i = state->tran_no;
255 while (--i >= 0)
256 set_bit (mc, Rj, d, state->trans[i].to);
257 }
258 }
259 }
260 while ((ch = getc (inf)) != EOF)
261 {
262 MatchWord *Rj_t;
263
264 infbuf[inf_ptr] = ch;
265 if (ch == '\n')
266 {
267 if (no_match)
268 {
269 int i = inf_ptr;
270 if (show_line)
271 printf ("%5d:", lineno);
272 do
273 {
274 if (--i < 0)
275 i = INFBUF_SIZE-1;
276 } while (infbuf[i] != '\n');
277 do
278 {
279 if (++i == INFBUF_SIZE)
280 i = 0;
281 putchar (infbuf[i]);
282 } while (infbuf[i] != '\n');
283 no_match = 0;
284 }
285 lineno++;
286 }
287 if (++inf_ptr == INFBUF_SIZE)
288 inf_ptr = 0;
289 mask_shift (mc, Rj1, Rj, dfa, ch);
290 for (d = 1; d <= mc->range; d++)
291 {
292 mask_shift (mc, Rj_b, Rj+d*mc->n, dfa, ch); /* 1 */
293
294 or (mc, Rj_a, Rj+(d-1)*mc->n, Rj1+(d-1)*mc->n); /* 2,3 */
295
296 shift (mc, Rj_c, Rj_a, dfa);
297
298 or (mc, Rj_a, Rj_b, Rj_c); /* 1,2,3*/
299
300 or (mc, Rj1+d*mc->n, Rj_a, Rj+(d-1)*mc->n); /* 1,2,3,4 */
301 }
302 for (s = 0; s<dfa->no_states; s++)
303 {
304 if (dfa->states[s]->rule_no)
305 if (get_bit (mc, Rj1+mc->range*mc->n, 0, s))
306 no_match++;
307 }
308 for (d = 0; d <= mc->range; d++)
309 reset_bit (mc, Rj1+d*mc->n, 0, dfa->no_states);
310 Rj_t = Rj1;
311 Rj1 = Rj;
312 Rj = Rj_t;
313 }
314 ifree (Rj);
315 ifree (Rj1);
316 ifree (Rj_a);
317 ifree (Rj_b);
318 ifree (Rj_c);
319 ifree (infbuf);
320 return 0;
321 }
322
grep_file(struct DFA * dfa,const char * fname,int range)323 static int grep_file (struct DFA *dfa, const char *fname, int range)
324 {
325 FILE *inf;
326 MatchContext *mc;
327
328 if (fname)
329 {
330 inf = fopen (fname, "r");
331 if (!inf)
332 {
333 yaz_log (YLOG_FATAL|YLOG_ERRNO, "cannot open `%s'", fname);
334 exit (1);
335 }
336 }
337 else
338 inf = stdin;
339
340 mc = mk_MatchContext (dfa, range);
341
342 go (mc, dfa, inf);
343
344 if (fname)
345 fclose (inf);
346 return 0;
347 }
348
main(int argc,char ** argv)349 int main (int argc, char **argv)
350 {
351 int ret;
352 int range = 0;
353 char *arg;
354 const char *pattern = NULL;
355 int no_files = 0;
356 struct DFA *dfa = dfa_init();
357
358 prog = argv[0];
359 while ((ret = options ("nr:dsv:", argv, argc, &arg)) != -2)
360 {
361 if (ret == 0)
362 {
363 if (!pattern)
364 {
365 int i;
366 pattern = arg;
367 i = dfa_parse (dfa, &pattern);
368 if (i || *pattern)
369 {
370 fprintf (stderr, "%s: illegal pattern\n", prog);
371 return 1;
372 }
373 dfa_mkstate (dfa);
374 }
375 else
376 {
377 no_files++;
378 grep_file (dfa, arg, range);
379 }
380 }
381 else if (ret == 'v')
382 {
383 yaz_log_init (yaz_log_mask_str(arg), prog, NULL);
384 }
385 else if (ret == 's')
386 {
387 dfa_verbose = 1;
388 }
389 else if (ret == 'd')
390 {
391 debug_dfa_tran = 1;
392 debug_dfa_followpos = 1;
393 debug_dfa_trav = 1;
394 }
395 else if (ret == 'r')
396 {
397 range = atoi (arg);
398 }
399 else if (ret == 'n')
400 {
401 show_line = 1;
402 }
403 else
404 {
405 yaz_log (YLOG_FATAL, "Unknown option '-%s'", arg);
406 exit (1);
407 }
408 }
409 if (!pattern)
410 {
411 fprintf (stderr, "usage:\n "
412 " %s [-d] [-n] [-r n] [-s] [-v n] pattern file ..\n", prog);
413 exit (1);
414 }
415 else if (no_files == 0)
416 {
417 grep_file (dfa, NULL, range);
418 }
419 dfa_delete (&dfa);
420 return 0;
421 }
422 /*
423 * Local variables:
424 * c-basic-offset: 4
425 * c-file-style: "Stroustrup"
426 * indent-tabs-mode: nil
427 * End:
428 * vim: shiftwidth=4 tabstop=8 expandtab
429 */
430
431