1 /*
2  *
3  * re.c -
4  *
5  * $Id: re.c,v 1.32.8.15 2008-03-06 15:38:09 opengl2772 Exp $
6  *
7  * Copyright (C) 1997-1999 Satoru Takabayashi All rights reserved.
8  * Copyright (C) 2000-2008 Namazu Project All rights reserved.
9  * This is free software with ABSOLUTELY NO WARRANTY.
10  *
11  * This program is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation; either version 2 of the License, or
14  * (at your option) any later version.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24  * 02111-1307, USA
25  *
26  * This file must be encoded in EUC-JP encoding
27  *
28  */
29 
30 #ifdef HAVE_CONFIG_H
31 #  include "config.h"
32 #endif
33 #ifdef HAVE_SUPPORT_H
34 #  include "support.h"
35 #endif
36 
37 #include <stdio.h>
38 #ifdef HAVE_STDLIB_H
39 #  include <stdlib.h>
40 #endif
41 
42 #ifdef HAVE_ERRNO_H
43 #  include <errno.h>
44 #endif
45 
46 #ifdef HAVE_STRING_H
47 #  include <string.h>
48 #else
49 #  include <strings.h>
50 #endif
51 
52 #include "libnamazu.h"
53 #include "regex.h"
54 #include "util.h"
55 #include "hlist.h"
56 #include "replace.h"
57 #include "re.h"
58 #include "l10n-ja.h"
59 #include "var.h"
60 
/*
 * Number of hit-list entries requested at a time: nmz_regex_grep_field()
 * passes `size += STEP' to nmz_malloc_hlist()/nmz_realloc_hlist() whenever
 * it needs room for more matches.
 */
61 #define STEP 256
62 
/*
 * File-local helpers used by nmz_regex_grep(): the first is called when
 * field_mode is zero, the second (which also receives the field name)
 * when field_mode is non-zero.
 */
63 static NmzResult nmz_regex_grep_standard ( struct re_pattern_buffer *rp, FILE *fp );
64 static NmzResult nmz_regex_grep_field ( struct re_pattern_buffer *rp, FILE *fp, const char * field );
65 
66 /*
67  *
68  * Public functions
69  *
70  */
71 
72 /*
73  * FIXME: Dirty coding...
74  */
75 NmzResult
nmz_regex_grep(const char * expr,FILE * fp,const char * field,int field_mode)76 nmz_regex_grep(const char *expr, FILE *fp, const char *field, int field_mode)
77 {
78     char tmpexpr[BUFSIZE] = "";
79     struct re_pattern_buffer *rp;
80     NmzResult val;
81 
82     val.num  = 0;
83     val.data = NULL;
84     val.stat = SUCCESS;
85 
86     if (nmz_is_lang_ja()) {
87         /* japanese only */
88         nmz_re_mbcinit(MBCTYPE_EUC);
89     } else {
90         nmz_re_mbcinit(MBCTYPE_ASCII);
91     }
92 
93     rp = ALLOC(struct re_pattern_buffer);
94     MEMZERO((char *)rp, struct re_pattern_buffer, 1);
95     rp->buffer = 0;
96     rp->allocated = 0;
97 
98     strncpy(tmpexpr, expr, BUFSIZE - 1); /* save orig_expr */
99     nmz_debug_printf("REGEX: '%s'\n", tmpexpr);
100 
101     nmz_re_compile_pattern(tmpexpr, strlen(tmpexpr), rp);
102 
103     if (!field_mode) {
104         val = nmz_regex_grep_standard(rp, fp);
105     } else {
106         val = nmz_regex_grep_field(rp, fp, field);
107     }
108 
109     nmz_re_free_pattern(rp);
110 
111     return val;
112 }
113 
114 static NmzResult
nmz_regex_grep_standard(struct re_pattern_buffer * rp,FILE * fp)115 nmz_regex_grep_standard(struct re_pattern_buffer *rp, FILE *fp)
116 {
117     char buf[BUFSIZE] = "";
118     int i, n, maxmatch, maxhit;
119     NmzResult val, tmp;
120 
121     val.num  = 0;
122     val.data = NULL;
123     val.stat = SUCCESS;
124     tmp.num  = 0;
125     tmp.data = NULL;
126     tmp.stat = SUCCESS;
127 
128     maxmatch = nmz_get_maxmatch();
129     maxhit = nmz_get_maxhit();
130 
131     for (i = n = 0; fgets(buf, BUFSIZE - 1, fp); i++) {
132         if (buf[strlen(buf) - 1] != '\n') {  /* too long */
133             i--;
134             continue;
135         }
136         buf[strlen(buf) - 1] = '\0';  /* LF to NULL */
137         if (strlen(buf) == 0) {
138             continue;
139         }
140         nmz_strlower(buf);
141         if (nmz_re_search(rp, buf, strlen(buf), 0, strlen(buf), 0) != -1) {
142             /* Matched */
143             tmp = nmz_get_hlist(i);
144             if (tmp.stat == ERR_FATAL) {
145                 nmz_free_hlist(val);
146 	        return tmp;
147             }
148             if (tmp.num > maxhit) {
149                 nmz_free_hlist(tmp);
150                 nmz_free_hlist(val);
151                 val.data = NULL;
152                 val.stat = ERR_TOO_MUCH_HIT;
153                 break;
154             }
155 
156             if (tmp.num > 0) {
157                 n++;
158                 if (n > maxmatch) {
159                     nmz_free_hlist(tmp);
160                     nmz_free_hlist(val);
161                     val.data = NULL;
162                     val.stat = ERR_TOO_MUCH_MATCH;
163                     return val;
164                 }
165 
166                 val = nmz_ormerge(val, tmp);
167 		if (val.stat == ERR_FATAL) {
168 		    return val;
169                 }
170                 if (val.num > maxhit) {
171                     nmz_free_hlist(val);
172                     val.data = NULL;
173                     val.stat = ERR_TOO_MUCH_HIT;
174                     break;
175                 }
176             }
177 
178 	    if (nmz_is_debugmode()) {
179                 char buf2[BUFSIZE];
180 
181                 fseek(Nmz.w, nmz_getidxptr(Nmz.wi, i), 0);
182                 fgets(buf2, BUFSIZE, Nmz.w);
183                 nmz_chomp(buf2);
184                 nmz_debug_printf("re: %s, (%d:%s), %d, %d\n",
185                         buf2, i, buf, tmp.num, val.num);
186 	    }
187         }
188     }
189 
190     return val;
191 }
192 
193 static NmzResult
nmz_regex_grep_field(struct re_pattern_buffer * rp,FILE * fp,const char * field)194 nmz_regex_grep_field(struct re_pattern_buffer *rp, FILE *fp, const char *field)
195 {
196     char buf[BUFSIZE] = "";
197     int i, n, size = 0, maxhit, uri_mode = 0;
198     NmzResult val;
199     FILE *date_index;
200 
201     val.num  = 0;
202     val.data = NULL;
203     val.stat = SUCCESS;
204 
205     date_index = fopen(NMZ.t, "rb");
206     if (date_index == NULL) {
207         nmz_set_dyingmsg(nmz_msg("%s: %s", NMZ.t, strerror(errno)));
208         val.stat = ERR_FATAL;
209         return val; /* error */
210     }
211 
212     {
213         nmz_malloc_hlist(&val, size += STEP);
214 	if (val.stat == ERR_FATAL) {
215             fclose(date_index);
216 	    return val;
217         }
218 	val.num = 0; /* set 0 for no matching case */
219         if (strcmp(field, "uri") == 0) {
220             uri_mode = 1;
221         }
222     }
223 
224     maxhit = nmz_get_maxhit();
225 
226     for (i = n = 0; fgets(buf, BUFSIZE - 1, fp); i++) {
227         if (buf[strlen(buf) - 1] != '\n') {  /* too long */
228             i--;
229             continue;
230         }
231         buf[strlen(buf) - 1] = '\0';  /* LF to NULL */
232         if (strlen(buf) == 0) {
233             continue;
234         }
235         if (uri_mode) {  /* consider the REPLACE directive in namazurc */
236             nmz_replace_uri(buf);
237         }
238         nmz_strlower(buf);
239         if (nmz_re_search(rp, buf, strlen(buf), 0, strlen(buf), 0) != -1) {
240             /* Matched */
241             struct nmz_data data;
242 
243             if (fseek(date_index, i * sizeof(data.date), 0) != 0) {
244                 nmz_set_dyingmsg(nmz_msg("%s: %s", NMZ.t, strerror(errno)));
245                 fclose(date_index);
246                 nmz_free_hlist(val);
247                 val.data = NULL;
248                 val.stat = ERR_FATAL;
249                 return val; /* error */
250             }
251             nmz_fread(&data.date, sizeof(data.date), 1, date_index);
252 
253             if (data.date == -1) {
254                 continue;
255             }
256 
257             n++;
258             if (n > maxhit) {
259                 fclose(date_index);
260                 nmz_free_hlist(val);
261                 val.data = NULL;
262                 val.stat = ERR_TOO_MUCH_HIT;
263                 return val;
264             }
265             {
266                 if (n > size) {
267                     nmz_realloc_hlist(&val, size += STEP);
268 		    if (val.stat == ERR_FATAL) {
269                         fclose(date_index);
270 		        return val;
271                     }
272                 }
273                 val.data[n-1].docid = i;
274                 val.data[n-1].score = 1;  /* score = 1 */
275                 val.num = n;
276             }
277 
278 	    if (nmz_is_debugmode()) {
279                 nmz_debug_printf("field: [%d]<%s> id: %d\n",
280                         val.num, buf, i);
281 	    }
282         }
283     }
284 
285     fclose(date_index);
286 
287     return val;
288 }
289