1 /*
2  *
3  * $Id: field.c,v 1.29.8.11 2007-12-05 16:50:47 opengl2772 Exp $
4  *
5  * Copyright (C) 1997-1999 Satoru Takabayashi All rights reserved.
6  * Copyright (C) 2000-2007 Namazu Project All rights reserved.
7  * This is free software with ABSOLUTELY NO WARRANTY.
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22  * 02111-1307, USA
23  *
24  *
25  */
26 
27 #ifdef HAVE_CONFIG_H
28 #  include "config.h"
29 #endif
30 #ifdef HAVE_SUPPORT_H
31 #  include "support.h"
32 #endif
33 
34 #include <ctype.h>
35 #include <stdio.h>
36 
37 #ifdef HAVE_ERRNO_H
38 #  include <errno.h>
39 #endif
40 
41 #ifdef HAVE_STRING_H
42 #  include <string.h>
43 #else
44 #  include <strings.h>
45 #endif
46 
47 #include "libnamazu.h"
48 #include "field.h"
49 #include "util.h"
50 #include "re.h"
51 #include "var.h"
52 #include "replace.h"
53 #include "idxname.h"
54 
55 /*
56  * Private variables
57  */
58 
59 static int cache_idx = 0, cache_num = 0;
60 static struct field_cache fc[FIELD_CACHE_SIZE];
61 
62 /*
63  *
64  * Private functions
65  *
66  */
67 
/* Rewrite an aliased field name ("title", "author", "path") in place. */
static void apply_field_alias(char *field);

/* Tell whether a character may appear in a field name. */
static int is_field_safe_char(int c);

/* Prepare NMZ.field for the index numbered n. */
static void make_fullpathname_field(int n);
71 
72 
/*
 * Replace an aliased field name with its canonical name, in place:
 *   "title" -> "subject", "author" -> "from", "path" -> "uri".
 * Any other name is left untouched.
 *
 * The buffer behind `field' must be large enough for the canonical
 * name ("subject" is longer than "title").
 */
static void
apply_field_alias(char *field)
{
    static const char *const alias_table[][2] = {
        { "title",  "subject" },
        { "author", "from"    },
        { "path",   "uri"     }
    };
    size_t k;

    for (k = 0; k < sizeof(alias_table) / sizeof(alias_table[0]); k++) {
        if (strcmp(field, alias_table[k][0]) == 0) {
            strcpy(field, alias_table[k][1]);
            return;
        }
    }
}
84 
85 static int
is_field_safe_char(int c)86 is_field_safe_char(int c)
87 {
88     if ((strchr(FIELD_SAFE_CHARS, c) != NULL)) {
89         return 1;
90     } else {
91         return 0;
92     }
93 
94 }
95 
96 static void
make_fullpathname_field(int n)97 make_fullpathname_field(int n)
98 {
99     char *base;
100 
101     base = nmz_get_idxname(n);
102     nmz_pathcat(base, NMZ.field);
103 }
104 
105 /*
106  *
107  * Public functions
108  *
109  */
110 
111 
/*
 * Check whether `key' is a field specifier.
 *
 * Returns 1 when `key' starts with '+', continues with a run of
 * field-safe characters, and that run is immediately followed by ':'
 * with the character just before the ':' accepted by nmz_isalpha().
 * Returns 0 in every other case.
 */
int
nmz_isfield(const char *key)
{
    const char *p;

    if (*key != '+') {
        return 0;
    }

    /* Skip the '+' and then the longest run of field-safe characters. */
    p = key + 1;
    while (*p != '\0' && is_field_safe_char(*p)) {
        p++;
    }

    /* p - 1 is always valid here: at worst it points at the leading '+'. */
    if (nmz_isalpha((unsigned char)*(p - 1)) && *p == ':') {
        return 1;
    }
    return 0;
}
134 
135 /*
136  * This function returns a string storing a field name in
137  * the fieldpat. The string can only be used until the next
138  * call to the function.
139  */
140 char *
nmz_get_field_name(const char * fieldpat)141 nmz_get_field_name(const char *fieldpat)
142 {
143     static char field_name[BUFSIZE]; /* storing field name */
144     char *tmp = field_name;
145     int count = 0;
146 
147     fieldpat++;  /* ignore beggining '+' mark */
148     while (*fieldpat && count < BUFSIZE - 1) {
149         if (! is_field_safe_char(*fieldpat)) {
150             break;
151         }
152         *tmp = *fieldpat;
153         tmp++;
154         fieldpat++;
155 	count++;
156     }
157     *tmp = '\0';
158 
159     apply_field_alias(field_name);
160     return field_name;
161 }
162 
163 void
nmz_get_field_data(int idxid,int docid,const char * field,char * data)164 nmz_get_field_data(int idxid, int docid, const char *field, char *data)
165 {
166     char fname[BUFSIZE] = "";
167     char tmpfield[BUFSIZE] = "";
168     int i;
169     FILE *fp_field, *fp_field_idx;
170 
171     strcpy(data, ""); /* For safety. */
172 
173     strncpy(tmpfield, field, BUFSIZE - 1);
174     apply_field_alias(tmpfield);  /* This would overwrite `tmpfield' */
175 
176     /*
177      * Consult caches.
178      * Caching is intended to reduce rereading same data from a disk drive.
179      * It works well with this kind of format: <a href="${uri}">${uri}</a>.
180      */
181     for (i = 0; i < cache_num; i++) {
182 	if (idxid == fc[i].idxid && docid == fc[i].docid &&
183 	    strcmp(tmpfield, fc[i].field) == 0)
184 	{  /* cache hit! */
185 	    nmz_debug_printf("field cache [%s] hit!\n", tmpfield);
186 	    strncpy(data, fc[i].data, BUFSIZE - 1);	/* data length should be BUFSIZE - 1 */
187 	    return;
188 	}
189     }
190 
191     /* Make a pathname */
192     make_fullpathname_field(idxid);
193     strncpy(fname, NMZ.field, BUFSIZE - 1);
194     strncat(fname, tmpfield, BUFSIZE - strlen(fname) - 1);
195 
196     fp_field = fopen(fname, "rb");
197     if (fp_field == NULL) {
198         nmz_warn_printf("%s: %s", fname, strerror(errno));
199 	return;
200     }
201 
202     strncat(fname, ".i", BUFSIZE - strlen(fname) - 1);
203     fp_field_idx = fopen(fname, "rb");
204     if (fp_field_idx == NULL) {
205         nmz_warn_printf("%s: %s", fname, strerror(errno));
206         fclose(fp_field);
207 	return;
208     }
209 
210     /*
211      * You can rely on that length of a field is shorter than
212      * BUFSIZE [1024] because its length is restricted in
213      * conf.pl: $conf::MAX_FIELD_LENGTH = 200;
214      */
215     fseek(fp_field, nmz_getidxptr(fp_field_idx, docid), 0);
216     fgets(data, BUFSIZE, fp_field);
217     nmz_chomp(data);
218 
219     fclose(fp_field);
220     fclose(fp_field_idx);
221 
222     /* Cache */
223     fc[cache_idx].idxid = idxid;
224     fc[cache_idx].docid = docid;
225     strncpy(fc[cache_idx].field, tmpfield, BUFSIZE - 1);
226     fc[cache_idx].field[BUFSIZE - 1] = '\0';
227     strncpy(fc[cache_idx].data, data, BUFSIZE - 1);
228     fc[cache_idx].data[BUFSIZE - 1] = '\0';
229     cache_idx = (cache_idx + 1) % FIELD_CACHE_SIZE;
230     if (cache_num < FIELD_CACHE_SIZE) {
231 	cache_num++;
232     }
233 }
234 
235 void
nmz_free_field_cache(void)236 nmz_free_field_cache(void)
237 {
238        int i;
239        for(i = 0; i < cache_num; i++) {
240                fc[i].idxid =0;
241                fc[i].docid =0;
242                strcpy(fc[i].field, "");
243                strcpy(fc[i].data, "");
244        }
245        cache_num = 0;
246 }
247