1 /*
2 *
3 * $Id: field.c,v 1.29.8.11 2007-12-05 16:50:47 opengl2772 Exp $
4 *
5 * Copyright (C) 1997-1999 Satoru Takabayashi All rights reserved.
6 * Copyright (C) 2000-2007 Namazu Project All rights reserved.
7 * This is free software with ABSOLUTELY NO WARRANTY.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 * 02111-1307, USA
23 *
24 *
25 */
26
27 #ifdef HAVE_CONFIG_H
28 # include "config.h"
29 #endif
30 #ifdef HAVE_SUPPORT_H
31 # include "support.h"
32 #endif
33
34 #include <ctype.h>
35 #include <stdio.h>
36
37 #ifdef HAVE_ERRNO_H
38 # include <errno.h>
39 #endif
40
41 #ifdef HAVE_STRING_H
42 # include <string.h>
43 #else
44 # include <strings.h>
45 #endif
46
47 #include "libnamazu.h"
48 #include "field.h"
49 #include "util.h"
50 #include "re.h"
51 #include "var.h"
52 #include "replace.h"
53 #include "idxname.h"
54
55 /*
56 * Private variables
57 */
58
59 static int cache_idx = 0, cache_num = 0;
60 static struct field_cache fc[FIELD_CACHE_SIZE];
61
62 /*
63 *
64 * Private functions
65 *
66 */
67
/* Forward declarations of the file-local helpers defined below. */
static void apply_field_alias(char *field);
static int  is_field_safe_char(int c);
static void make_fullpathname_field(int n);
71
72
/*
 * Replace a field alias, in place, by its canonical field name:
 *
 *     "title"  -> "subject"
 *     "author" -> "from"
 *     "path"   -> "uri"
 *
 * Any other name is left untouched.  The buffer behind `field' must be
 * large enough for the canonical name, which can be longer than the
 * alias ("subject" is longer than "title").
 */
static void
apply_field_alias(char *field)
{
    static const char *const aliases[][2] = {
        { "title",  "subject" },
        { "author", "from"    },
        { "path",   "uri"     }
    };
    size_t k;

    for (k = 0; k < sizeof(aliases) / sizeof(aliases[0]); k++) {
        if (strcmp(field, aliases[k][0]) == 0) {
            strcpy(field, aliases[k][1]);
            return;
        }
    }
}
84
85 static int
is_field_safe_char(int c)86 is_field_safe_char(int c)
87 {
88 if ((strchr(FIELD_SAFE_CHARS, c) != NULL)) {
89 return 1;
90 } else {
91 return 0;
92 }
93
94 }
95
96 static void
make_fullpathname_field(int n)97 make_fullpathname_field(int n)
98 {
99 char *base;
100
101 base = nmz_get_idxname(n);
102 nmz_pathcat(base, NMZ.field);
103 }
104
105 /*
106 *
107 * Public functions
108 *
109 */
110
111
/*
 * Check whether the key is a field specifier.
 *
 * A field specifier is a '+' followed by a run of field-safe
 * characters and then a ':', where the character immediately before
 * the ':' is alphabetic.
 *
 * Returns 1 if the key has that form, 0 otherwise.
 */
int
nmz_isfield(const char *key)
{
    const char *p;

    if (*key != '+') {
        return 0;
    }

    /* Step over the run of field-safe characters following the '+'. */
    for (p = key + 1; *p != '\0' && is_field_safe_char(*p); p++) {
        ;
    }

    /* p[-1] is at worst the '+' itself, which is not alphabetic. */
    return (nmz_isalpha((unsigned char)p[-1]) && *p == ':') ? 1 : 0;
}
134
135 /*
136 * This function returns a string storing a field name in
137 * the fieldpat. The string can only be used until the next
138 * call to the function.
139 */
140 char *
nmz_get_field_name(const char * fieldpat)141 nmz_get_field_name(const char *fieldpat)
142 {
143 static char field_name[BUFSIZE]; /* storing field name */
144 char *tmp = field_name;
145 int count = 0;
146
147 fieldpat++; /* ignore beggining '+' mark */
148 while (*fieldpat && count < BUFSIZE - 1) {
149 if (! is_field_safe_char(*fieldpat)) {
150 break;
151 }
152 *tmp = *fieldpat;
153 tmp++;
154 fieldpat++;
155 count++;
156 }
157 *tmp = '\0';
158
159 apply_field_alias(field_name);
160 return field_name;
161 }
162
163 void
nmz_get_field_data(int idxid,int docid,const char * field,char * data)164 nmz_get_field_data(int idxid, int docid, const char *field, char *data)
165 {
166 char fname[BUFSIZE] = "";
167 char tmpfield[BUFSIZE] = "";
168 int i;
169 FILE *fp_field, *fp_field_idx;
170
171 strcpy(data, ""); /* For safety. */
172
173 strncpy(tmpfield, field, BUFSIZE - 1);
174 apply_field_alias(tmpfield); /* This would overwrite `tmpfield' */
175
176 /*
177 * Consult caches.
178 * Caching is intended to reduce rereading same data from a disk drive.
179 * It works well with this kind of format: <a href="${uri}">${uri}</a>.
180 */
181 for (i = 0; i < cache_num; i++) {
182 if (idxid == fc[i].idxid && docid == fc[i].docid &&
183 strcmp(tmpfield, fc[i].field) == 0)
184 { /* cache hit! */
185 nmz_debug_printf("field cache [%s] hit!\n", tmpfield);
186 strncpy(data, fc[i].data, BUFSIZE - 1); /* data length should be BUFSIZE - 1 */
187 return;
188 }
189 }
190
191 /* Make a pathname */
192 make_fullpathname_field(idxid);
193 strncpy(fname, NMZ.field, BUFSIZE - 1);
194 strncat(fname, tmpfield, BUFSIZE - strlen(fname) - 1);
195
196 fp_field = fopen(fname, "rb");
197 if (fp_field == NULL) {
198 nmz_warn_printf("%s: %s", fname, strerror(errno));
199 return;
200 }
201
202 strncat(fname, ".i", BUFSIZE - strlen(fname) - 1);
203 fp_field_idx = fopen(fname, "rb");
204 if (fp_field_idx == NULL) {
205 nmz_warn_printf("%s: %s", fname, strerror(errno));
206 fclose(fp_field);
207 return;
208 }
209
210 /*
211 * You can rely on that length of a field is shorter than
212 * BUFSIZE [1024] because its length is restricted in
213 * conf.pl: $conf::MAX_FIELD_LENGTH = 200;
214 */
215 fseek(fp_field, nmz_getidxptr(fp_field_idx, docid), 0);
216 fgets(data, BUFSIZE, fp_field);
217 nmz_chomp(data);
218
219 fclose(fp_field);
220 fclose(fp_field_idx);
221
222 /* Cache */
223 fc[cache_idx].idxid = idxid;
224 fc[cache_idx].docid = docid;
225 strncpy(fc[cache_idx].field, tmpfield, BUFSIZE - 1);
226 fc[cache_idx].field[BUFSIZE - 1] = '\0';
227 strncpy(fc[cache_idx].data, data, BUFSIZE - 1);
228 fc[cache_idx].data[BUFSIZE - 1] = '\0';
229 cache_idx = (cache_idx + 1) % FIELD_CACHE_SIZE;
230 if (cache_num < FIELD_CACHE_SIZE) {
231 cache_num++;
232 }
233 }
234
235 void
nmz_free_field_cache(void)236 nmz_free_field_cache(void)
237 {
238 int i;
239 for(i = 0; i < cache_num; i++) {
240 fc[i].idxid =0;
241 fc[i].docid =0;
242 strcpy(fc[i].field, "");
243 strcpy(fc[i].data, "");
244 }
245 cache_num = 0;
246 }
247