1 /*
2 *
3 * $Id: query.c,v 1.13.8.4 2008-04-28 15:09:19 opengl2772 Exp $
4 *
5 * Copyright (C) 1997-1999 Satoru Takabayashi All rights reserved.
6 * Copyright (C) 2000-2008 Namazu Project All rights reserved.
7 * This is free software with ABSOLUTELY NO WARRANTY.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 * 02111-1307, USA
23 *
24 *
25 */
26
27 #ifdef HAVE_CONFIG_H
28 # include "config.h"
29 #endif
30 #ifdef HAVE_SUPPORT_H
31 # include "support.h"
32 #endif
33
34 #ifdef HAVE_STDLIB_H
35 # include <stdlib.h>
36 #endif
37
38 #ifdef HAVE_STRING_H
39 # include <string.h>
40 #else
41 # include <strings.h>
42 #endif
43
44 #include "libnamazu.h"
45 #include "util.h"
46 #include "var.h"
47 #include "field.h"
48 #include "query.h"
49 #include "codeconv.h"
50
/*
 * File-local query state: written by nmz_make_query() and read by
 * nmz_get_querytokennum() / nmz_get_querytoken().  `= {0}' zero-initializes
 * every member, so tokennum starts at 0.
 */
static struct nmz_query query = {0};
52
53 /*
54 *
55 * Private functions
56 *
57 */
58
/*
 * Forward declarations of the file-local helpers that pre-process the query
 * string in place before it is split into tokens.
 */
static void set_phrase_trick(char *str);
static void set_regex_trick(char *str);
61
/*
 * Protect phrases from the space-based tokenizer.
 *
 * A phrase is written "foo bar" or {foo bar}.  The opening delimiter must
 * start a word (beginning of the string or preceded by a space) and must not
 * be followed by a space; the closing delimiter must end a word (followed by
 * a space or the end of the string) and must not be preceded by a space.
 * Every space between such a pair is overwritten with a TAB.  The delimiters
 * themselves are left in place, and a phrase that is never closed is left
 * untouched.
 *
 * str is modified in place and must be a NUL-terminated, writable string.
 */
static void
set_phrase_trick(char *str)
{
    char closer = '\0';     /* closing delimiter awaited; '\0' = not in a phrase */
    char *start = str;      /* first character after the opening delimiter */
    char *p;

    for (p = str; *p != '\0'; p++) {
        if (closer == '\0') {
            int at_word_start = (p == str || p[-1] == ' ');

            /* p[1] is at worst the terminating NUL, so the look-ahead is safe. */
            if ((*p == '"' || *p == '{') && at_word_start && p[1] != ' ') {
                closer = (*p == '{') ? '}' : '"';
                start = p + 1;
            }
        } else if (*p == closer &&
                   (p[1] == ' ' || p[1] == '\0') &&
                   p[-1] != ' ')    /* p > str here: an opener was seen earlier */
        {
            char *q;

            for (q = start; q < p; q++) {
                if (*q == ' ') {
                    *q = '\t';
                }
            }
            closer = '\0';
        }
    }
}
98
/*
 * Protect regex patterns from the space-based tokenizer.
 *
 * A pattern opens with '/' at the start of a word (beginning of the string
 * or preceded by a space), or with '/', '"' or '{' directly after a field
 * prefix accepted by nmz_isfield().  It closes at the matching delimiter
 * ('/', '"' or '}') that is followed by a space or the end of the string.
 * Every space between the two is overwritten with a form feed ('\f'), which
 * the caller is expected to turn back into a space once the string has been
 * split into tokens.
 *
 * str is modified in place and must be a NUL-terminated, writable string.
 */
static void
set_regex_trick(char *str)
{
    int i;
    int delim = 0;          /* closing delimiter awaited; 0 = not in a pattern */
    char *b = str;          /* first character after the opening delimiter */

    for (i = 0; str[i] != '\0'; i++) {
        int field = 0;

        if ((i == 0 || str[i - 1] == ' ') && nmz_isfield(str + i)) {
            /*
             * Step over the field prefix up to and including its ':' so
             * that str[i] is the first character of the field's pattern.
             * NOTE(review): nmz_isfield() presumably guarantees the ':';
             * it is verified here so a missing colon can never push i
             * beyond the terminator.
             */
            size_t skip = strcspn(str + i, ":");

            if (str[i + skip] == ':') {
                field = 1;
                i += (int)skip + 1;
                if (str[i] == '\0') {
                    /* Nothing follows the colon; the loop's i++ must not
                     * step over the terminating NUL. */
                    break;
                }
            }
        }

        if ((field || i == 0 || str[i - 1] == ' ') &&
            (str[i] == '/' ||
             (field && (str[i] == '"' || str[i] == '{'))))
        {
            delim = (str[i] == '{') ? '}' : str[i];
            b = str + i + 1;
        } else if (delim != 0 && str[i] == delim &&
                   (str[i + 1] == ' ' || str[i + 1] == '\0'))
        {
            char *e = str + i;      /* the closing delimiter itself */

            for (; b < e; b++) {
                if (*b == ' ') {
                    *b = '\f';
                }
            }
            delim = 0;
        }
    }
}
137
138
139 /*
140 *
141 * Public functions
142 *
143 */
144
145 /*
146 * Make the query from the string querystring.
147 * FIXME: The function is tremendously dirty. it should be rewritten.
148 */
149 enum nmz_stat
nmz_make_query(const char * querystring)150 nmz_make_query(const char *querystring)
151 {
152 int i, tokennum;
153
154 if (strlen(querystring) > QUERY_MAX) {
155 return ERR_TOO_LONG_QUERY;
156 }
157
158 strcpy(query.str, querystring);
159
160 set_phrase_trick(query.str);
161 nmz_debug_printf("set_phrase_trick: %s\n", query.str);
162
163 set_regex_trick(query.str);
164 nmz_debug_printf("set_regex_trick: %s\n", query.str);
165
166 /* Count number of tokens in querystring. */
167 for (i = 0, tokennum = 0; *(query.str + i);) {
168 while (query.str[i] == ' ')
169 i++;
170 if (query.str[i])
171 tokennum++;
172 while (query.str[i] != ' ' &&
173 query.str[i] != '\0')
174 i++;
175 }
176
177 if (tokennum == 0) { /* if no token available */
178 return ERR_INVALID_QUERY;
179 }
180
181 /* If too much items in query, return with error */
182 if (tokennum > QUERY_TOKEN_MAX) {
183 return ERR_TOO_MANY_TOKENS;
184 }
185
186 /* Assign a pointer to each token and set NULL to the end of each token. */
187 for (i = 0, tokennum = 0; query.str[i];) {
188 while (query.str[i] == ' ')
189 i++;
190 if (query.str[i])
191 query.tab[tokennum++] = &query.str[i];
192 while (query.str[i] != ' ' &&
193 query.str[i] != '\0')
194 i++;
195 if (query.str[i] != '\0')
196 query.str[i++] = '\0';
197 }
198
199 /* Set NULL to the last key table. */
200 query.tab[tokennum] = (char *) NULL;
201
202 /* Replace with spaces (restore). */
203 for (i = 0; i < tokennum; i++) {
204 nmz_tr(query.tab[i], "", " ");
205 }
206
207 /* Assign tokennum. */
208 query.tokennum = tokennum;
209
210 if (nmz_is_debugmode()) {
211 nmz_debug_printf("query.tokennum: %d\n", query.tokennum);
212 for (i = 0; i < tokennum; i++) {
213 nmz_debug_printf("query.tab[%d]: %s\n", i, query.tab[i]);
214 }
215 }
216
217 return SUCCESS;
218 }
219
220 int
nmz_get_querytokennum(void)221 nmz_get_querytokennum(void)
222 {
223 return query.tokennum;
224 }
225
226 char *
nmz_get_querytoken(int id)227 nmz_get_querytoken(int id)
228 {
229 return query.tab[id];
230 }
231