1 /*
2  *
3  * $Id: query.c,v 1.13.8.4 2008-04-28 15:09:19 opengl2772 Exp $
4  *
5  * Copyright (C) 1997-1999 Satoru Takabayashi All rights reserved.
6  * Copyright (C) 2000-2008 Namazu Project All rights reserved.
7  * This is free software with ABSOLUTELY NO WARRANTY.
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22  * 02111-1307, USA
23  *
24  *
25  */
26 
27 #ifdef HAVE_CONFIG_H
28 #  include "config.h"
29 #endif
30 #ifdef HAVE_SUPPORT_H
31 #  include "support.h"
32 #endif
33 
34 #ifdef HAVE_STDLIB_H
35 #  include <stdlib.h>
36 #endif
37 
38 #ifdef HAVE_STRING_H
39 #  include <string.h>
40 #else
41 #  include <strings.h>
42 #endif
43 
44 #include "libnamazu.h"
45 #include "util.h"
46 #include "var.h"
47 #include "field.h"
48 #include "query.h"
49 #include "codeconv.h"
50 
51 static struct nmz_query query = {0}; /* Initialize member `tokennum' with 0 */
52 
/*
 *
 * Private functions
 *
 */
58 
/* Both helpers rewrite the string they are given in place. */
static void set_phrase_trick(char *str);
static void set_regex_trick(char *str);
61 
/*
 * Replace the spaces inside a quoted phrase with TABs, in place, so that a
 * later split on ' ' keeps the phrase together as one token.
 * A phrase is written "foo bar"; {foo bar} is also acceptable.
 * The delimiters themselves are left untouched.
 *
 * An opening delimiter counts only at the start of the string or right
 * after a space, and only when it is not followed by a space.  A closing
 * delimiter counts only when it is followed by a space or the end of the
 * string and is not preceded by a space.  A phrase that is never closed is
 * left unchanged.
 * FIXME: very complicated ad hoc routine.
 */
static void
set_phrase_trick(char *str)
{
    int i;
    int delim = 0;      /* closing delimiter awaited; 0 while outside a phrase */
    char *b = str;      /* first character after the opening delimiter */

    for (i = 0; str[i] != '\0'; i++) {
        if (delim == 0 &&
            (str[i] == '"' || str[i] == '{') &&
            (i == 0 || str[i - 1] == ' ') &&
            (str[i + 1] != ' '))
        {
            /* '{' is closed by '}', '"' is closed by itself. */
            delim = (str[i] == '{') ? '}' : str[i];
            b = str + i + 1;
        } else if (delim != 0 && str[i] == delim &&
                   (str[i + 1] == ' ' || str[i + 1] == '\0') &&
                   (str[i - 1] != ' '))
        {
            /* i >= 1 here: delim was set at an earlier index. */
            char *e = str + i - 1;  /* last character before the closer */

            delim = 0;
            for (; b <= e; b++) {
                if (*b == ' ') {
                    *b = '\t';
                }
            }
        }
    }
}
98 
/*
 * Replace the spaces inside a regex pattern with form feeds ('\f'), in
 * place, so that a later split on ' ' keeps the pattern together as one
 * token.
 *
 * A pattern opens with '/' at the start of the string, after a space, or
 * right after a field prefix (as recognised by nmz_isfield()).  Right after
 * a field prefix, '"' and '{' open a pattern as well ('{' is closed by
 * '}').  A pattern closes at its delimiter when that is followed by a space
 * or the end of the string.
 *
 * NOTE(review): the placeholder character was missing from the copy of the
 * source under review (the assignment was an empty character constant).
 * Form feed is assumed; it has to be the same character that
 * nmz_make_query() translates back to ' ' -- confirm against upstream.
 * FIXME: very complicated ad hoc routine.
 */
static void
set_regex_trick(char *str)
{
    int i;
    int delim = 0;      /* closing delimiter awaited; 0 while outside a pattern */
    char *b = str;      /* first character after the opening delimiter */

    for (i = 0; str[i] != '\0'; i++) {
        int field = 0;

        if ((i == 0 || str[i - 1] == ' ') && nmz_isfield(str + i)) {
            size_t namelen = strcspn(str + i, ":");

            /*
             * Stop when there is no ':' or nothing follows it.  Skipping
             * ahead anyway would leave i on (or beyond) the terminator and
             * the loop's i++ would then walk past the end of the string.
             */
            if (str[i + namelen] != ':' || str[i + namelen + 1] == '\0') {
                break;
            }
            field = 1;
            i += (int)namelen + 1;      /* now at the first char after ':' */
        }

        if ((field || i == 0 || str[i - 1] == ' ') &&
            (str[i] == '/' ||
             (field && (str[i] == '"' || str[i] == '{'))))
        {
            delim = (str[i] == '{') ? '}' : str[i];
            b = str + i + 1;
        } else if (delim != 0 && str[i] == delim
                   && (str[i + 1] == ' ' || str[i + 1] == '\0'))
        {
            char *e = str + i - 1;      /* last character before the closer */

            delim = 0;
            for (; b <= e; b++) {
                if (*b == ' ') {
                    *b = '\f';
                }
            }
        }
    }
}
137 
138 
/*
 *
 * Public functions
 *
 */
144 
145 /*
146  * Make the query from the string querystring.
147  * FIXME: The function is tremendously dirty. it should be rewritten.
148  */
149 enum nmz_stat
nmz_make_query(const char * querystring)150 nmz_make_query(const char *querystring)
151 {
152     int i, tokennum;
153 
154     if (strlen(querystring) > QUERY_MAX) {
155 	return ERR_TOO_LONG_QUERY;
156     }
157 
158     strcpy(query.str, querystring);
159 
160     set_phrase_trick(query.str);
161     nmz_debug_printf("set_phrase_trick: %s\n", query.str);
162 
163     set_regex_trick(query.str);
164     nmz_debug_printf("set_regex_trick: %s\n", query.str);
165 
166     /* Count number of tokens in querystring. */
167     for (i = 0, tokennum = 0; *(query.str + i);) {
168 	while (query.str[i] == ' ')
169 	    i++;
170 	if (query.str[i])
171 	    tokennum++;
172 	while (query.str[i] != ' ' &&
173 	       query.str[i] != '\0')
174 	    i++;
175     }
176 
177     if (tokennum == 0) { /* if no token available */
178 	return ERR_INVALID_QUERY;
179     }
180 
181     /* If too much items in query, return with error */
182     if (tokennum > QUERY_TOKEN_MAX) {
183 	return ERR_TOO_MANY_TOKENS;
184     }
185 
186     /* Assign a pointer to each token and set NULL to the end of each token. */
187     for (i = 0, tokennum = 0; query.str[i];) {
188 	while (query.str[i] == ' ')
189 	    i++;
190 	if (query.str[i])
191 	    query.tab[tokennum++] = &query.str[i];
192 	while (query.str[i] != ' ' &&
193 	       query.str[i] != '\0')
194 	    i++;
195 	if (query.str[i] != '\0')
196 	    query.str[i++] = '\0';
197     }
198 
199     /* Set NULL to the last key table. */
200     query.tab[tokennum] = (char *) NULL;
201 
202     /* Replace  with spaces (restore). */
203     for (i = 0; i < tokennum; i++) {
204 	nmz_tr(query.tab[i], "", " ");
205     }
206 
207     /* Assign tokennum. */
208     query.tokennum = tokennum;
209 
210     if (nmz_is_debugmode()) {
211 	nmz_debug_printf("query.tokennum: %d\n", query.tokennum);
212 	for (i = 0; i < tokennum; i++) {
213 	    nmz_debug_printf("query.tab[%d]: %s\n", i, query.tab[i]);
214 	}
215     }
216 
217     return SUCCESS;
218 }
219 
220 int
nmz_get_querytokennum(void)221 nmz_get_querytokennum(void)
222 {
223     return query.tokennum;
224 }
225 
226 char *
nmz_get_querytoken(int id)227 nmz_get_querytoken(int id)
228 {
229     return query.tab[id];
230 }
231