1 /* search_engines.c -- Prefiltering routines for SEARCH
2 *
3 * Copyright (c) 1994-2008 Carnegie Mellon University. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. The name "Carnegie Mellon University" must not be used to
18 * endorse or promote products derived from this software without
19 * prior written permission. For permission or any legal
20 * details, please contact
21 * Carnegie Mellon University
22 * Center for Technology Transfer and Enterprise Creation
23 * 4615 Forbes Avenue
24 * Suite 302
25 * Pittsburgh, PA 15213
26 * (412) 268-7393, fax: (412) 268-7395
27 * innovation@andrew.cmu.edu
28 *
29 * 4. Redistributions of any form whatsoever must retain the following
30 * acknowledgment:
31 * "This product includes software developed by Computing Services
32 * at Carnegie Mellon University (http://www.cmu.edu/computing/)."
33 *
34 * CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO
35 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
36 * AND FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
37 * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
38 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
39 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
40 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
41 */
42
43 #include <config.h>
44
45 #include <sys/types.h>
46 #include <stdlib.h>
47 #include <syslog.h>
48 #include <string.h>
49 #ifdef HAVE_UNISTD_H
50 #include <unistd.h>
51 #endif
52
53 #include "index.h"
54 #include "message.h"
55 #include "global.h"
56 #include "search_engines.h"
57 #include "ptrarray.h"
58
59 /* generated headers are not necessarily in current directory */
60 #include "imap/imap_err.h"
61
/* Engine descriptors defined in other translation units; each one is
 * only declared when its backend is compiled in, and each is only
 * referenced from search_engine() under the same guard. */
#ifdef USE_SQUAT
extern const struct search_engine squat_search_engine;
#endif
#ifdef USE_XAPIAN
extern const struct search_engine xapian_search_engine;
#endif
68
/*
 * Fallback engine handed out by search_engine() when no compiled-in
 * backend matches the configured value.  After the leading string and
 * integer, every remaining member is NULL; the wrappers in this file
 * test each hook for NULL before calling it, so with this engine they
 * all return their "nothing to do" value.
 * NOTE(review): member order is fixed by struct search_engine in
 * search_engines.h -- presumably name, flags, then the hooks; confirm
 * there before adding or reordering members.
 */
static const struct search_engine default_search_engine = {
    "default",
    0,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL
};
85
search_engine(void)86 EXPORTED const struct search_engine *search_engine(void)
87 {
88 switch (config_getenum(IMAPOPT_SEARCH_ENGINE)) {
89 #ifdef USE_XAPIAN
90 case IMAP_ENUM_SEARCH_ENGINE_XAPIAN:
91 return &xapian_search_engine;
92 #endif
93 #ifdef USE_SQUAT
94 case IMAP_ENUM_SEARCH_ENGINE_SQUAT:
95 return &squat_search_engine;
96 #endif
97 default:
98 return &default_search_engine;
99 }
100 }
101
/*
 * Default markup strings for search snippets.
 * NOTE(review): the meaning of each position is defined by
 * search_snippet_markup_t in search_engines.h -- presumably match start,
 * match end and the omission marker; confirm there.
 */
EXPORTED search_snippet_markup_t default_snippet_markup = {
    "<b>", "</b>", "..."
};
105
search_part_as_string(int part)106 EXPORTED const char *search_part_as_string(int part)
107 {
108 static const char *names[SEARCH_NUM_PARTS] = {
109 /* ANY */NULL, "FROM", "TO", "CC",
110 "BCC", "SUBJECT", "LISTID", "TYPE",
111 "HEADERS", "BODY", "LOCATION", "ATTACHMENTNAME",
112 "ATTACHMENTBODY"
113 };
114
115 return (part < 0 || part >= SEARCH_NUM_PARTS ? NULL : names[part]);
116 }
117
search_part_is_body(int part)118 EXPORTED int search_part_is_body(int part)
119 {
120 return part == SEARCH_PART_BODY ||
121 part == SEARCH_PART_LOCATION ||
122 part == SEARCH_PART_ATTACHMENTBODY;
123 }
124
125
search_begin_search(struct mailbox * mailbox,int opts)126 EXPORTED search_builder_t *search_begin_search(struct mailbox *mailbox, int opts)
127 {
128 const struct search_engine *se = search_engine();
129 return (se->begin_search ?
130 se->begin_search(mailbox, opts) : NULL);
131 }
132
search_end_search(search_builder_t * bx)133 EXPORTED void search_end_search(search_builder_t *bx)
134 {
135 const struct search_engine *se = search_engine();
136 if (se->end_search) se->end_search(bx);
137 }
138
search_begin_update(int verbose)139 EXPORTED search_text_receiver_t *search_begin_update(int verbose)
140 {
141 const struct search_engine *se = search_engine();
142 /* We don't fallback to the default search engine here
143 * because the default behaviour is not to index anything */
144 return (se->begin_update ? se->begin_update(verbose) : NULL);
145 }
146
search_batch_size(void)147 static int search_batch_size(void)
148 {
149 const struct search_engine *se = search_engine();
150 return (se->flags & SEARCH_FLAG_CAN_BATCH ?
151 config_getint(IMAPOPT_SEARCH_BATCHSIZE) : INT_MAX);
152 }
153
154 /*
155 * Flush a batch of messages to the search engine's indexer code. We
156 * drop the index lock during the presumably CPU and IO heavy parts of
157 * the procedure and re-acquire it afterward, to avoid delaying other
158 * processes like imapds. The reacquisition may of course fail.
159 * Returns an IMAP error code or 0 on success.
160 */
/*
 * Ownership: every message in `batch` is unref'd and the batch is
 * truncated to zero entries before this function returns, on success
 * and on every error path alike.
 *
 * rx:      receiver passed to index_getsearchtext(); its optional flush
 *          hook is called once after all messages were processed.
 * mailbox: mailbox whose index lock is dropped up front.
 * batch:   array of message_t pointers to index.
 */
static int flush_batch(search_text_receiver_t *rx,
                       struct mailbox *mailbox,
                       ptrarray_t *batch)
{
    int i;
    int r = 0;

    /* give someone else a chance */
    mailbox_unlock_index(mailbox, NULL);

    /* prefetch files; stop at the first failure but do NOT return yet,
     * otherwise the queued messages would never be released */
    for (i = 0 ; i < batch->count && !r ; i++) {
        message_t *msg = ptrarray_nth(batch, i);
        const char *fname;

        r = message_get_fname(msg, &fname);
        if (!r) r = warmup_file(fname, 0, 0);
        /* a warmup error means we failed to open a file,
         * so we'd fail later anyway */
    }

    /* index each message until the first error (including one carried
     * over from prefetching), but always drop our reference */
    for (i = 0 ; i < batch->count ; i++) {
        message_t *msg = ptrarray_nth(batch, i);
        if (!r) r = index_getsearchtext(msg, NULL, rx, 0);
        message_unref(&msg);
    }
    ptrarray_truncate(batch, 0);

    if (r) return r;

    /* the flush hook is optional */
    if (rx->flush)
        r = rx->flush(rx);

    return r;
}
199
search_update_mailbox(search_text_receiver_t * rx,struct mailbox * mailbox,int flags)200 EXPORTED int search_update_mailbox(search_text_receiver_t *rx,
201 struct mailbox *mailbox,
202 int flags)
203 {
204 int r = 0; /* Using IMAP_* not SQUAT_* return codes here */
205 int r2;
206 int was_partial = 0;
207 int batch_size = search_batch_size();
208 ptrarray_t batch = PTRARRAY_INITIALIZER;
209 const message_t *msg;
210
211 r = rx->begin_mailbox(rx, mailbox, flags);
212 if (r) goto done;
213
214 /* we want to index EXPUNGED messages too, because otherwise when we check the
215 * ranges matching the GUID in conversations DB later, we might think we've
216 * indexed it when we actually haven't */
217 struct mailbox_iter *iter = mailbox_iter_init(mailbox, 0, ITER_SKIP_UNLINKED);
218 if (flags & SEARCH_UPDATE_INCREMENTAL) mailbox_iter_startuid(iter, rx->first_unindexed_uid(rx));
219
220 while ((msg = mailbox_iter_step(iter))) {
221 const struct index_record *record = msg_record(msg);
222 if ((flags & SEARCH_UPDATE_BATCH) && batch.count >= batch_size) {
223 syslog(LOG_INFO, "search_update_mailbox batching %u messages to %s",
224 batch.count, mailbox->name);
225 was_partial = 1;
226 break;
227 }
228
229 message_t *msg = message_new_from_record(mailbox, record);
230
231 if (!rx->is_indexed(rx, msg))
232 ptrarray_append(&batch, msg);
233 else
234 message_unref(&msg);
235 }
236 mailbox_iter_done(&iter);
237
238 if (batch.count)
239 r = flush_batch(rx, mailbox, &batch);
240
241 done:
242 ptrarray_fini(&batch);
243 r2 = rx->end_mailbox(rx, mailbox);
244 if (r) return r;
245 if (r2) return r2;
246 if (was_partial) return IMAP_AGAIN;
247 return 0;
248 }
249
search_end_update(search_text_receiver_t * rx)250 EXPORTED int search_end_update(search_text_receiver_t *rx)
251 {
252 const struct search_engine *se = search_engine();
253 /* We don't fallback to the default search engine here
254 * because the default behaviour is not to index anything */
255 return (se->end_update ? se->end_update(rx) : 0);
256 }
257
search_begin_snippets(void * internalised,int verbose,search_snippet_markup_t * markup,search_snippet_cb_t proc,void * rock)258 EXPORTED search_text_receiver_t *search_begin_snippets(void *internalised,
259 int verbose,
260 search_snippet_markup_t *markup,
261 search_snippet_cb_t proc,
262 void *rock)
263 {
264 const struct search_engine *se = search_engine();
265 return (se->begin_snippets ? se->begin_snippets(internalised,
266 verbose, markup, proc, rock) : NULL);
267 }
268
search_end_snippets(search_text_receiver_t * rx)269 EXPORTED int search_end_snippets(search_text_receiver_t *rx)
270 {
271 const struct search_engine *se = search_engine();
272 return (se->end_snippets ? se->end_snippets(rx) : 0);
273 }
274
search_describe_internalised(void * internalised)275 EXPORTED char *search_describe_internalised(void *internalised)
276 {
277 const struct search_engine *se = search_engine();
278 return (se->describe_internalised ?
279 se->describe_internalised(internalised) : 0);
280 }
281
search_free_internalised(void * internalised)282 EXPORTED void search_free_internalised(void *internalised)
283 {
284 const struct search_engine *se = search_engine();
285 if (se->free_internalised) se->free_internalised(internalised);
286 }
287
search_list_files(const char * userid,strarray_t * files)288 EXPORTED int search_list_files(const char *userid,
289 strarray_t *files)
290 {
291 const struct search_engine *se = search_engine();
292 return (se->list_files ? se->list_files(userid, files) : 0);
293 }
294
search_compact(const char * userid,const char * tempdir,const strarray_t * srctiers,const char * desttier,int flags)295 EXPORTED int search_compact(const char *userid,
296 const char *tempdir,
297 const strarray_t *srctiers,
298 const char *desttier,
299 int flags)
300 {
301 const struct search_engine *se = search_engine();
302 return (se->compact ? se->compact(userid, tempdir, srctiers, desttier, flags) : 0);
303 }
304
search_deluser(const char * userid)305 EXPORTED int search_deluser(const char *userid)
306 {
307 const struct search_engine *se = search_engine();
308 return (se->deluser ? se->deluser(userid) : 0);
309 }
310
search_check_config(char ** errstr)311 EXPORTED int search_check_config(char **errstr)
312 {
313 const struct search_engine *se = search_engine();
314 return (se->check_config ? se->check_config(errstr) : 0);
315 }
316
search_op_as_string(int op)317 const char *search_op_as_string(int op)
318 {
319 static char buf[33];
320
321 switch (op) {
322 case SEARCH_OP_AND: return "AND";
323 case SEARCH_OP_OR: return "OR";
324 case SEARCH_OP_NOT: return "NOT";
325 default:
326 snprintf(buf, sizeof(buf), "(%d)", op);
327 return buf;
328 }
329 }
330