1 /* search_engines.c -- Prefiltering routines for SEARCH
2  *
3  * Copyright (c) 1994-2008 Carnegie Mellon University.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in
14  *    the documentation and/or other materials provided with the
15  *    distribution.
16  *
17  * 3. The name "Carnegie Mellon University" must not be used to
18  *    endorse or promote products derived from this software without
19  *    prior written permission. For permission or any legal
20  *    details, please contact
21  *      Carnegie Mellon University
22  *      Center for Technology Transfer and Enterprise Creation
23  *      4615 Forbes Avenue
24  *      Suite 302
25  *      Pittsburgh, PA  15213
26  *      (412) 268-7393, fax: (412) 268-7395
27  *      innovation@andrew.cmu.edu
28  *
29  * 4. Redistributions of any form whatsoever must retain the following
30  *    acknowledgment:
31  *    "This product includes software developed by Computing Services
32  *     at Carnegie Mellon University (http://www.cmu.edu/computing/)."
33  *
34  * CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO
35  * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
36  * AND FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
37  * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
38  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
39  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
40  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
41  */
42 
43 #include <config.h>
44 
45 #include <sys/types.h>
46 #include <stdlib.h>
47 #include <syslog.h>
48 #include <string.h>
49 #ifdef HAVE_UNISTD_H
50 #include <unistd.h>
51 #endif
52 
53 #include "index.h"
54 #include "message.h"
55 #include "global.h"
56 #include "search_engines.h"
57 #include "ptrarray.h"
58 
59 /* generated headers are not necessarily in current directory */
60 #include "imap/imap_err.h"
61 
62 #ifdef USE_SQUAT
63 extern const struct search_engine squat_search_engine;
64 #endif
65 #ifdef USE_SPHINX
66 extern const struct search_engine sphinx_search_engine;
67 #endif
68 #ifdef USE_XAPIAN
69 extern const struct search_engine xapian_search_engine;
70 #endif
71 
72 static const struct search_engine default_search_engine = {
73     "default",
74     0,
75     NULL,
76     NULL,
77     NULL,
78     NULL,
79     NULL,
80     NULL,
81     NULL,
82     NULL,
83     NULL,
84     NULL,
85     NULL,
86     NULL,
87     NULL
88 };
89 
engine(void)90 static const struct search_engine *engine(void)
91 {
92     switch (config_getenum(IMAPOPT_SEARCH_ENGINE)) {
93 #ifdef USE_XAPIAN
94     case IMAP_ENUM_SEARCH_ENGINE_XAPIAN:
95         return &xapian_search_engine;
96 #endif
97 #ifdef USE_SPHINX
98     case IMAP_ENUM_SEARCH_ENGINE_SPHINX:
99         return &sphinx_search_engine;
100 #endif
101 #ifdef USE_SQUAT
102     case IMAP_ENUM_SEARCH_ENGINE_SQUAT:
103         return &squat_search_engine;
104 #endif
105     default:
106         return &default_search_engine;
107     }
108 }
109 
110 EXPORTED search_snippet_markup_t default_snippet_markup = {
111     "<b>", "</b>", "..."
112 };
113 
search_part_as_string(int part)114 EXPORTED const char *search_part_as_string(int part)
115 {
116     static const char *names[SEARCH_NUM_PARTS] = {
117         /* ANY */NULL, "FROM", "TO", "CC",
118         "BCC", "SUBJECT", "LISTID", "TYPE",
119         "HEADERS", "BODY", "LOCATION", "ATTACHMENTNAME"
120     };
121 
122     return (part < 0 || part >= SEARCH_NUM_PARTS ? NULL : names[part]);
123 }
124 
125 
search_begin_search(struct mailbox * mailbox,int opts)126 EXPORTED search_builder_t *search_begin_search(struct mailbox *mailbox, int opts)
127 {
128     const struct search_engine *se = engine();
129     return (se->begin_search ?
130             se->begin_search(mailbox, opts) : NULL);
131 }
132 
search_end_search(search_builder_t * bx)133 EXPORTED void search_end_search(search_builder_t *bx)
134 {
135     const struct search_engine *se = engine();
136     if (se->end_search) se->end_search(bx);
137 }
138 
search_begin_update(int verbose)139 EXPORTED search_text_receiver_t *search_begin_update(int verbose)
140 {
141     const struct search_engine *se = engine();
142     /* We don't fallback to the default search engine here
143      * because the default behaviour is not to index anything */
144     return (se->begin_update ? se->begin_update(verbose) : NULL);
145 }
146 
search_batch_size(void)147 static int search_batch_size(void)
148 {
149     const struct search_engine *se = engine();
150     return (se->flags & SEARCH_FLAG_CAN_BATCH ?
151             config_getint(IMAPOPT_SEARCH_BATCHSIZE) : INT_MAX);
152 }
153 
154 /*
155  * Flush a batch of messages to the search engine's indexer code.  We
156  * drop the index lock during the presumably CPU and IO heavy parts of
157  * the procedure and re-acquire it afterward, to avoid delaying other
158  * processes like imapds.  The reacquisition may of course fail.
159  * Returns an IMAP error code or 0 on success.
160  */
flush_batch(search_text_receiver_t * rx,struct mailbox * mailbox,ptrarray_t * batch)161 static int flush_batch(search_text_receiver_t *rx,
162                        struct mailbox *mailbox,
163                        ptrarray_t *batch)
164 {
165     int i;
166     int r = 0;
167 
168     /* give someone else a chance */
169     mailbox_unlock_index(mailbox, NULL);
170 
171     /* prefetch files */
172     for (i = 0 ; i < batch->count ; i++) {
173         message_t *msg = ptrarray_nth(batch, i);
174 
175         const char *fname;
176         r = message_get_fname(msg, &fname);
177         if (r) return r;
178         r = warmup_file(fname, 0, 0);
179         if (r) return r; /* means we failed to open a file,
180                             so we'll fail later anyway */
181     }
182 
183     for (i = 0 ; i < batch->count ; i++) {
184         message_t *msg = ptrarray_nth(batch, i);
185         if (!r) r = index_getsearchtext(msg, rx, 0);
186         message_unref(&msg);
187     }
188     ptrarray_truncate(batch, 0);
189 
190     if (r) return r;
191 
192     if (rx->flush) {
193         r = rx->flush(rx);
194         if (r) return r;
195     }
196 
197     return r;
198 }
199 
search_update_mailbox(search_text_receiver_t * rx,struct mailbox * mailbox,int flags)200 EXPORTED int search_update_mailbox(search_text_receiver_t *rx,
201                                    struct mailbox *mailbox,
202                                    int flags)
203 {
204     int r = 0;                  /* Using IMAP_* not SQUAT_* return codes here */
205     int r2;
206     int was_partial = 0;
207     int batch_size = search_batch_size();
208     ptrarray_t batch = PTRARRAY_INITIALIZER;
209     const message_t *msg;
210 
211     r = rx->begin_mailbox(rx, mailbox, flags);
212     if (r) goto done;
213 
214     struct mailbox_iter *iter = mailbox_iter_init(mailbox, 0, ITER_SKIP_EXPUNGED);
215     if (flags & SEARCH_UPDATE_INCREMENTAL) mailbox_iter_startuid(iter, rx->first_unindexed_uid(rx));
216 
217     while ((msg = mailbox_iter_step(iter))) {
218         const struct index_record *record = msg_record(msg);
219         if ((flags & SEARCH_UPDATE_BATCH) && batch.count >= batch_size) {
220             syslog(LOG_INFO, "search_update_mailbox batching %u messages to %s",
221                    batch.count, mailbox->name);
222             was_partial = 1;
223             break;
224         }
225 
226         message_t *msg = message_new_from_record(mailbox, record);
227 
228         if (!rx->is_indexed(rx, msg))
229             ptrarray_append(&batch, msg);
230         else
231             message_unref(&msg);
232     }
233     mailbox_iter_done(&iter);
234 
235     if (batch.count)
236         r = flush_batch(rx, mailbox, &batch);
237 
238  done:
239     ptrarray_fini(&batch);
240     r2 = rx->end_mailbox(rx, mailbox);
241     if (r) return r;
242     if (r2) return r2;
243     if (was_partial) return IMAP_AGAIN;
244     return 0;
245 }
246 
search_end_update(search_text_receiver_t * rx)247 EXPORTED int search_end_update(search_text_receiver_t *rx)
248 {
249     const struct search_engine *se = engine();
250     /* We don't fallback to the default search engine here
251      * because the default behaviour is not to index anything */
252     return (se->end_update ? se->end_update(rx) : 0);
253 }
254 
search_begin_snippets(void * internalised,int verbose,search_snippet_markup_t * markup,search_snippet_cb_t proc,void * rock)255 EXPORTED search_text_receiver_t *search_begin_snippets(void *internalised,
256                                                        int verbose,
257                                                        search_snippet_markup_t *markup,
258                                                        search_snippet_cb_t proc,
259                                                        void *rock)
260 {
261     const struct search_engine *se = engine();
262     return (se->begin_snippets ? se->begin_snippets(internalised,
263                                     verbose, markup, proc, rock) : NULL);
264 }
265 
search_end_snippets(search_text_receiver_t * rx)266 EXPORTED int search_end_snippets(search_text_receiver_t *rx)
267 {
268     const struct search_engine *se = engine();
269     return (se->end_snippets ? se->end_snippets(rx) : 0);
270 }
271 
search_describe_internalised(void * internalised)272 EXPORTED char *search_describe_internalised(void *internalised)
273 {
274     const struct search_engine *se = engine();
275     return (se->describe_internalised ?
276             se->describe_internalised(internalised) : 0);
277 }
278 
search_free_internalised(void * internalised)279 EXPORTED void search_free_internalised(void *internalised)
280 {
281     const struct search_engine *se = engine();
282     if (se->free_internalised) se->free_internalised(internalised);
283 }
284 
search_start_daemon(int verbose)285 EXPORTED int search_start_daemon(int verbose)
286 {
287     const struct search_engine *se = engine();
288     return (se->start_daemon ? se->start_daemon(verbose) : 0);
289 }
290 
search_stop_daemon(int verbose)291 EXPORTED int search_stop_daemon(int verbose)
292 {
293     const struct search_engine *se = engine();
294     return (se->stop_daemon ? se->stop_daemon(verbose) : 0);
295 }
296 
search_list_files(const char * userid,strarray_t * files)297 EXPORTED int search_list_files(const char *userid,
298                                strarray_t *files)
299 {
300     const struct search_engine *se = engine();
301     return (se->list_files ? se->list_files(userid, files) : 0);
302 }
303 
search_compact(const char * userid,const char * tempdir,const strarray_t * srctiers,const char * desttier,int flags)304 EXPORTED int search_compact(const char *userid,
305                             const char *tempdir,
306                             const strarray_t *srctiers,
307                             const char *desttier,
308                             int flags)
309 {
310     const struct search_engine *se = engine();
311     return (se->compact ? se->compact(userid, tempdir, srctiers, desttier, flags) : 0);
312 }
313 
search_deluser(const char * userid)314 EXPORTED int search_deluser(const char *userid)
315 {
316     const struct search_engine *se = engine();
317     return (se->deluser ? se->deluser(userid) : 0);
318 }
319 
search_op_as_string(int op)320 const char *search_op_as_string(int op)
321 {
322     static char buf[33];
323 
324     switch (op) {
325     case SEARCH_OP_AND: return "AND";
326     case SEARCH_OP_OR: return "OR";
327     case SEARCH_OP_NOT: return "NOT";
328     default:
329         snprintf(buf, sizeof(buf), "(%d)", op);
330         return buf;
331     }
332 }
333