1 /* search_engines.c -- Prefiltering routines for SEARCH
2  *
3  * Copyright (c) 1994-2008 Carnegie Mellon University.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in
14  *    the documentation and/or other materials provided with the
15  *    distribution.
16  *
17  * 3. The name "Carnegie Mellon University" must not be used to
18  *    endorse or promote products derived from this software without
19  *    prior written permission. For permission or any legal
20  *    details, please contact
21  *      Carnegie Mellon University
22  *      Center for Technology Transfer and Enterprise Creation
23  *      4615 Forbes Avenue
24  *      Suite 302
25  *      Pittsburgh, PA  15213
26  *      (412) 268-7393, fax: (412) 268-7395
27  *      innovation@andrew.cmu.edu
28  *
29  * 4. Redistributions of any form whatsoever must retain the following
30  *    acknowledgment:
31  *    "This product includes software developed by Computing Services
32  *     at Carnegie Mellon University (http://www.cmu.edu/computing/)."
33  *
34  * CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO
35  * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
36  * AND FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
37  * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
38  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
39  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
40  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
41  */
42 
43 #include <config.h>
44 
45 #include <sys/types.h>
46 #include <stdlib.h>
47 #include <syslog.h>
48 #include <string.h>
49 #ifdef HAVE_UNISTD_H
50 #include <unistd.h>
51 #endif
52 
53 #include "index.h"
54 #include "message.h"
55 #include "global.h"
56 #include "search_engines.h"
57 #include "ptrarray.h"
58 
59 /* generated headers are not necessarily in current directory */
60 #include "imap/imap_err.h"
61 
62 #ifdef USE_SQUAT
63 extern const struct search_engine squat_search_engine;
64 #endif
65 #ifdef USE_XAPIAN
66 extern const struct search_engine xapian_search_engine;
67 #endif
68 
69 static const struct search_engine default_search_engine = {
70     "default",
71     0,
72     NULL,
73     NULL,
74     NULL,
75     NULL,
76     NULL,
77     NULL,
78     NULL,
79     NULL,
80     NULL,
81     NULL,
82     NULL,
83     NULL,
84     NULL,
85     NULL
86 };
87 
search_engine(void)88 EXPORTED const struct search_engine *search_engine(void)
89 {
90     switch (config_getenum(IMAPOPT_SEARCH_ENGINE)) {
91 #ifdef USE_XAPIAN
92     case IMAP_ENUM_SEARCH_ENGINE_XAPIAN:
93         return &xapian_search_engine;
94 #endif
95 #ifdef USE_SQUAT
96     case IMAP_ENUM_SEARCH_ENGINE_SQUAT:
97         return &squat_search_engine;
98 #endif
99     default:
100         return &default_search_engine;
101     }
102 }
103 
104 EXPORTED search_snippet_markup_t default_snippet_markup = {
105     "<b>", "</b>", "..."
106 };
107 
search_part_as_string(int part)108 EXPORTED const char *search_part_as_string(int part)
109 {
110     static const char *names[SEARCH_NUM_PARTS] = {
111         /* ANY */NULL, "FROM", "TO", "CC",
112         "BCC", "SUBJECT", "LISTID", "TYPE",
113         "HEADERS", "BODY", "LOCATION", "ATTACHMENTNAME",
114         "ATTACHMENTBODY", "DELIVEREDTO", "LANGUAGE"
115     };
116 
117     return (part < 0 || part >= SEARCH_NUM_PARTS ? NULL : names[part]);
118 }
119 
search_part_is_body(int part)120 EXPORTED int search_part_is_body(int part)
121 {
122     return part == SEARCH_PART_BODY ||
123            part == SEARCH_PART_LOCATION ||
124            part == SEARCH_PART_ATTACHMENTBODY;
125 }
126 
127 
search_begin_search(struct mailbox * mailbox,int opts)128 EXPORTED search_builder_t *search_begin_search(struct mailbox *mailbox, int opts)
129 {
130     const struct search_engine *se = search_engine();
131     return (se->begin_search ?
132             se->begin_search(mailbox, opts) : NULL);
133 }
134 
search_end_search(search_builder_t * bx)135 EXPORTED void search_end_search(search_builder_t *bx)
136 {
137     const struct search_engine *se = search_engine();
138     if (se->end_search) se->end_search(bx);
139 }
140 
search_begin_update(int verbose)141 EXPORTED search_text_receiver_t *search_begin_update(int verbose)
142 {
143     const struct search_engine *se = search_engine();
144     /* We don't fallback to the default search engine here
145      * because the default behaviour is not to index anything */
146     return (se->begin_update ? se->begin_update(verbose) : NULL);
147 }
148 
search_batch_size(void)149 static int search_batch_size(void)
150 {
151     const struct search_engine *se = search_engine();
152     return (se->flags & SEARCH_FLAG_CAN_BATCH ?
153             config_getint(IMAPOPT_SEARCH_BATCHSIZE) : INT_MAX);
154 }
155 
156 /*
157  * Flush a batch of messages to the search engine's indexer code.  We
158  * drop the index lock during the presumably CPU and IO heavy parts of
159  * the procedure and re-acquire it afterward, to avoid delaying other
160  * processes like imapds.  The reacquisition may of course fail.
161  * Returns an IMAP error code or 0 on success.
162  */
flush_batch(search_text_receiver_t * rx,struct mailbox * mailbox,int flags,ptrarray_t * batch)163 static int flush_batch(search_text_receiver_t *rx,
164                        struct mailbox *mailbox,
165                        int flags,
166                        ptrarray_t *batch)
167 {
168     int i;
169     int r = 0;
170     int indexflags = 0;
171 
172     /* give someone else a chance */
173     mailbox_unlock_index(mailbox, NULL);
174 
175     /* prefetch files */
176     for (i = 0 ; i < batch->count ; i++) {
177         message_t *msg = ptrarray_nth(batch, i);
178 
179         const char *fname;
180         r = message_get_fname(msg, &fname);
181         if (r) return r;
182         r = warmup_file(fname, 0, 0);
183         if (r) return r; /* means we failed to open a file,
184                             so we'll fail later anyway */
185     }
186 
187     if (flags & SEARCH_UPDATE_ALLOW_PARTIALS)
188         indexflags |= INDEX_GETSEARCHTEXT_PARTIALS;
189 
190     for (i = 0 ; i < batch->count ; i++) {
191         message_t *msg = ptrarray_nth(batch, i);
192         if (!r) r = index_getsearchtext(msg, NULL, rx, indexflags);
193         message_unref(&msg);
194     }
195     ptrarray_truncate(batch, 0);
196 
197     if (r) return r;
198 
199     if (rx->flush) {
200         r = rx->flush(rx);
201         if (r) return r;
202     }
203 
204     return r;
205 }
206 
search_update_mailbox(search_text_receiver_t * rx,struct mailbox * mailbox,int min_indexlevel,int flags)207 EXPORTED int search_update_mailbox(search_text_receiver_t *rx,
208                                    struct mailbox *mailbox,
209                                    int min_indexlevel,
210                                    int flags)
211 {
212     int r = 0;                  /* Using IMAP_* not SQUAT_* return codes here */
213     int r2;
214     int incomplete_batch = 0;
215     int batch_size = search_batch_size();
216     ptrarray_t batch = PTRARRAY_INITIALIZER;
217     const message_t *msg;
218     int reindex_partials = flags & SEARCH_UPDATE_REINDEX_PARTIALS;
219 
220     r = rx->begin_mailbox(rx, mailbox, flags);
221     if (r) goto done;
222 
223     /* we want to index EXPUNGED messages too, because otherwise when we check the
224      * ranges matching the GUID in conversations DB later, we might think we've
225      * indexed it when we actually haven't */
226     struct mailbox_iter *iter = mailbox_iter_init(mailbox, 0, ITER_SKIP_UNLINKED);
227     if ((flags & SEARCH_UPDATE_INCREMENTAL) && !reindex_partials)
228         mailbox_iter_startuid(iter, rx->first_unindexed_uid(rx));
229 
230     while ((msg = mailbox_iter_step(iter))) {
231         const struct index_record *record = msg_record(msg);
232         if ((flags & SEARCH_UPDATE_BATCH) && batch.count >= batch_size) {
233             syslog(LOG_INFO, "search_update_mailbox batching %u messages to %s",
234                    batch.count, mailbox->name);
235             incomplete_batch = 1;
236             break;
237         }
238 
239         message_t *msg = message_new_from_record(mailbox, record);
240 
241         uint8_t indexlevel = rx->is_indexed(rx, msg);
242         if ((reindex_partials && (indexlevel & SEARCH_INDEXLEVEL_PARTIAL)) ||
243             (min_indexlevel && indexlevel < min_indexlevel)) {
244             /* Reindex that message */
245             indexlevel = 0;
246         }
247 
248         if (!indexlevel)
249             ptrarray_append(&batch, msg);
250         else
251             message_unref(&msg);
252     }
253     mailbox_iter_done(&iter);
254 
255     if (batch.count)
256         r = flush_batch(rx, mailbox, flags, &batch);
257 
258  done:
259     ptrarray_fini(&batch);
260     r2 = rx->end_mailbox(rx, mailbox);
261     if (r) return r;
262     if (r2) return r2;
263     if (incomplete_batch) return IMAP_AGAIN;
264     return 0;
265 }
266 
search_end_update(search_text_receiver_t * rx)267 EXPORTED int search_end_update(search_text_receiver_t *rx)
268 {
269     const struct search_engine *se = search_engine();
270     /* We don't fallback to the default search engine here
271      * because the default behaviour is not to index anything */
272     return (se->end_update ? se->end_update(rx) : 0);
273 }
274 
search_begin_snippets(void * internalised,int verbose,search_snippet_markup_t * markup,search_snippet_cb_t proc,void * rock)275 EXPORTED search_text_receiver_t *search_begin_snippets(void *internalised,
276                                                        int verbose,
277                                                        search_snippet_markup_t *markup,
278                                                        search_snippet_cb_t proc,
279                                                        void *rock)
280 {
281     const struct search_engine *se = search_engine();
282     return (se->begin_snippets ? se->begin_snippets(internalised,
283                                     verbose, markup, proc, rock) : NULL);
284 }
285 
search_end_snippets(search_text_receiver_t * rx)286 EXPORTED int search_end_snippets(search_text_receiver_t *rx)
287 {
288     const struct search_engine *se = search_engine();
289     return (se->end_snippets ? se->end_snippets(rx) : 0);
290 }
291 
search_describe_internalised(void * internalised)292 EXPORTED char *search_describe_internalised(void *internalised)
293 {
294     const struct search_engine *se = search_engine();
295     return (se->describe_internalised ?
296             se->describe_internalised(internalised) : 0);
297 }
298 
search_free_internalised(void * internalised)299 EXPORTED void search_free_internalised(void *internalised)
300 {
301     const struct search_engine *se = search_engine();
302     if (se->free_internalised) se->free_internalised(internalised);
303 }
304 
search_list_files(const char * userid,strarray_t * files)305 EXPORTED int search_list_files(const char *userid,
306                                strarray_t *files)
307 {
308     const struct search_engine *se = search_engine();
309     return (se->list_files ? se->list_files(userid, files) : 0);
310 }
311 
search_compact(const char * userid,const strarray_t * reindextiers,const strarray_t * srctiers,const char * desttier,int flags)312 EXPORTED int search_compact(const char *userid,
313                             const strarray_t *reindextiers,
314                             const strarray_t *srctiers,
315                             const char *desttier,
316                             int flags)
317 {
318     const struct search_engine *se = search_engine();
319     return (se->compact ? se->compact(userid, reindextiers, srctiers, desttier, flags) : 0);
320 }
321 
search_deluser(const char * userid)322 EXPORTED int search_deluser(const char *userid)
323 {
324     const struct search_engine *se = search_engine();
325     return (se->deluser ? se->deluser(userid) : 0);
326 }
327 
search_check_config(char ** errstr)328 EXPORTED int search_check_config(char **errstr)
329 {
330     const struct search_engine *se = search_engine();
331     return (se->check_config ? se->check_config(errstr) : 0);
332 }
333 
search_op_as_string(int op)334 const char *search_op_as_string(int op)
335 {
336     static char buf[33];
337 
338     switch (op) {
339     case SEARCH_OP_AND: return "AND";
340     case SEARCH_OP_OR: return "OR";
341     case SEARCH_OP_NOT: return "NOT";
342     default:
343         snprintf(buf, sizeof(buf), "(%d)", op);
344         return buf;
345     }
346 }
347 
search_can_match(enum search_op matchop,int partnum)348 EXPORTED int search_can_match(enum search_op matchop, int partnum)
349 {
350     const struct search_engine *se = search_engine();
351     return (se->can_match ? se->can_match(matchop, partnum) : 0);
352 }
353