1 /* search_engines.h --  Prefiltering routines for SEARCH
2  *
3  * Copyright (c) 1994-2008 Carnegie Mellon University.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in
14  *    the documentation and/or other materials provided with the
15  *    distribution.
16  *
17  * 3. The name "Carnegie Mellon University" must not be used to
18  *    endorse or promote products derived from this software without
19  *    prior written permission. For permission or any legal
20  *    details, please contact
21  *      Carnegie Mellon University
22  *      Center for Technology Transfer and Enterprise Creation
23  *      4615 Forbes Avenue
24  *      Suite 302
25  *      Pittsburgh, PA  15213
26  *      (412) 268-7393, fax: (412) 268-7395
27  *      innovation@andrew.cmu.edu
28  *
29  * 4. Redistributions of any form whatsoever must retain the following
30  *    acknowledgment:
31  *    "This product includes software developed by Computing Services
32  *     at Carnegie Mellon University (http://www.cmu.edu/computing/)."
33  *
34  * CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO
35  * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
36  * AND FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
37  * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
38  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
39  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
40  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
41  */
42 
43 #ifndef INCLUDED_SEARCH_ENGINES_H
44 #define INCLUDED_SEARCH_ENGINES_H
45 
46 #include "mailbox.h"
47 #include "message_guid.h"
48 #include "util.h"
49 #include "strarray.h"
50 #include "bitvector.h"
51 
52 #include "search_expr.h"
53 #include "search_part.h"
54 #include "search_sort.h"
55 
56 typedef int (*search_hit_cb_t)(const char *mboxname, uint32_t uidvalidity,
57                                uint32_t uid, const strarray_t *partids,
58                                void *rock);
59 
60 typedef int (*search_hitguid_cb_t)(const conv_guidrec_t *rec, size_t nguids,
61                                    void *rock);
62 
/*
 * Snippet callback: the type of the `proc` argument given to
 * search_begin_snippets() and to the begin_snippets() engine hook.
 *
 *   mailbox, uid - identify the message the snippet belongs to
 *   part         - one of the SEARCH_PART_* constants
 *   snippet      - the snippet text
 *   rock         - the caller's opaque pointer
 */
typedef int (*search_snippet_cb_t)(struct mailbox *mailbox,
                                   uint32_t uid,
                                   int part,
                                   const char *snippet,
                                   void *rock);
66 
67 typedef struct search_builder search_builder_t;
68 struct search_builder {
69 /* These values are carefully chosen a) not to clash with the
70  * SEARCH_PART_* constants, and b) to reflect operator precedence */
71 /* Values > 1024 are reserved for search engine implementations */
72 #define SEARCH_OP_AND       101
73 #define SEARCH_OP_OR        102
74 #define SEARCH_OP_NOT       103
75 #define SEARCH_OP_TRUE      104
76 #define SEARCH_OP_FALSE     105
77     void (*begin_boolean)(search_builder_t *, int op);
78     void (*end_boolean)(search_builder_t *, int op);
79     void (*match)(search_builder_t *, int part, const char *str);
80     void (*matchlist)(search_builder_t *, int part, const strarray_t *items);
81     void *(*get_internalised)(search_builder_t *);
82     int (*run)(search_builder_t *, search_hit_cb_t proc, void *rock);
83     /* XXX - guidsearch is a hack for speeding up JMAP email queries */
84     int (*run_guidsearch)(search_builder_t *, search_hitguid_cb_t proc, void *rock);
85 };
86 
/*
 * Three strings used to mark up generated snippets.
 * NOTE(review): judging by the names, hi_start/hi_end presumably bracket
 * a highlighted match and omit stands in for elided text -- confirm
 * against an engine's snippet generator.
 */
typedef struct search_snippet_markup search_snippet_markup_t;
struct search_snippet_markup {
    const char *hi_start;
    const char *hi_end;
    const char *omit;
};

/* Default markup; defined outside this header. */
extern search_snippet_markup_t default_snippet_markup;
94 
/* The functions in search_text_receiver_t get called at least once for each
   part of every message.  For one message the invocations form a sequence:
       receiver->begin_message(<message>)
       receiver->begin_part(<part1>, <contentid>)
       receiver->append_text(<text>)     (1 or more times)
       receiver->end_part(<part1>)
       ...
       receiver->begin_part(<partN>, <contentid>)
       receiver->append_text(<text>)     (1 or more times)
       receiver->end_part(<partN>)
       receiver->end_message()

   The parts need not arrive in any particular order, but each part
   can only participate in one begin_part ... append_text ... end_part
   sequence, and the sequences for different parts cannot be interleaved.
   Multiple parts can share the same <contentid>.
*/
112 typedef struct search_text_receiver search_text_receiver_t;
113 struct search_text_receiver {
114     int (*begin_mailbox)(search_text_receiver_t *,
115                          struct mailbox *, int incremental);
116     uint32_t (*first_unindexed_uid)(search_text_receiver_t *);
117     int (*is_indexed)(search_text_receiver_t *, message_t *msg);
118     int (*begin_message)(search_text_receiver_t *, message_t *msg);
119     void (*begin_part)(search_text_receiver_t *, int part,
120                        const struct message_guid *content_guid);
121     void (*append_text)(search_text_receiver_t *, const struct buf *);
122     void (*end_part)(search_text_receiver_t *, int part);
123     int (*end_message)(search_text_receiver_t *);
124     int (*end_mailbox)(search_text_receiver_t *,
125                        struct mailbox *);
126     int (*flush)(search_text_receiver_t *);
127     int (*audit_mailbox)(search_text_receiver_t *, bitvector_t *unindexed);
128     int (*index_charset_flags)(int base_flags);
129 };
130 
/*
 * One language statistic: a language code paired with a weight.
 * NOTE(review): who owns (and frees) iso_lang is not visible here.
 */
struct search_lang_stats {
    char *iso_lang;     /* language code */
    double weight;      /* of total indexed docs */
};
135 
/* Engine capability bits.
 * NOTE(review): presumably tested against the `flags` member of
 * struct search_engine -- confirm at the call sites. */
#define SEARCH_FLAG_CAN_BATCH           (1<<0)
#define SEARCH_FLAG_CAN_GUIDSEARCH      (1<<1)
138 struct search_engine {
139     const char *name;
140     unsigned int flags;
141 #define _SEARCH_VERBOSE_MASK    (0x7)
142 #define SEARCH_VERBOSE(v)       ((v)&_SEARCH_VERBOSE_MASK)
143 #define SEARCH_MULTIPLE         (1<<3)  /* return results from
144                                          * multiple folders */
145 // DEPRECATED: #define SEARCH_UNINDEXED   (1<<4)
146 #define SEARCH_COMPACT_COPYONE  (1<<5)  /* if only one source, just copy */
147 #define SEARCH_COMPACT_FILTER   (1<<6)  /* filter resulting DB for
148                                          * expunged records */
149 #define SEARCH_COMPACT_REINDEX  (1<<7)  /* re-index all matching messages */
150 #define SEARCH_COMPACT_ONLYUPGRADE (1<<8) /* only compact if reindexing */
151 #define SEARCH_COMPACT_XAPINDEXED (1<<9) /* use XAPIAN index */
152     search_builder_t *(*begin_search)(struct mailbox *, int opts);
153     void (*end_search)(search_builder_t *);
154     search_text_receiver_t *(*begin_update)(int verbose);
155     int (*end_update)(search_text_receiver_t *);
156     search_text_receiver_t *(*begin_snippets)(void *internalised,
157                                               int verbose,
158                                               search_snippet_markup_t *markup,
159                                               search_snippet_cb_t,
160                                               void *rock);
161     int (*end_snippets)(search_text_receiver_t *);
162     char *(*describe_internalised)(void *);
163     void (*free_internalised)(void *);
164     int (*list_files)(const char *userid, strarray_t *);
165     int (*compact)(const char *userid, const char *tempdir,
166                    const strarray_t *srctiers, const char *desttier,
167                    int flags);
168     int (*deluser)(const char *userid);
169     int (*check_config)(char **errstr);
170 };
171 
/*
 * Returns the configured search engine.
 *
 * Declared with an explicit (void) parameter list: before C23 an empty
 * list `()` declares a function with unspecified parameters rather than
 * a prototype, so the compiler would not diagnose calls that pass
 * arguments.
 */
extern const struct search_engine *search_engine(void);
174 
175 /*
176  * Search for messages which could match the query built with the
177  * search_builder_t.  Calls 'proc' once for each hit found.  If 'single'
178  * is true, only hits in 'mailbox' are reported; otherwise hits in any
179  * folder in the same conversation scope (i.e. the same user) as
180  * reported.
181  */
182 extern search_builder_t *search_begin_search(struct mailbox *, int opts);
183 extern void search_end_search(search_builder_t *);
184 
/* Option bits for index updates.
 * NOTE(review): presumably OR-ed together into the `flags` argument of
 * search_update_mailbox() -- confirm at the call sites. */
#define SEARCH_UPDATE_INCREMENTAL   (1<<0)
#define SEARCH_UPDATE_NONBLOCKING   (1<<1)
#define SEARCH_UPDATE_BATCH         (1<<2)
#define SEARCH_UPDATE_XAPINDEXED    (1<<3)
#define SEARCH_UPDATE_AUDIT         (1<<4)
190 search_text_receiver_t *search_begin_update(int verbose);
191 int search_update_mailbox(search_text_receiver_t *rx,
192                           struct mailbox *mailbox,
193                           int flags);
194 int search_end_update(search_text_receiver_t *rx);
195 
196 /* Create a search text receiver for snippets. For each non-empty
197  * snippet generated from a message search part, callback proc is called.
198  *
199  * The callback is called in ascending order of SEARCH_PART definitions,
200  * where higher valued search parts are more costly to generate snippets.
201  * The callback may return 0 to continue snippet generation for the
202  * message, or return IMAP_OK_COMPLETED to indicate that it does not
203  * require more snippets for this message. It still must be prepared
204  * to receive more snippets for this message. */
205 search_text_receiver_t *search_begin_snippets(void *internalised,
206                                               int verbose,
207                                               search_snippet_markup_t *markup,
208                                               search_snippet_cb_t proc,
209                                               void *rock);
210 int search_end_snippets(search_text_receiver_t *rx);
/*
 * search_describe_internalised() returns a newly allocated string that
 * describes the internalised query; the caller must free() it.  Only
 * useful for whitebox testing.
 */
char *search_describe_internalised(void *internalised);

/* Disposes of an internalised query. */
void search_free_internalised(void *internalised);
215 int search_list_files(const char *userid, strarray_t *);
216 int search_compact(const char *userid, const char *tempdir,
217                    const strarray_t *srctiers, const char *desttier, int verbose);
218 int search_deluser(const char *userid);
219 int search_check_config(char **errstr);
220 
221 
/*
 * Debugging aid: returns a string for operator code `op`.
 * NOTE(review): presumably `op` is one of the SEARCH_OP_* values, and
 * the result presumably must not be freed -- confirm in the definition.
 */
extern const char *search_op_as_string(int op);
224 
225 
226 
227 #endif
228