1 /* search_engines.h -- Prefiltering routines for SEARCH 2 * 3 * Copyright (c) 1994-2008 Carnegie Mellon University. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in 14 * the documentation and/or other materials provided with the 15 * distribution. 16 * 17 * 3. The name "Carnegie Mellon University" must not be used to 18 * endorse or promote products derived from this software without 19 * prior written permission. For permission or any legal 20 * details, please contact 21 * Carnegie Mellon University 22 * Center for Technology Transfer and Enterprise Creation 23 * 4615 Forbes Avenue 24 * Suite 302 25 * Pittsburgh, PA 15213 26 * (412) 268-7393, fax: (412) 268-7395 27 * innovation@andrew.cmu.edu 28 * 29 * 4. Redistributions of any form whatsoever must retain the following 30 * acknowledgment: 31 * "This product includes software developed by Computing Services 32 * at Carnegie Mellon University (http://www.cmu.edu/computing/)." 33 * 34 * CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO 35 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 36 * AND FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE 37 * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 38 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN 39 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING 40 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 41 */ 42 43 #ifndef INCLUDED_SEARCH_ENGINES_H 44 #define INCLUDED_SEARCH_ENGINES_H 45 46 #include "mailbox.h" 47 #include "message_guid.h" 48 #include "util.h" 49 #include "strarray.h" 50 #include "bitvector.h" 51 52 #include "search_expr.h" 53 #include "search_part.h" 54 #include "search_sort.h" 55 56 typedef int (*search_hit_cb_t)(const char *mboxname, uint32_t uidvalidity, 57 uint32_t uid, const strarray_t *partids, 58 void *rock); 59 60 typedef int (*search_hitguid_cb_t)(const conv_guidrec_t *rec, size_t nguids, 61 void *rock); 62 63 typedef int (*search_snippet_cb_t)(struct mailbox *, uint32_t uid, 64 /* SEARCH_PART_* constants */int part, 65 const char *snippet, void *rock); 66 67 typedef struct search_builder search_builder_t; 68 struct search_builder { 69 /* These values are carefully chosen a) not to clash with the 70 * SEARCH_PART_* constants, and b) to reflect operator precedence */ 71 /* Values > 1024 are reserved for search engine implementations */ 72 #define SEARCH_OP_AND 101 73 #define SEARCH_OP_OR 102 74 #define SEARCH_OP_NOT 103 75 #define SEARCH_OP_TRUE 104 76 #define SEARCH_OP_FALSE 105 77 void (*begin_boolean)(search_builder_t *, int op); 78 void (*end_boolean)(search_builder_t *, int op); 79 void (*match)(search_builder_t *, int part, const char *str); 80 void (*matchlist)(search_builder_t *, int part, const strarray_t *items); 81 void *(*get_internalised)(search_builder_t *); 82 int (*run)(search_builder_t *, search_hit_cb_t proc, void *rock); 83 /* XXX - guidsearch is a hack for speeding up JMAP email queries */ 84 int (*run_guidsearch)(search_builder_t *, search_hitguid_cb_t proc, void *rock); 85 }; 86 87 typedef struct search_snippet_markup { 88 const char *hi_start; 89 const char *hi_end; 90 const char *omit; 91 } search_snippet_markup_t; 92 93 extern search_snippet_markup_t default_snippet_markup; 94 95 /* The functions in search_text_receiver_t get called at least once for each part of every message. 96 The invocations form a sequence: 97 begin_message(message_t) 98 receiver->begin_part(<part1>, <contentid>) 99 receiver->append_text(<text>) (1 or more times) 100 receiver->end_part(<part1>) 101 ... 102 receiver->begin_part(<partN>) 103 receiver->append_text(<text>) (1 or more times) 104 receiver->end_part(<partN>) 105 receiver->end_message() 106 107 The parts need not arrive in any particular order, but each part 108 can only participate in one begin_part ... append_text ... end_part 109 sequence, and the sequences for different parts cannot be interleaved. 110 Multiple parts can share the same <contentid>. 111 */ 112 typedef struct search_text_receiver search_text_receiver_t; 113 struct search_text_receiver { 114 int (*begin_mailbox)(search_text_receiver_t *, 115 struct mailbox *, int incremental); 116 uint32_t (*first_unindexed_uid)(search_text_receiver_t *); 117 int (*is_indexed)(search_text_receiver_t *, message_t *msg); 118 int (*begin_message)(search_text_receiver_t *, message_t *msg); 119 void (*begin_part)(search_text_receiver_t *, int part, 120 const struct message_guid *content_guid); 121 void (*append_text)(search_text_receiver_t *, const struct buf *); 122 void (*end_part)(search_text_receiver_t *, int part); 123 int (*end_message)(search_text_receiver_t *); 124 int (*end_mailbox)(search_text_receiver_t *, 125 struct mailbox *); 126 int (*flush)(search_text_receiver_t *); 127 int (*audit_mailbox)(search_text_receiver_t *, bitvector_t *unindexed); 128 int (*index_charset_flags)(int base_flags); 129 }; 130 131 struct search_lang_stats { 132 char *iso_lang; 133 double weight; // of total indexed docs 134 }; 135 136 #define SEARCH_FLAG_CAN_BATCH (1<<0) 137 #define SEARCH_FLAG_CAN_GUIDSEARCH (1<<1) 138 struct search_engine { 139 const char *name; 140 unsigned int flags; 141 #define _SEARCH_VERBOSE_MASK (0x7) 142 #define SEARCH_VERBOSE(v) ((v)&_SEARCH_VERBOSE_MASK) 143 #define SEARCH_MULTIPLE (1<<3) /* return results from 144 * multiple folders */ 145 // DEPRECATED: #define SEARCH_UNINDEXED (1<<4) 146 #define SEARCH_COMPACT_COPYONE (1<<5) /* if only one source, just copy */ 147 #define SEARCH_COMPACT_FILTER (1<<6) /* filter resulting DB for 148 * expunged records */ 149 #define SEARCH_COMPACT_REINDEX (1<<7) /* re-index all matching messages */ 150 #define SEARCH_COMPACT_ONLYUPGRADE (1<<8) /* only compact if reindexing */ 151 #define SEARCH_COMPACT_XAPINDEXED (1<<9) /* use XAPIAN index */ 152 search_builder_t *(*begin_search)(struct mailbox *, int opts); 153 void (*end_search)(search_builder_t *); 154 search_text_receiver_t *(*begin_update)(int verbose); 155 int (*end_update)(search_text_receiver_t *); 156 search_text_receiver_t *(*begin_snippets)(void *internalised, 157 int verbose, 158 search_snippet_markup_t *markup, 159 search_snippet_cb_t, 160 void *rock); 161 int (*end_snippets)(search_text_receiver_t *); 162 char *(*describe_internalised)(void *); 163 void (*free_internalised)(void *); 164 int (*list_files)(const char *userid, strarray_t *); 165 int (*compact)(const char *userid, const char *tempdir, 166 const strarray_t *srctiers, const char *desttier, 167 int flags); 168 int (*deluser)(const char *userid); 169 int (*check_config)(char **errstr); 170 }; 171 172 /* Returns the configured search engine */ 173 extern const struct search_engine *search_engine(); 174 175 /* 176 * Search for messages which could match the query built with the 177 * search_builder_t. Calls 'proc' once for each hit found. If 'single' 178 * is true, only hits in 'mailbox' are reported; otherwise hits in any 179 * folder in the same conversation scope (i.e. the same user) as 180 * reported. 181 */ 182 extern search_builder_t *search_begin_search(struct mailbox *, int opts); 183 extern void search_end_search(search_builder_t *); 184 185 #define SEARCH_UPDATE_INCREMENTAL (1<<0) 186 #define SEARCH_UPDATE_NONBLOCKING (1<<1) 187 #define SEARCH_UPDATE_BATCH (1<<2) 188 #define SEARCH_UPDATE_XAPINDEXED (1<<3) 189 #define SEARCH_UPDATE_AUDIT (1<<4) 190 search_text_receiver_t *search_begin_update(int verbose); 191 int search_update_mailbox(search_text_receiver_t *rx, 192 struct mailbox *mailbox, 193 int flags); 194 int search_end_update(search_text_receiver_t *rx); 195 196 /* Create a search text receiver for snippets. For each non-empty 197 * snippet generated from a message search part, callback proc is called. 198 * 199 * The callback is called in ascending order of SEARCH_PART definitions, 200 * where higher valued search parts are more costly to generate snippets. 201 * The callback may return 0 to continue snippet generation for the 202 * message, or return IMAP_OK_COMPLETED to indicate that it does not 203 * require more snippets for this message. It still must be prepared 204 * to receive more snippets for this message. */ 205 search_text_receiver_t *search_begin_snippets(void *internalised, 206 int verbose, 207 search_snippet_markup_t *markup, 208 search_snippet_cb_t proc, 209 void *rock); 210 int search_end_snippets(search_text_receiver_t *rx); 211 /* Returns a new string which describes the internalised query, and must 212 * be free()d by the caller. Only useful for whitebox testing. */ 213 char *search_describe_internalised(void *internalised); 214 void search_free_internalised(void *internalised); 215 int search_list_files(const char *userid, strarray_t *); 216 int search_compact(const char *userid, const char *tempdir, 217 const strarray_t *srctiers, const char *desttier, int verbose); 218 int search_deluser(const char *userid); 219 int search_check_config(char **errstr); 220 221 222 /* for debugging */ 223 extern const char *search_op_as_string(int op); 224 225 226 227 #endif 228