1 /* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */
2
3 #include "lib.h"
4 #include "array.h"
5 #include "hex-binary.h"
6 #include "mail-index.h"
7 #include "mail-namespace.h"
8 #include "mail-storage-private.h"
9 #include "mailbox-list-iter.h"
10 #include "mail-search.h"
11 #include "fts-api-private.h"
12
13 struct event_category event_category_fts = {
14 .name = "fts",
15 };
16
17 static ARRAY(const struct fts_backend *) backends;
18
fts_backend_register(const struct fts_backend * backend)19 void fts_backend_register(const struct fts_backend *backend)
20 {
21 if (!array_is_created(&backends))
22 i_array_init(&backends, 4);
23 array_push_back(&backends, &backend);
24 }
25
fts_backend_unregister(const char * name)26 void fts_backend_unregister(const char *name)
27 {
28 const struct fts_backend *const *be;
29 unsigned int i, count;
30
31 be = array_get(&backends, &count);
32 for (i = 0; i < count; i++) {
33 if (strcmp(be[i]->name, name) == 0) {
34 array_delete(&backends, i, 1);
35 break;
36 }
37 }
38 if (i == count)
39 i_panic("fts_backend_unregister(%s): unknown backend", name);
40
41 if (count == 1)
42 array_free(&backends);
43 }
44
45 static const struct fts_backend *
fts_backend_class_lookup(const char * backend_name)46 fts_backend_class_lookup(const char *backend_name)
47 {
48 const struct fts_backend *const *be;
49 unsigned int i, count;
50
51 if (array_is_created(&backends)) {
52 be = array_get(&backends, &count);
53 for (i = 0; i < count; i++) {
54 if (strcmp(be[i]->name, backend_name) == 0)
55 return be[i];
56 }
57 }
58 return NULL;
59 }
60
fts_backend_init(const char * backend_name,struct mail_namespace * ns,const char ** error_r,struct fts_backend ** backend_r)61 int fts_backend_init(const char *backend_name, struct mail_namespace *ns,
62 const char **error_r, struct fts_backend **backend_r)
63 {
64 const struct fts_backend *be;
65 struct fts_backend *backend;
66
67 be = fts_backend_class_lookup(backend_name);
68 if (be == NULL) {
69 *error_r = "Unknown backend";
70 return -1;
71 }
72
73 backend = be->v.alloc();
74 backend->ns = ns;
75 if (backend->v.init(backend, error_r) < 0) {
76 i_free(backend);
77 return -1;
78 }
79 *backend_r = backend;
80 return 0;
81 }
82
fts_backend_deinit(struct fts_backend ** _backend)83 void fts_backend_deinit(struct fts_backend **_backend)
84 {
85 struct fts_backend *backend = *_backend;
86
87 *_backend = NULL;
88 backend->v.deinit(backend);
89 }
90
fts_backend_get_last_uid(struct fts_backend * backend,struct mailbox * box,uint32_t * last_uid_r)91 int fts_backend_get_last_uid(struct fts_backend *backend, struct mailbox *box,
92 uint32_t *last_uid_r)
93 {
94 struct fts_index_header hdr;
95
96 if (box->virtual_vfuncs != NULL) {
97 /* virtual mailboxes themselves don't have any indexes,
98 so catch this call here */
99 if (!fts_index_get_header(box, &hdr))
100 *last_uid_r = 0;
101 else
102 *last_uid_r = hdr.last_indexed_uid;
103 return 0;
104 }
105
106 return backend->v.get_last_uid(backend, box, last_uid_r);
107 }
108
fts_backend_is_updating(struct fts_backend * backend)109 bool fts_backend_is_updating(struct fts_backend *backend)
110 {
111 return backend->updating;
112 }
113
114 struct fts_backend_update_context *
fts_backend_update_init(struct fts_backend * backend)115 fts_backend_update_init(struct fts_backend *backend)
116 {
117 struct fts_backend_update_context *ctx;
118
119 i_assert(!backend->updating);
120
121 backend->updating = TRUE;
122 ctx = backend->v.update_init(backend);
123 if ((backend->flags & FTS_BACKEND_FLAG_NORMALIZE_INPUT) != 0)
124 ctx->normalizer = backend->ns->user->default_normalizer;
125 return ctx;
126 }
127
fts_backend_set_cur_mailbox(struct fts_backend_update_context * ctx)128 static void fts_backend_set_cur_mailbox(struct fts_backend_update_context *ctx)
129 {
130 fts_backend_update_unset_build_key(ctx);
131 if (ctx->backend_box != ctx->cur_box) {
132 ctx->backend->v.update_set_mailbox(ctx, ctx->cur_box);
133 ctx->backend_box = ctx->cur_box;
134 }
135 }
136
fts_backend_update_deinit(struct fts_backend_update_context ** _ctx)137 int fts_backend_update_deinit(struct fts_backend_update_context **_ctx)
138 {
139 struct fts_backend_update_context *ctx = *_ctx;
140 struct fts_backend *backend = ctx->backend;
141 int ret;
142
143 *_ctx = NULL;
144
145 ctx->cur_box = NULL;
146 fts_backend_set_cur_mailbox(ctx);
147
148 ret = backend->v.update_deinit(ctx);
149 backend->updating = FALSE;
150 return ret;
151 }
152
fts_backend_update_set_mailbox(struct fts_backend_update_context * ctx,struct mailbox * box)153 void fts_backend_update_set_mailbox(struct fts_backend_update_context *ctx,
154 struct mailbox *box)
155 {
156 if (ctx->backend_box != NULL && box != ctx->backend_box) {
157 /* make sure we don't reference the backend box anymore */
158 ctx->backend->v.update_set_mailbox(ctx, NULL);
159 ctx->backend_box = NULL;
160 }
161 ctx->cur_box = box;
162 }
163
fts_backend_update_expunge(struct fts_backend_update_context * ctx,uint32_t uid)164 void fts_backend_update_expunge(struct fts_backend_update_context *ctx,
165 uint32_t uid)
166 {
167 fts_backend_set_cur_mailbox(ctx);
168 ctx->backend->v.update_expunge(ctx, uid);
169 }
170
fts_backend_update_set_build_key(struct fts_backend_update_context * ctx,const struct fts_backend_build_key * key)171 bool fts_backend_update_set_build_key(struct fts_backend_update_context *ctx,
172 const struct fts_backend_build_key *key)
173 {
174 fts_backend_set_cur_mailbox(ctx);
175
176 i_assert(ctx->cur_box != NULL);
177
178 if (!ctx->backend->v.update_set_build_key(ctx, key))
179 return FALSE;
180 ctx->build_key_open = TRUE;
181 return TRUE;
182 }
183
fts_backend_update_unset_build_key(struct fts_backend_update_context * ctx)184 void fts_backend_update_unset_build_key(struct fts_backend_update_context *ctx)
185 {
186 if (ctx->build_key_open) {
187 ctx->backend->v.update_unset_build_key(ctx);
188 ctx->build_key_open = FALSE;
189 }
190 }
191
fts_backend_update_build_more(struct fts_backend_update_context * ctx,const unsigned char * data,size_t size)192 int fts_backend_update_build_more(struct fts_backend_update_context *ctx,
193 const unsigned char *data, size_t size)
194 {
195 i_assert(ctx->build_key_open);
196
197 return ctx->backend->v.update_build_more(ctx, data, size);
198 }
199
fts_backend_refresh(struct fts_backend * backend)200 int fts_backend_refresh(struct fts_backend *backend)
201 {
202 return backend->v.refresh(backend);
203 }
204
fts_backend_reset_last_uids(struct fts_backend * backend)205 int fts_backend_reset_last_uids(struct fts_backend *backend)
206 {
207 struct mailbox_list_iterate_context *iter;
208 const struct mailbox_info *info;
209 struct mailbox *box;
210 int ret = 0;
211
212 iter = mailbox_list_iter_init(backend->ns->list, "*",
213 MAILBOX_LIST_ITER_SKIP_ALIASES |
214 MAILBOX_LIST_ITER_NO_AUTO_BOXES);
215 while ((info = mailbox_list_iter_next(iter)) != NULL) {
216 if ((info->flags &
217 (MAILBOX_NONEXISTENT | MAILBOX_NOSELECT)) != 0)
218 continue;
219
220 box = mailbox_alloc(info->ns->list, info->vname, 0);
221 if (mailbox_open(box) == 0) {
222 if (fts_index_set_last_uid(box, 0) < 0)
223 ret = -1;
224 }
225 mailbox_free(&box);
226 }
227 if (mailbox_list_iter_deinit(&iter) < 0)
228 ret = -1;
229 return ret;
230 }
231
fts_backend_rescan(struct fts_backend * backend)232 int fts_backend_rescan(struct fts_backend *backend)
233 {
234 struct mailbox *box;
235 bool virtual_storage;
236
237 box = mailbox_alloc(backend->ns->list, "", 0);
238 virtual_storage = box->virtual_vfuncs != NULL;
239 mailbox_free(&box);
240
241 if (virtual_storage) {
242 /* just reset the last-uids for a virtual storage. */
243 return fts_backend_reset_last_uids(backend);
244 }
245
246 return backend->v.rescan == NULL ? 0 :
247 backend->v.rescan(backend);
248 }
249
fts_backend_optimize(struct fts_backend * backend)250 int fts_backend_optimize(struct fts_backend *backend)
251 {
252 return backend->v.optimize == NULL ? 0 :
253 backend->v.optimize(backend);
254 }
255
256 static void
fts_merge_maybies(ARRAY_TYPE (seq_range)* dest_maybe,const ARRAY_TYPE (seq_range)* dest_definite,const ARRAY_TYPE (seq_range)* src_maybe,const ARRAY_TYPE (seq_range)* src_definite)257 fts_merge_maybies(ARRAY_TYPE(seq_range) *dest_maybe,
258 const ARRAY_TYPE(seq_range) *dest_definite,
259 const ARRAY_TYPE(seq_range) *src_maybe,
260 const ARRAY_TYPE(seq_range) *src_definite)
261 {
262 ARRAY_TYPE(seq_range) src_unwanted;
263 const struct seq_range *range;
264 struct seq_range new_range;
265 unsigned int i, count;
266 uint32_t seq;
267
268 /* add/leave to dest_maybe if at least one list has maybe,
269 and no lists have none */
270
271 /* create unwanted sequences list from both sources */
272 t_array_init(&src_unwanted, 128);
273 new_range.seq1 = 0; new_range.seq2 = (uint32_t)-1;
274 array_push_back(&src_unwanted, &new_range);
275 seq_range_array_remove_seq_range(&src_unwanted, src_maybe);
276 seq_range_array_remove_seq_range(&src_unwanted, src_definite);
277
278 /* drop unwanted uids */
279 seq_range_array_remove_seq_range(dest_maybe, &src_unwanted);
280
281 /* add uids that are in dest_definite and src_maybe lists */
282 range = array_get(dest_definite, &count);
283 for (i = 0; i < count; i++) {
284 for (seq = range[i].seq1; seq <= range[i].seq2; seq++) {
285 if (seq_range_exists(src_maybe, seq))
286 seq_range_array_add(dest_maybe, seq);
287 }
288 }
289 }
290
/* Filter the dest lists with the filter lists.

   maybe_dest is updated first by fts_merge_maybies(), which reads
   definite_dest as it was before filtering. After that definite_dest is
   reduced to the sequences that are also in definite_filter. */
void fts_filter_uids(ARRAY_TYPE(seq_range) *definite_dest,
		     const ARRAY_TYPE(seq_range) *definite_filter,
		     ARRAY_TYPE(seq_range) *maybe_dest,
		     const ARRAY_TYPE(seq_range) *maybe_filter)
{
	/* the merge runs in its own T_BEGIN/T_END block */
	T_BEGIN {
		fts_merge_maybies(maybe_dest, definite_dest,
				  maybe_filter, definite_filter);
	} T_END;

	/* keep only what exists in both lists. the rest is in
	   maybies or not wanted */
	seq_range_array_intersect(definite_dest, definite_filter);
}
304
fts_backend_default_can_lookup(struct fts_backend * backend,const struct mail_search_arg * args)305 bool fts_backend_default_can_lookup(struct fts_backend *backend,
306 const struct mail_search_arg *args)
307 {
308 for (; args != NULL; args = args->next) {
309 switch (args->type) {
310 case SEARCH_OR:
311 case SEARCH_SUB:
312 case SEARCH_INTHREAD:
313 if (fts_backend_default_can_lookup(backend,
314 args->value.subargs))
315 return TRUE;
316 break;
317 case SEARCH_HEADER:
318 case SEARCH_HEADER_ADDRESS:
319 case SEARCH_HEADER_COMPRESS_LWSP:
320 case SEARCH_BODY:
321 case SEARCH_TEXT:
322 if (!args->no_fts)
323 return TRUE;
324 break;
325 default:
326 break;
327 }
328 }
329 return FALSE;
330 }
331
fts_backend_can_lookup(struct fts_backend * backend,const struct mail_search_arg * args)332 bool fts_backend_can_lookup(struct fts_backend *backend,
333 const struct mail_search_arg *args)
334 {
335 return backend->v.can_lookup(backend, args);
336 }
337
fts_score_map_sort(const struct fts_score_map * m1,const struct fts_score_map * m2)338 static int fts_score_map_sort(const struct fts_score_map *m1,
339 const struct fts_score_map *m2)
340 {
341 if (m1->uid < m2->uid)
342 return -1;
343 if (m1->uid > m2->uid)
344 return 1;
345 return 0;
346 }
347
fts_backend_lookup(struct fts_backend * backend,struct mailbox * box,struct mail_search_arg * args,enum fts_lookup_flags flags,struct fts_result * result)348 int fts_backend_lookup(struct fts_backend *backend, struct mailbox *box,
349 struct mail_search_arg *args,
350 enum fts_lookup_flags flags,
351 struct fts_result *result)
352 {
353 array_clear(&result->definite_uids);
354 array_clear(&result->maybe_uids);
355 array_clear(&result->scores);
356
357 if (backend->v.lookup(backend, box, args, flags, result) < 0)
358 return -1;
359
360 if (!result->scores_sorted && array_is_created(&result->scores)) {
361 array_sort(&result->scores, fts_score_map_sort);
362 result->scores_sorted = TRUE;
363 }
364 return 0;
365 }
366
fts_backend_lookup_multi(struct fts_backend * backend,struct mailbox * const boxes[],struct mail_search_arg * args,enum fts_lookup_flags flags,struct fts_multi_result * result)367 int fts_backend_lookup_multi(struct fts_backend *backend,
368 struct mailbox *const boxes[],
369 struct mail_search_arg *args,
370 enum fts_lookup_flags flags,
371 struct fts_multi_result *result)
372 {
373 unsigned int i;
374
375 i_assert(boxes[0] != NULL);
376
377 if (backend->v.lookup_multi != NULL) {
378 if (backend->v.lookup_multi(backend, boxes, args,
379 flags, result) < 0)
380 return -1;
381 if (result->box_results == NULL) {
382 result->box_results = p_new(result->pool,
383 struct fts_result, 1);
384 }
385 return 0;
386 }
387
388 for (i = 0; boxes[i] != NULL; i++) ;
389 result->box_results = p_new(result->pool, struct fts_result, i+1);
390
391 for (i = 0; boxes[i] != NULL; i++) {
392 struct fts_result *box_result = &result->box_results[i];
393
394 p_array_init(&box_result->definite_uids, result->pool, 32);
395 p_array_init(&box_result->maybe_uids, result->pool, 32);
396 p_array_init(&box_result->scores, result->pool, 32);
397 if (backend->v.lookup(backend, boxes[i], args,
398 flags, box_result) < 0)
399 return -1;
400 }
401 return 0;
402 }
403
fts_backend_lookup_done(struct fts_backend * backend)404 void fts_backend_lookup_done(struct fts_backend *backend)
405 {
406 if (backend->v.lookup_done != NULL)
407 backend->v.lookup_done(backend);
408 }
409
fts_index_get_ext_id(struct mailbox * box)410 static uint32_t fts_index_get_ext_id(struct mailbox *box)
411 {
412 return mail_index_ext_register(box->index, "fts",
413 sizeof(struct fts_index_header),
414 0, 0);
415 }
416
fts_index_get_header(struct mailbox * box,struct fts_index_header * hdr_r)417 bool fts_index_get_header(struct mailbox *box, struct fts_index_header *hdr_r)
418 {
419 struct mail_index_view *view;
420 const void *data;
421 size_t data_size;
422 bool ret;
423
424 mail_index_refresh(box->index);
425 view = mail_index_view_open(box->index);
426 mail_index_get_header_ext(view, fts_index_get_ext_id(box),
427 &data, &data_size);
428 if (data_size < sizeof(*hdr_r)) {
429 i_zero(hdr_r);
430 ret = FALSE;
431 } else {
432 memcpy(hdr_r, data, sizeof(*hdr_r));
433 ret = TRUE;
434 }
435 mail_index_view_close(&view);
436 return ret;
437 }
438
fts_index_set_header(struct mailbox * box,const struct fts_index_header * hdr)439 int fts_index_set_header(struct mailbox *box,
440 const struct fts_index_header *hdr)
441 {
442 struct mail_index_transaction *trans;
443 uint32_t ext_id = fts_index_get_ext_id(box);
444
445 trans = mail_index_transaction_begin(box->view, 0);
446 mail_index_update_header_ext(trans, ext_id, 0, hdr, sizeof(*hdr));
447 return mail_index_transaction_commit(&trans);
448 }
449
fts_index_set_last_uid(struct mailbox * box,uint32_t last_uid)450 int fts_index_set_last_uid(struct mailbox *box, uint32_t last_uid)
451 {
452 struct fts_index_header hdr;
453
454 (void)fts_index_get_header(box, &hdr);
455 hdr.last_indexed_uid = last_uid;
456 return fts_index_set_header(box, &hdr);
457 }
458
fts_index_have_compatible_settings(struct mailbox_list * list,uint32_t checksum)459 int fts_index_have_compatible_settings(struct mailbox_list *list,
460 uint32_t checksum)
461 {
462 struct mail_namespace *ns = mailbox_list_get_namespace(list);
463 struct mailbox *box;
464 struct fts_index_header hdr;
465 const char *vname;
466 size_t len;
467 int ret;
468
469 if ((ns->flags & NAMESPACE_FLAG_INBOX_USER) != 0)
470 vname = "INBOX";
471 else {
472 len = strlen(ns->prefix);
473 if (len > 0 && ns->prefix[len-1] == mail_namespace_get_sep(ns))
474 len--;
475 vname = t_strndup(ns->prefix, len);
476 }
477
478 box = mailbox_alloc(list, vname, 0);
479 if (mailbox_sync(box, (enum mailbox_sync_flags)0) < 0) {
480 i_error("fts: Failed to sync mailbox %s: %s", vname,
481 mailbox_get_last_internal_error(box, NULL));
482 ret = -1;
483 } else {
484 ret = fts_index_get_header(box, &hdr) &&
485 hdr.settings_checksum == checksum ? 1 : 0;
486 }
487 mailbox_free(&box);
488 return ret;
489 }
490
491 static const char *indexed_headers[] = {
492 "From", "To", "Cc", "Bcc", "Subject"
493 };
494
fts_header_want_indexed(const char * hdr_name)495 bool fts_header_want_indexed(const char *hdr_name)
496 {
497 unsigned int i;
498
499 for (i = 0; i < N_ELEMENTS(indexed_headers); i++) {
500 if (strcasecmp(hdr_name, indexed_headers[i]) == 0)
501 return TRUE;
502 }
503 return FALSE;
504 }
505
/* Returns whether the named header is treated as containing
   language-specific text: true for Subject, Comments and Keywords
   (compared case-insensitively), false for everything else. */
bool fts_header_has_language(const char *hdr_name)
{
	bool has_language;

	/* FIXME: should email address headers be detected as different
	   languages? That mainly contains people's names.. A
	   message_header_is_address(hdr_name) check for that is
	   intentionally not enabled here. */

	/* Subject definitely contains language-specific data that can be
	   detected. Comments and Keywords headers could also contain it,
	   although just about nobody uses those headers.

	   For now we assume that other headers contain non-language
	   specific data that we don't want to filter in special ways. For
	   example it is good to be able to search for Message-IDs. */
	has_language = strcasecmp(hdr_name, "Subject") == 0;
	if (!has_language)
		has_language = strcasecmp(hdr_name, "Comments") == 0;
	if (!has_language)
		has_language = strcasecmp(hdr_name, "Keywords") == 0;
	return has_language;
}
524
fts_mailbox_get_guid(struct mailbox * box,const char ** guid_r)525 int fts_mailbox_get_guid(struct mailbox *box, const char **guid_r)
526 {
527 struct mailbox_metadata metadata;
528
529 if (mailbox_get_metadata(box, MAILBOX_METADATA_GUID, &metadata) < 0)
530 return -1;
531
532 *guid_r = guid_128_to_string(metadata.guid);
533 return 0;
534 }
535