1 /* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */
2 
3 #include "lib.h"
4 #include "array.h"
5 #include "hex-binary.h"
6 #include "mail-index.h"
7 #include "mail-namespace.h"
8 #include "mail-storage-private.h"
9 #include "mailbox-list-iter.h"
10 #include "mail-search.h"
11 #include "fts-api-private.h"
12 
/* Event category named "fts". It has external linkage, so it is presumably
   shared with other FTS source files - confirm against the header. */
struct event_category event_category_fts = {
	.name = "fts",
};
16 
/* List of registered backend classes. It is created on the first
   fts_backend_register() call and freed by fts_backend_unregister() once
   the last entry has been removed, so it may be uncreated at any time. */
static ARRAY(const struct fts_backend *) backends;
18 
fts_backend_register(const struct fts_backend * backend)19 void fts_backend_register(const struct fts_backend *backend)
20 {
21 	if (!array_is_created(&backends))
22 		i_array_init(&backends, 4);
23 	array_push_back(&backends, &backend);
24 }
25 
fts_backend_unregister(const char * name)26 void fts_backend_unregister(const char *name)
27 {
28 	const struct fts_backend *const *be;
29 	unsigned int i, count;
30 
31 	be = array_get(&backends, &count);
32 	for (i = 0; i < count; i++) {
33 		if (strcmp(be[i]->name, name) == 0) {
34 			array_delete(&backends, i, 1);
35 			break;
36 		}
37 	}
38 	if (i == count)
39 		i_panic("fts_backend_unregister(%s): unknown backend", name);
40 
41 	if (count == 1)
42 		array_free(&backends);
43 }
44 
45 static const struct fts_backend *
fts_backend_class_lookup(const char * backend_name)46 fts_backend_class_lookup(const char *backend_name)
47 {
48 	const struct fts_backend *const *be;
49 	unsigned int i, count;
50 
51 	if (array_is_created(&backends)) {
52 		be = array_get(&backends, &count);
53 		for (i = 0; i < count; i++) {
54 			if (strcmp(be[i]->name, backend_name) == 0)
55 				return be[i];
56 		}
57 	}
58 	return NULL;
59 }
60 
fts_backend_init(const char * backend_name,struct mail_namespace * ns,const char ** error_r,struct fts_backend ** backend_r)61 int fts_backend_init(const char *backend_name, struct mail_namespace *ns,
62 		     const char **error_r, struct fts_backend **backend_r)
63 {
64 	const struct fts_backend *be;
65 	struct fts_backend *backend;
66 
67 	be = fts_backend_class_lookup(backend_name);
68 	if (be == NULL) {
69 		*error_r = "Unknown backend";
70 		return -1;
71 	}
72 
73 	backend = be->v.alloc();
74 	backend->ns = ns;
75 	if (backend->v.init(backend, error_r) < 0) {
76 		i_free(backend);
77 		return -1;
78 	}
79 	*backend_r = backend;
80 	return 0;
81 }
82 
fts_backend_deinit(struct fts_backend ** _backend)83 void fts_backend_deinit(struct fts_backend **_backend)
84 {
85 	struct fts_backend *backend = *_backend;
86 
87 	*_backend = NULL;
88 	backend->v.deinit(backend);
89 }
90 
fts_backend_get_last_uid(struct fts_backend * backend,struct mailbox * box,uint32_t * last_uid_r)91 int fts_backend_get_last_uid(struct fts_backend *backend, struct mailbox *box,
92 			     uint32_t *last_uid_r)
93 {
94 	struct fts_index_header hdr;
95 
96 	if (box->virtual_vfuncs != NULL) {
97 		/* virtual mailboxes themselves don't have any indexes,
98 		   so catch this call here */
99 		if (!fts_index_get_header(box, &hdr))
100 			*last_uid_r = 0;
101 		else
102 			*last_uid_r = hdr.last_indexed_uid;
103 		return 0;
104 	}
105 
106 	return backend->v.get_last_uid(backend, box, last_uid_r);
107 }
108 
fts_backend_is_updating(struct fts_backend * backend)109 bool fts_backend_is_updating(struct fts_backend *backend)
110 {
111 	return backend->updating;
112 }
113 
114 struct fts_backend_update_context *
fts_backend_update_init(struct fts_backend * backend)115 fts_backend_update_init(struct fts_backend *backend)
116 {
117 	struct fts_backend_update_context *ctx;
118 
119 	i_assert(!backend->updating);
120 
121 	backend->updating = TRUE;
122 	ctx = backend->v.update_init(backend);
123 	if ((backend->flags & FTS_BACKEND_FLAG_NORMALIZE_INPUT) != 0)
124 		ctx->normalizer = backend->ns->user->default_normalizer;
125 	return ctx;
126 }
127 
fts_backend_set_cur_mailbox(struct fts_backend_update_context * ctx)128 static void fts_backend_set_cur_mailbox(struct fts_backend_update_context *ctx)
129 {
130 	fts_backend_update_unset_build_key(ctx);
131 	if (ctx->backend_box != ctx->cur_box) {
132 		ctx->backend->v.update_set_mailbox(ctx, ctx->cur_box);
133 		ctx->backend_box = ctx->cur_box;
134 	}
135 }
136 
fts_backend_update_deinit(struct fts_backend_update_context ** _ctx)137 int fts_backend_update_deinit(struct fts_backend_update_context **_ctx)
138 {
139 	struct fts_backend_update_context *ctx = *_ctx;
140 	struct fts_backend *backend = ctx->backend;
141 	int ret;
142 
143 	*_ctx = NULL;
144 
145 	ctx->cur_box = NULL;
146 	fts_backend_set_cur_mailbox(ctx);
147 
148 	ret = backend->v.update_deinit(ctx);
149 	backend->updating = FALSE;
150 	return ret;
151 }
152 
fts_backend_update_set_mailbox(struct fts_backend_update_context * ctx,struct mailbox * box)153 void fts_backend_update_set_mailbox(struct fts_backend_update_context *ctx,
154 				    struct mailbox *box)
155 {
156 	if (ctx->backend_box != NULL && box != ctx->backend_box) {
157 		/* make sure we don't reference the backend box anymore */
158 		ctx->backend->v.update_set_mailbox(ctx, NULL);
159 		ctx->backend_box = NULL;
160 	}
161 	ctx->cur_box = box;
162 }
163 
fts_backend_update_expunge(struct fts_backend_update_context * ctx,uint32_t uid)164 void fts_backend_update_expunge(struct fts_backend_update_context *ctx,
165 				uint32_t uid)
166 {
167 	fts_backend_set_cur_mailbox(ctx);
168 	ctx->backend->v.update_expunge(ctx, uid);
169 }
170 
fts_backend_update_set_build_key(struct fts_backend_update_context * ctx,const struct fts_backend_build_key * key)171 bool fts_backend_update_set_build_key(struct fts_backend_update_context *ctx,
172 				      const struct fts_backend_build_key *key)
173 {
174 	fts_backend_set_cur_mailbox(ctx);
175 
176 	i_assert(ctx->cur_box != NULL);
177 
178 	if (!ctx->backend->v.update_set_build_key(ctx, key))
179 		return FALSE;
180 	ctx->build_key_open = TRUE;
181 	return TRUE;
182 }
183 
fts_backend_update_unset_build_key(struct fts_backend_update_context * ctx)184 void fts_backend_update_unset_build_key(struct fts_backend_update_context *ctx)
185 {
186 	if (ctx->build_key_open) {
187 		ctx->backend->v.update_unset_build_key(ctx);
188 		ctx->build_key_open = FALSE;
189 	}
190 }
191 
fts_backend_update_build_more(struct fts_backend_update_context * ctx,const unsigned char * data,size_t size)192 int fts_backend_update_build_more(struct fts_backend_update_context *ctx,
193 				  const unsigned char *data, size_t size)
194 {
195 	i_assert(ctx->build_key_open);
196 
197 	return ctx->backend->v.update_build_more(ctx, data, size);
198 }
199 
fts_backend_refresh(struct fts_backend * backend)200 int fts_backend_refresh(struct fts_backend *backend)
201 {
202 	return backend->v.refresh(backend);
203 }
204 
fts_backend_reset_last_uids(struct fts_backend * backend)205 int fts_backend_reset_last_uids(struct fts_backend *backend)
206 {
207 	struct mailbox_list_iterate_context *iter;
208 	const struct mailbox_info *info;
209 	struct mailbox *box;
210 	int ret = 0;
211 
212 	iter = mailbox_list_iter_init(backend->ns->list, "*",
213 				      MAILBOX_LIST_ITER_SKIP_ALIASES |
214 				      MAILBOX_LIST_ITER_NO_AUTO_BOXES);
215 	while ((info = mailbox_list_iter_next(iter)) != NULL) {
216 		if ((info->flags &
217 		     (MAILBOX_NONEXISTENT | MAILBOX_NOSELECT)) != 0)
218 			continue;
219 
220 		box = mailbox_alloc(info->ns->list, info->vname, 0);
221 		if (mailbox_open(box) == 0) {
222 			if (fts_index_set_last_uid(box, 0) < 0)
223 				ret = -1;
224 		}
225 		mailbox_free(&box);
226 	}
227 	if (mailbox_list_iter_deinit(&iter) < 0)
228 		ret = -1;
229 	return ret;
230 }
231 
fts_backend_rescan(struct fts_backend * backend)232 int fts_backend_rescan(struct fts_backend *backend)
233 {
234 	struct mailbox *box;
235 	bool virtual_storage;
236 
237 	box = mailbox_alloc(backend->ns->list, "", 0);
238 	virtual_storage = box->virtual_vfuncs != NULL;
239 	mailbox_free(&box);
240 
241 	if (virtual_storage) {
242 		/* just reset the last-uids for a virtual storage. */
243 		return fts_backend_reset_last_uids(backend);
244 	}
245 
246 	return backend->v.rescan == NULL ? 0 :
247 		backend->v.rescan(backend);
248 }
249 
fts_backend_optimize(struct fts_backend * backend)250 int fts_backend_optimize(struct fts_backend *backend)
251 {
252 	return backend->v.optimize == NULL ? 0 :
253 		backend->v.optimize(backend);
254 }
255 
256 static void
fts_merge_maybies(ARRAY_TYPE (seq_range)* dest_maybe,const ARRAY_TYPE (seq_range)* dest_definite,const ARRAY_TYPE (seq_range)* src_maybe,const ARRAY_TYPE (seq_range)* src_definite)257 fts_merge_maybies(ARRAY_TYPE(seq_range) *dest_maybe,
258 		  const ARRAY_TYPE(seq_range) *dest_definite,
259 		  const ARRAY_TYPE(seq_range) *src_maybe,
260 		  const ARRAY_TYPE(seq_range) *src_definite)
261 {
262 	ARRAY_TYPE(seq_range) src_unwanted;
263 	const struct seq_range *range;
264 	struct seq_range new_range;
265 	unsigned int i, count;
266 	uint32_t seq;
267 
268 	/* add/leave to dest_maybe if at least one list has maybe,
269 	   and no lists have none */
270 
271 	/* create unwanted sequences list from both sources */
272 	t_array_init(&src_unwanted, 128);
273 	new_range.seq1 = 0; new_range.seq2 = (uint32_t)-1;
274 	array_push_back(&src_unwanted, &new_range);
275 	seq_range_array_remove_seq_range(&src_unwanted, src_maybe);
276 	seq_range_array_remove_seq_range(&src_unwanted, src_definite);
277 
278 	/* drop unwanted uids */
279 	seq_range_array_remove_seq_range(dest_maybe, &src_unwanted);
280 
281 	/* add uids that are in dest_definite and src_maybe lists */
282 	range = array_get(dest_definite, &count);
283 	for (i = 0; i < count; i++) {
284 		for (seq = range[i].seq1; seq <= range[i].seq2; seq++) {
285 			if (seq_range_exists(src_maybe, seq))
286 				seq_range_array_add(dest_maybe, seq);
287 		}
288 	}
289 }
290 
/* Narrows an existing definite/maybe result pair by a filter pair.
   maybe_dest is recomputed by fts_merge_maybies(), and definite_dest is
   reduced to the entries that also exist in definite_filter. */
void fts_filter_uids(ARRAY_TYPE(seq_range) *definite_dest,
		     const ARRAY_TYPE(seq_range) *definite_filter,
		     ARRAY_TYPE(seq_range) *maybe_dest,
		     const ARRAY_TYPE(seq_range) *maybe_filter)
{
	/* The maybe-merge has to see definite_dest before it is narrowed
	   below, so the order of these two steps matters. */
	T_BEGIN {
		fts_merge_maybies(maybe_dest, definite_dest,
				  maybe_filter, definite_filter);
	} T_END;

	/* only sequences present in both definite lists stay definite;
	   everything else is either a maybe or not wanted at all */
	seq_range_array_intersect(definite_dest, definite_filter);
}
304 
fts_backend_default_can_lookup(struct fts_backend * backend,const struct mail_search_arg * args)305 bool fts_backend_default_can_lookup(struct fts_backend *backend,
306 				    const struct mail_search_arg *args)
307 {
308 	for (; args != NULL; args = args->next) {
309 		switch (args->type) {
310 		case SEARCH_OR:
311 		case SEARCH_SUB:
312 		case SEARCH_INTHREAD:
313 			if (fts_backend_default_can_lookup(backend,
314 							   args->value.subargs))
315 				return TRUE;
316 			break;
317 		case SEARCH_HEADER:
318 		case SEARCH_HEADER_ADDRESS:
319 		case SEARCH_HEADER_COMPRESS_LWSP:
320 		case SEARCH_BODY:
321 		case SEARCH_TEXT:
322 			if (!args->no_fts)
323 				return TRUE;
324 			break;
325 		default:
326 			break;
327 		}
328 	}
329 	return FALSE;
330 }
331 
fts_backend_can_lookup(struct fts_backend * backend,const struct mail_search_arg * args)332 bool fts_backend_can_lookup(struct fts_backend *backend,
333 			    const struct mail_search_arg *args)
334 {
335 	return backend->v.can_lookup(backend, args);
336 }
337 
fts_score_map_sort(const struct fts_score_map * m1,const struct fts_score_map * m2)338 static int fts_score_map_sort(const struct fts_score_map *m1,
339 			      const struct fts_score_map *m2)
340 {
341 	if (m1->uid < m2->uid)
342 		return -1;
343 	if (m1->uid > m2->uid)
344 		return 1;
345 	return 0;
346 }
347 
fts_backend_lookup(struct fts_backend * backend,struct mailbox * box,struct mail_search_arg * args,enum fts_lookup_flags flags,struct fts_result * result)348 int fts_backend_lookup(struct fts_backend *backend, struct mailbox *box,
349 		       struct mail_search_arg *args,
350 		       enum fts_lookup_flags flags,
351 		       struct fts_result *result)
352 {
353 	array_clear(&result->definite_uids);
354 	array_clear(&result->maybe_uids);
355 	array_clear(&result->scores);
356 
357 	if (backend->v.lookup(backend, box, args, flags, result) < 0)
358 		return -1;
359 
360 	if (!result->scores_sorted && array_is_created(&result->scores)) {
361 		array_sort(&result->scores, fts_score_map_sort);
362 		result->scores_sorted = TRUE;
363 	}
364 	return 0;
365 }
366 
fts_backend_lookup_multi(struct fts_backend * backend,struct mailbox * const boxes[],struct mail_search_arg * args,enum fts_lookup_flags flags,struct fts_multi_result * result)367 int fts_backend_lookup_multi(struct fts_backend *backend,
368 			     struct mailbox *const boxes[],
369 			     struct mail_search_arg *args,
370 			     enum fts_lookup_flags flags,
371 			     struct fts_multi_result *result)
372 {
373 	unsigned int i;
374 
375 	i_assert(boxes[0] != NULL);
376 
377 	if (backend->v.lookup_multi != NULL) {
378 		if (backend->v.lookup_multi(backend, boxes, args,
379 					    flags, result) < 0)
380 			return -1;
381 		if (result->box_results == NULL) {
382 			result->box_results = p_new(result->pool,
383 						    struct fts_result, 1);
384 		}
385 		return 0;
386 	}
387 
388 	for (i = 0; boxes[i] != NULL; i++) ;
389 	result->box_results = p_new(result->pool, struct fts_result, i+1);
390 
391 	for (i = 0; boxes[i] != NULL; i++) {
392 		struct fts_result *box_result = &result->box_results[i];
393 
394 		p_array_init(&box_result->definite_uids, result->pool, 32);
395 		p_array_init(&box_result->maybe_uids, result->pool, 32);
396 		p_array_init(&box_result->scores, result->pool, 32);
397 		if (backend->v.lookup(backend, boxes[i], args,
398 				      flags, box_result) < 0)
399 			return -1;
400 	}
401 	return 0;
402 }
403 
fts_backend_lookup_done(struct fts_backend * backend)404 void fts_backend_lookup_done(struct fts_backend *backend)
405 {
406 	if (backend->v.lookup_done != NULL)
407 		backend->v.lookup_done(backend);
408 }
409 
fts_index_get_ext_id(struct mailbox * box)410 static uint32_t fts_index_get_ext_id(struct mailbox *box)
411 {
412 	return mail_index_ext_register(box->index, "fts",
413 				       sizeof(struct fts_index_header),
414 				       0, 0);
415 }
416 
fts_index_get_header(struct mailbox * box,struct fts_index_header * hdr_r)417 bool fts_index_get_header(struct mailbox *box, struct fts_index_header *hdr_r)
418 {
419 	struct mail_index_view *view;
420 	const void *data;
421 	size_t data_size;
422 	bool ret;
423 
424 	mail_index_refresh(box->index);
425 	view = mail_index_view_open(box->index);
426 	mail_index_get_header_ext(view, fts_index_get_ext_id(box),
427 				  &data, &data_size);
428 	if (data_size < sizeof(*hdr_r)) {
429 		i_zero(hdr_r);
430 		ret = FALSE;
431 	} else {
432 		memcpy(hdr_r, data, sizeof(*hdr_r));
433 		ret = TRUE;
434 	}
435 	mail_index_view_close(&view);
436 	return ret;
437 }
438 
fts_index_set_header(struct mailbox * box,const struct fts_index_header * hdr)439 int fts_index_set_header(struct mailbox *box,
440 			 const struct fts_index_header *hdr)
441 {
442 	struct mail_index_transaction *trans;
443 	uint32_t ext_id = fts_index_get_ext_id(box);
444 
445 	trans = mail_index_transaction_begin(box->view, 0);
446 	mail_index_update_header_ext(trans, ext_id, 0, hdr, sizeof(*hdr));
447 	return mail_index_transaction_commit(&trans);
448 }
449 
fts_index_set_last_uid(struct mailbox * box,uint32_t last_uid)450 int fts_index_set_last_uid(struct mailbox *box, uint32_t last_uid)
451 {
452 	struct fts_index_header hdr;
453 
454 	(void)fts_index_get_header(box, &hdr);
455 	hdr.last_indexed_uid = last_uid;
456 	return fts_index_set_header(box, &hdr);
457 }
458 
fts_index_have_compatible_settings(struct mailbox_list * list,uint32_t checksum)459 int fts_index_have_compatible_settings(struct mailbox_list *list,
460 				       uint32_t checksum)
461 {
462 	struct mail_namespace *ns = mailbox_list_get_namespace(list);
463 	struct mailbox *box;
464 	struct fts_index_header hdr;
465 	const char *vname;
466 	size_t len;
467 	int ret;
468 
469 	if ((ns->flags & NAMESPACE_FLAG_INBOX_USER) != 0)
470 		vname = "INBOX";
471 	else {
472 		len = strlen(ns->prefix);
473 		if (len > 0 && ns->prefix[len-1] == mail_namespace_get_sep(ns))
474 			len--;
475 		vname = t_strndup(ns->prefix, len);
476 	}
477 
478 	box = mailbox_alloc(list, vname, 0);
479 	if (mailbox_sync(box, (enum mailbox_sync_flags)0) < 0) {
480 		i_error("fts: Failed to sync mailbox %s: %s", vname,
481 			mailbox_get_last_internal_error(box, NULL));
482 		ret = -1;
483 	} else {
484 		ret = fts_index_get_header(box, &hdr) &&
485 			hdr.settings_checksum == checksum ? 1 : 0;
486 	}
487 	mailbox_free(&box);
488 	return ret;
489 }
490 
/* Header names accepted by fts_header_want_indexed(). Both the strings and
   the table itself are read-only. */
static const char *const indexed_headers[] = {
	"From", "To", "Cc", "Bcc", "Subject"
};
494 
fts_header_want_indexed(const char * hdr_name)495 bool fts_header_want_indexed(const char *hdr_name)
496 {
497 	unsigned int i;
498 
499 	for (i = 0; i < N_ELEMENTS(indexed_headers); i++) {
500 		if (strcasecmp(hdr_name, indexed_headers[i]) == 0)
501 			return TRUE;
502 	}
503 	return FALSE;
504 }
505 
/* Returns whether the named header is treated as containing
   language-specific text. This is true only for Subject, Comments and
   Keywords, compared case-insensitively.

   Subject clearly carries natural-language text. Comments and Keywords
   may too, although they are rarely used. All other headers are assumed
   to hold language-independent data that shouldn't go through
   language-specific filtering; for instance Message-IDs should stay
   searchable as they are.

   FIXME: address headers mostly contain people's names; whether they
   should be language-detected is undecided, so the
   message_header_is_address() check is left out for now. */
bool fts_header_has_language(const char *hdr_name)
{
	bool has_language;

	has_language = strcasecmp(hdr_name, "Subject") == 0;
	if (!has_language)
		has_language = strcasecmp(hdr_name, "Comments") == 0;
	if (!has_language)
		has_language = strcasecmp(hdr_name, "Keywords") == 0;
	return has_language;
}
524 
fts_mailbox_get_guid(struct mailbox * box,const char ** guid_r)525 int fts_mailbox_get_guid(struct mailbox *box, const char **guid_r)
526 {
527 	struct mailbox_metadata metadata;
528 
529 	if (mailbox_get_metadata(box, MAILBOX_METADATA_GUID, &metadata) < 0)
530 		return -1;
531 
532 	*guid_r = guid_128_to_string(metadata.guid);
533 	return 0;
534 }
535