1 /* Copyright (c) 2015-2018 Dovecot authors, see the included COPYING file */
2 
3 #include "lib.h"
4 #include "module-context.h"
5 #include "mail-user.h"
6 #include "mail-storage-private.h"
7 #include "mailbox-match-plugin.h"
8 #include "fts-language.h"
9 #include "fts-filter.h"
10 #include "fts-tokenizer.h"
11 #include "fts-user.h"
12 
/* Fetch the struct fts_user that was attached to obj (a mail_user) under
   fts_user_module. Callers in this file compare the result against NULL,
   which is what they see when fts_mail_user_init() has not attached a
   context yet. */
#define FTS_USER_CONTEXT(obj) \
	MODULE_CONTEXT(obj, fts_user_module)
15 
/* Per-mail_user FTS state. Allocated from user->pool in fts_mail_user_init()
   and attached to the user as its fts_user_module context. */
struct fts_user {
	/* NOTE(review): presumably has to stay the first member for the
	   module context macros - confirm before reordering */
	union mail_user_module_context module_ctx;
	/* number of fts_mail_user_init() calls not yet matched by
	   fts_mail_user_deinit(); the state is freed when it drops to 0 */
	int refcount;

	/* languages listed in the fts_languages setting */
	struct fts_language_list *lang_list;
	/* entry created for fts_language_data; also stored in both arrays
	   below */
	struct fts_user_language *data_lang;
	/* languages: every created language entry (data language included).
	   data_languages: contains only data_lang. */
	ARRAY_TYPE(fts_user_language) languages, data_languages;

	/* matcher built from the fts_autoindex_exclude setting */
	struct mailbox_match_plugin *autoindex_exclude;
};
26 
/* Module context identifier used by FTS_USER_CONTEXT() and
   MODULE_CONTEXT_SET() to attach struct fts_user to a mail_user; it is
   initialized against mail_user_module_register. */
static MODULE_CONTEXT_DEFINE_INIT(fts_user_module,
				  &mail_user_module_register);
29 
str_keyvalues_to_array(const char * str)30 static const char *const *str_keyvalues_to_array(const char *str)
31 {
32 	const char *key, *value, *const *keyvalues;
33 	ARRAY_TYPE(const_string) arr;
34 	unsigned int i;
35 
36 	if (str == NULL)
37 		return NULL;
38 
39 	t_array_init(&arr, 8);
40 	keyvalues = t_strsplit_spaces(str, " ");
41 	for (i = 0; keyvalues[i] != NULL; i++) {
42 		value = strchr(keyvalues[i], '=');
43 		if (value != NULL)
44 			key = t_strdup_until(keyvalues[i], value++);
45 		else {
46 			key = keyvalues[i];
47 			value = "";
48 		}
49 		array_push_back(&arr, &key);
50 		array_push_back(&arr, &value);
51 	}
52 	array_append_zero(&arr);
53 	return array_front(&arr);
54 }
55 
56 static int
fts_user_init_languages(struct mail_user * user,struct fts_user * fuser,const char ** error_r)57 fts_user_init_languages(struct mail_user *user, struct fts_user *fuser,
58 			const char **error_r)
59 {
60 	const char *languages, *unknown;
61 	const char *lang_config[3] = {NULL, NULL, NULL};
62 
63 	languages = mail_user_plugin_getenv(user, "fts_languages");
64 	if (languages == NULL) {
65 		*error_r = "fts_languages setting is missing";
66 		return -1;
67 	}
68 
69 	lang_config[1] = mail_user_plugin_getenv(user, "fts_language_config");
70 	if (lang_config[1] != NULL)
71 		lang_config[0] = "fts_language_config";
72 	if (fts_language_list_init(lang_config, &fuser->lang_list, error_r) < 0)
73 		return -1;
74 
75 	if (!fts_language_list_add_names(fuser->lang_list, languages, &unknown)) {
76 		*error_r = t_strdup_printf(
77 			"fts_languages: Unknown language '%s'", unknown);
78 		return -1;
79 	}
80 	if (array_count(fts_language_list_get_all(fuser->lang_list)) == 0) {
81 		*error_r = "fts_languages setting is empty";
82 		return -1;
83 	}
84 	return 0;
85 }
86 
87 static int
fts_user_create_filters(struct mail_user * user,const struct fts_language * lang,struct fts_filter ** filter_r,const char ** error_r)88 fts_user_create_filters(struct mail_user *user, const struct fts_language *lang,
89 			struct fts_filter **filter_r, const char **error_r)
90 {
91 	const struct fts_filter *filter_class;
92 	struct fts_filter *filter = NULL, *parent = NULL;
93 	const char *filters_key, *const *filters, *filter_set_name;
94 	const char *str, *error, *set_key;
95 	unsigned int i;
96 	int ret = 0;
97 
98 	/* try to get the language-specific filters first */
99 	filters_key = t_strconcat("fts_filters_", lang->name, NULL);
100 	str = mail_user_plugin_getenv(user, filters_key);
101 	if (str == NULL) {
102 		/* fallback to global filters */
103 		filters_key = "fts_filters";
104 		str = mail_user_plugin_getenv(user, filters_key);
105 		if (str == NULL) {
106 			/* No filters */
107 			*filter_r = NULL;
108 			return 0;
109 		}
110 	}
111 
112 	filters = t_strsplit_spaces(str, " ");
113 	for (i = 0; filters[i] != NULL; i++) {
114 		filter_class = fts_filter_find(filters[i]);
115 		if (filter_class == NULL) {
116 			*error_r = t_strdup_printf("%s: Unknown filter '%s'",
117 						   filters_key, filters[i]);
118 			ret = -1;
119 			break;
120 		}
121 
122 		/* try the language-specific setting first */
123 		filter_set_name = t_str_replace(filters[i], '-', '_');
124 		set_key = t_strdup_printf("fts_filter_%s_%s",
125 					  lang->name, filter_set_name);
126 		str = mail_user_plugin_getenv(user, set_key);
127 		if (str == NULL) {
128 			set_key = t_strdup_printf("fts_filter_%s", filter_set_name);
129 			str = mail_user_plugin_getenv(user, set_key);
130 		}
131 
132 		if (fts_filter_create(filter_class, parent, lang,
133 				      str_keyvalues_to_array(str),
134 				      &filter, &error) < 0) {
135 			*error_r = t_strdup_printf("%s: %s", set_key, error);
136 			ret = -1;
137 			break;
138 		}
139 		if (parent != NULL)
140 			fts_filter_unref(&parent);
141 		parent = filter;
142 	}
143 	if (ret < 0) {
144 		if (parent != NULL)
145 			fts_filter_unref(&parent);
146 		return -1;
147 	}
148 	*filter_r = filter;
149 	return 0;
150 }
151 
152 static int
fts_user_create_tokenizer(struct mail_user * user,const struct fts_language * lang,struct fts_tokenizer ** tokenizer_r,bool search,const char ** error_r)153 fts_user_create_tokenizer(struct mail_user *user,
154 			  const struct fts_language *lang,
155 			  struct fts_tokenizer **tokenizer_r, bool search,
156 			  const char **error_r)
157 {
158 	const struct fts_tokenizer *tokenizer_class;
159 	struct fts_tokenizer *tokenizer = NULL, *parent = NULL;
160 	const char *tokenizers_key, *const *tokenizers, *tokenizer_set_name;
161 	const char *str, *error, *set_key;
162 	unsigned int i;
163 	int ret = 0;
164 
165 	tokenizers_key = t_strconcat("fts_tokenizers_", lang->name, NULL);
166 	str = mail_user_plugin_getenv(user, tokenizers_key);
167 	if (str == NULL) {
168 		str = mail_user_plugin_getenv(user, "fts_tokenizers");
169 		if (str == NULL) {
170 			*error_r = t_strdup_printf("%s or fts_tokenizers setting must exist", tokenizers_key);
171 			return -1;
172 		}
173 		tokenizers_key = "fts_tokenizers";
174 	}
175 
176 	tokenizers = t_strsplit_spaces(str, " ");
177 
178 	for (i = 0; tokenizers[i] != NULL; i++) {
179 		tokenizer_class = fts_tokenizer_find(tokenizers[i]);
180 		if (tokenizer_class == NULL) {
181 			*error_r = t_strdup_printf("%s: Unknown tokenizer '%s'",
182 						   tokenizers_key, tokenizers[i]);
183 			ret = -1;
184 			break;
185 		}
186 
187 		tokenizer_set_name = t_str_replace(tokenizers[i], '-', '_');
188 		set_key = t_strdup_printf("fts_tokenizer_%s_%s", tokenizer_set_name, lang->name);
189 		str = mail_user_plugin_getenv(user, set_key);
190 		if (str == NULL) {
191 			set_key = t_strdup_printf("fts_tokenizer_%s", tokenizer_set_name);
192 			str = mail_user_plugin_getenv(user, set_key);
193 		}
194 
195 		/* tell the tokenizers that we're tokenizing a search string
196 		   (instead of tokenizing indexed data) */
197 		if (search)
198 			str = t_strconcat("search=yes ", str, NULL);
199 
200 		if (fts_tokenizer_create(tokenizer_class, parent,
201 					 str_keyvalues_to_array(str),
202 					 &tokenizer, &error) < 0) {
203 			*error_r = t_strdup_printf("%s: %s", set_key, error);
204 			ret = -1;
205 			break;
206 		}
207 		if (parent != NULL)
208 			fts_tokenizer_unref(&parent);
209 		parent = tokenizer;
210 	}
211 	if (ret < 0) {
212 		if (parent != NULL)
213 			fts_tokenizer_unref(&parent);
214 		return -1;
215 	}
216 	*tokenizer_r = tokenizer;
217 	return 0;
218 }
219 
220 static int
fts_user_language_init_tokenizers(struct mail_user * user,struct fts_user_language * user_lang,const char ** error_r)221 fts_user_language_init_tokenizers(struct mail_user *user,
222 				  struct fts_user_language *user_lang,
223 				  const char **error_r)
224 {
225 	if (fts_user_create_tokenizer(user, user_lang->lang,
226 				      &user_lang->index_tokenizer, FALSE,
227 	                              error_r) < 0)
228 		return -1;
229 
230 	if (fts_user_create_tokenizer(user, user_lang->lang,
231 				      &user_lang->search_tokenizer, TRUE,
232 	                              error_r) < 0)
233 		return -1;
234 	return 0;
235 }
236 
237 struct fts_user_language *
fts_user_language_find(struct mail_user * user,const struct fts_language * lang)238 fts_user_language_find(struct mail_user *user,
239 		       const struct fts_language *lang)
240 {
241 	struct fts_user_language *user_lang;
242 	struct fts_user *fuser = FTS_USER_CONTEXT(user);
243 
244 	i_assert(fuser != NULL);
245 	array_foreach_elem(&fuser->languages, user_lang) {
246 		if (strcmp(user_lang->lang->name, lang->name) == 0)
247 			return user_lang;
248 	}
249 	return NULL;
250 }
251 
fts_user_language_create(struct mail_user * user,struct fts_user * fuser,const struct fts_language * lang,const char ** error_r)252 static int fts_user_language_create(struct mail_user *user,
253                                     struct fts_user *fuser,
254 				    const struct fts_language *lang,
255 				    const char **error_r)
256 {
257 	struct fts_user_language *user_lang;
258 
259 	user_lang = p_new(user->pool, struct fts_user_language, 1);
260 	user_lang->lang = lang;
261 	array_push_back(&fuser->languages, &user_lang);
262 
263 	if (fts_user_language_init_tokenizers(user, user_lang, error_r) < 0)
264 		return -1;
265 	if (fts_user_create_filters(user, lang, &user_lang->filter, error_r) < 0)
266 		return -1;
267 	return 0;
268 }
269 
fts_user_languages_fill_all(struct mail_user * user,struct fts_user * fuser,const char ** error_r)270 static int fts_user_languages_fill_all(struct mail_user *user,
271                                        struct fts_user *fuser,
272                                        const char **error_r)
273 {
274 	const struct fts_language *lang;
275 
276 	array_foreach_elem(fts_language_list_get_all(fuser->lang_list), lang) {
277 		if (fts_user_language_create(user, fuser, lang, error_r) < 0)
278 			return -1;
279 	}
280 	return 0;
281 }
282 
283 static int
fts_user_init_data_language(struct mail_user * user,struct fts_user * fuser,const char ** error_r)284 fts_user_init_data_language(struct mail_user *user, struct fts_user *fuser,
285 			    const char **error_r)
286 {
287 	struct fts_user_language *user_lang;
288 	const char *error;
289 
290 	user_lang = p_new(user->pool, struct fts_user_language, 1);
291 	user_lang->lang = &fts_language_data;
292 
293 	if (fts_user_language_init_tokenizers(user, user_lang, error_r) < 0)
294 		return -1;
295 
296 	if (fts_filter_create(fts_filter_lowercase, NULL, user_lang->lang, NULL,
297 			      &user_lang->filter, &error) < 0)
298 		i_unreached();
299 	i_assert(user_lang->filter != NULL);
300 
301 	p_array_init(&fuser->data_languages, user->pool, 1);
302 	array_push_back(&fuser->data_languages, &user_lang);
303 	array_push_back(&fuser->languages, &user_lang);
304 
305 	fuser->data_lang = user_lang;
306 	return 0;
307 }
308 
fts_user_get_language_list(struct mail_user * user)309 struct fts_language_list *fts_user_get_language_list(struct mail_user *user)
310 {
311 	struct fts_user *fuser = FTS_USER_CONTEXT(user);
312 
313 	i_assert(fuser != NULL);
314 	return fuser->lang_list;
315 }
316 
ARRAY_TYPE(fts_user_language)317 const ARRAY_TYPE(fts_user_language) *
318 fts_user_get_all_languages(struct mail_user *user)
319 {
320 	struct fts_user *fuser = FTS_USER_CONTEXT(user);
321 
322 	i_assert(fuser != NULL);
323 	return &fuser->languages;
324 }
325 
ARRAY_TYPE(fts_user_language)326 const ARRAY_TYPE(fts_user_language) *
327 fts_user_get_data_languages(struct mail_user *user)
328 {
329 	struct fts_user *fuser = FTS_USER_CONTEXT(user);
330 
331 	i_assert(fuser != NULL);
332 	return &fuser->data_languages;
333 }
334 
fts_user_get_data_lang(struct mail_user * user)335 struct fts_user_language *fts_user_get_data_lang(struct mail_user *user)
336 {
337 	struct fts_user *fuser = FTS_USER_CONTEXT(user);
338 
339 	i_assert(fuser != NULL);
340 	return fuser->data_lang;
341 }
342 
fts_user_autoindex_exclude(struct mailbox * box)343 bool fts_user_autoindex_exclude(struct mailbox *box)
344 {
345 	struct fts_user *fuser = FTS_USER_CONTEXT(box->storage->user);
346 
347 	return mailbox_match_plugin_exclude(fuser->autoindex_exclude, box);
348 }
349 
fts_user_language_free(struct fts_user_language * user_lang)350 static void fts_user_language_free(struct fts_user_language *user_lang)
351 {
352 	if (user_lang->filter != NULL)
353 		fts_filter_unref(&user_lang->filter);
354 	if (user_lang->index_tokenizer != NULL)
355 		fts_tokenizer_unref(&user_lang->index_tokenizer);
356 	if (user_lang->search_tokenizer != NULL)
357 		fts_tokenizer_unref(&user_lang->search_tokenizer);
358 }
359 
fts_user_free(struct fts_user * fuser)360 static void fts_user_free(struct fts_user *fuser)
361 {
362 	struct fts_user_language *user_lang;
363 
364 	if (fuser->lang_list != NULL)
365 		fts_language_list_deinit(&fuser->lang_list);
366 
367 	if (array_is_created(&fuser->languages)) {
368 		array_foreach_elem(&fuser->languages, user_lang)
369 			fts_user_language_free(user_lang);
370 	}
371 	mailbox_match_plugin_deinit(&fuser->autoindex_exclude);
372 }
373 
374 static int
fts_mail_user_init_libfts(struct mail_user * user,struct fts_user * fuser,const char ** error_r)375 fts_mail_user_init_libfts(struct mail_user *user, struct fts_user *fuser,
376 			  const char **error_r)
377 {
378 	p_array_init(&fuser->languages, user->pool, 4);
379 
380 	if (fts_user_init_languages(user, fuser, error_r) < 0 ||
381 	    fts_user_init_data_language(user, fuser, error_r) < 0)
382 		return -1;
383 	if (fts_user_languages_fill_all(user, fuser, error_r) < 0)
384 		return -1;
385 	return 0;
386 }
387 
/* Attach FTS state to user, or take another reference if it already has it.

   When the context already exists, only its refcount is increased and
   initialize_libfts is ignored. Otherwise a new context is allocated from
   user->pool, the language state is set up if initialize_libfts is TRUE, and
   the fts_autoindex_exclude matcher is created.

   Returns 0 on success. Returns -1 with *error_r set if the language setup
   fails; nothing is attached to the user in that case. */
int fts_mail_user_init(struct mail_user *user, bool initialize_libfts,
		       const char **error_r)
{
	struct fts_user *fuser = FTS_USER_CONTEXT(user);

	if (fuser != NULL) {
		/* another fts plugin has already initialized this user */
		fuser->refcount++;
		return 0;
	}

	fuser = p_new(user->pool, struct fts_user, 1);
	fuser->refcount = 1;
	if (initialize_libfts &&
	    fts_mail_user_init_libfts(user, fuser, error_r) < 0) {
		fts_user_free(fuser);
		return -1;
	}
	fuser->autoindex_exclude =
		mailbox_match_plugin_init(user, "fts_autoindex_exclude");

	MODULE_CONTEXT_SET(user, fts_user_module, fuser);
	return 0;
}
413 
fts_mail_user_deinit(struct mail_user * user)414 void fts_mail_user_deinit(struct mail_user *user)
415 {
416 	struct fts_user *fuser = FTS_USER_CONTEXT(user);
417 
418 	if (fuser != NULL) {
419 		i_assert(fuser->refcount > 0);
420 		if (--fuser->refcount == 0)
421 			fts_user_free(fuser);
422 	}
423 }
424