1 /* Copyright (c) 2015-2018 Dovecot authors, see the included COPYING file */
2
3 #include "lib.h"
4 #include "module-context.h"
5 #include "mail-user.h"
6 #include "mail-storage-private.h"
7 #include "mailbox-match-plugin.h"
8 #include "fts-language.h"
9 #include "fts-filter.h"
10 #include "fts-tokenizer.h"
11 #include "fts-user.h"
12
/* Look up the struct fts_user stored in obj under fts_user_module. */
#define FTS_USER_CONTEXT(obj) MODULE_CONTEXT(obj, fts_user_module)
15
16 struct fts_user {
17 union mail_user_module_context module_ctx;
18 int refcount;
19
20 struct fts_language_list *lang_list;
21 struct fts_user_language *data_lang;
22 ARRAY_TYPE(fts_user_language) languages, data_languages;
23
24 struct mailbox_match_plugin *autoindex_exclude;
25 };
26
27 static MODULE_CONTEXT_DEFINE_INIT(fts_user_module,
28 &mail_user_module_register);
29
str_keyvalues_to_array(const char * str)30 static const char *const *str_keyvalues_to_array(const char *str)
31 {
32 const char *key, *value, *const *keyvalues;
33 ARRAY_TYPE(const_string) arr;
34 unsigned int i;
35
36 if (str == NULL)
37 return NULL;
38
39 t_array_init(&arr, 8);
40 keyvalues = t_strsplit_spaces(str, " ");
41 for (i = 0; keyvalues[i] != NULL; i++) {
42 value = strchr(keyvalues[i], '=');
43 if (value != NULL)
44 key = t_strdup_until(keyvalues[i], value++);
45 else {
46 key = keyvalues[i];
47 value = "";
48 }
49 array_push_back(&arr, &key);
50 array_push_back(&arr, &value);
51 }
52 array_append_zero(&arr);
53 return array_front(&arr);
54 }
55
56 static int
fts_user_init_languages(struct mail_user * user,struct fts_user * fuser,const char ** error_r)57 fts_user_init_languages(struct mail_user *user, struct fts_user *fuser,
58 const char **error_r)
59 {
60 const char *languages, *unknown;
61 const char *lang_config[3] = {NULL, NULL, NULL};
62
63 languages = mail_user_plugin_getenv(user, "fts_languages");
64 if (languages == NULL) {
65 *error_r = "fts_languages setting is missing";
66 return -1;
67 }
68
69 lang_config[1] = mail_user_plugin_getenv(user, "fts_language_config");
70 if (lang_config[1] != NULL)
71 lang_config[0] = "fts_language_config";
72 if (fts_language_list_init(lang_config, &fuser->lang_list, error_r) < 0)
73 return -1;
74
75 if (!fts_language_list_add_names(fuser->lang_list, languages, &unknown)) {
76 *error_r = t_strdup_printf(
77 "fts_languages: Unknown language '%s'", unknown);
78 return -1;
79 }
80 if (array_count(fts_language_list_get_all(fuser->lang_list)) == 0) {
81 *error_r = "fts_languages setting is empty";
82 return -1;
83 }
84 return 0;
85 }
86
87 static int
fts_user_create_filters(struct mail_user * user,const struct fts_language * lang,struct fts_filter ** filter_r,const char ** error_r)88 fts_user_create_filters(struct mail_user *user, const struct fts_language *lang,
89 struct fts_filter **filter_r, const char **error_r)
90 {
91 const struct fts_filter *filter_class;
92 struct fts_filter *filter = NULL, *parent = NULL;
93 const char *filters_key, *const *filters, *filter_set_name;
94 const char *str, *error, *set_key;
95 unsigned int i;
96 int ret = 0;
97
98 /* try to get the language-specific filters first */
99 filters_key = t_strconcat("fts_filters_", lang->name, NULL);
100 str = mail_user_plugin_getenv(user, filters_key);
101 if (str == NULL) {
102 /* fallback to global filters */
103 filters_key = "fts_filters";
104 str = mail_user_plugin_getenv(user, filters_key);
105 if (str == NULL) {
106 /* No filters */
107 *filter_r = NULL;
108 return 0;
109 }
110 }
111
112 filters = t_strsplit_spaces(str, " ");
113 for (i = 0; filters[i] != NULL; i++) {
114 filter_class = fts_filter_find(filters[i]);
115 if (filter_class == NULL) {
116 *error_r = t_strdup_printf("%s: Unknown filter '%s'",
117 filters_key, filters[i]);
118 ret = -1;
119 break;
120 }
121
122 /* try the language-specific setting first */
123 filter_set_name = t_str_replace(filters[i], '-', '_');
124 set_key = t_strdup_printf("fts_filter_%s_%s",
125 lang->name, filter_set_name);
126 str = mail_user_plugin_getenv(user, set_key);
127 if (str == NULL) {
128 set_key = t_strdup_printf("fts_filter_%s", filter_set_name);
129 str = mail_user_plugin_getenv(user, set_key);
130 }
131
132 if (fts_filter_create(filter_class, parent, lang,
133 str_keyvalues_to_array(str),
134 &filter, &error) < 0) {
135 *error_r = t_strdup_printf("%s: %s", set_key, error);
136 ret = -1;
137 break;
138 }
139 if (parent != NULL)
140 fts_filter_unref(&parent);
141 parent = filter;
142 }
143 if (ret < 0) {
144 if (parent != NULL)
145 fts_filter_unref(&parent);
146 return -1;
147 }
148 *filter_r = filter;
149 return 0;
150 }
151
152 static int
fts_user_create_tokenizer(struct mail_user * user,const struct fts_language * lang,struct fts_tokenizer ** tokenizer_r,bool search,const char ** error_r)153 fts_user_create_tokenizer(struct mail_user *user,
154 const struct fts_language *lang,
155 struct fts_tokenizer **tokenizer_r, bool search,
156 const char **error_r)
157 {
158 const struct fts_tokenizer *tokenizer_class;
159 struct fts_tokenizer *tokenizer = NULL, *parent = NULL;
160 const char *tokenizers_key, *const *tokenizers, *tokenizer_set_name;
161 const char *str, *error, *set_key;
162 unsigned int i;
163 int ret = 0;
164
165 tokenizers_key = t_strconcat("fts_tokenizers_", lang->name, NULL);
166 str = mail_user_plugin_getenv(user, tokenizers_key);
167 if (str == NULL) {
168 str = mail_user_plugin_getenv(user, "fts_tokenizers");
169 if (str == NULL) {
170 *error_r = t_strdup_printf("%s or fts_tokenizers setting must exist", tokenizers_key);
171 return -1;
172 }
173 tokenizers_key = "fts_tokenizers";
174 }
175
176 tokenizers = t_strsplit_spaces(str, " ");
177
178 for (i = 0; tokenizers[i] != NULL; i++) {
179 tokenizer_class = fts_tokenizer_find(tokenizers[i]);
180 if (tokenizer_class == NULL) {
181 *error_r = t_strdup_printf("%s: Unknown tokenizer '%s'",
182 tokenizers_key, tokenizers[i]);
183 ret = -1;
184 break;
185 }
186
187 tokenizer_set_name = t_str_replace(tokenizers[i], '-', '_');
188 set_key = t_strdup_printf("fts_tokenizer_%s_%s", tokenizer_set_name, lang->name);
189 str = mail_user_plugin_getenv(user, set_key);
190 if (str == NULL) {
191 set_key = t_strdup_printf("fts_tokenizer_%s", tokenizer_set_name);
192 str = mail_user_plugin_getenv(user, set_key);
193 }
194
195 /* tell the tokenizers that we're tokenizing a search string
196 (instead of tokenizing indexed data) */
197 if (search)
198 str = t_strconcat("search=yes ", str, NULL);
199
200 if (fts_tokenizer_create(tokenizer_class, parent,
201 str_keyvalues_to_array(str),
202 &tokenizer, &error) < 0) {
203 *error_r = t_strdup_printf("%s: %s", set_key, error);
204 ret = -1;
205 break;
206 }
207 if (parent != NULL)
208 fts_tokenizer_unref(&parent);
209 parent = tokenizer;
210 }
211 if (ret < 0) {
212 if (parent != NULL)
213 fts_tokenizer_unref(&parent);
214 return -1;
215 }
216 *tokenizer_r = tokenizer;
217 return 0;
218 }
219
220 static int
fts_user_language_init_tokenizers(struct mail_user * user,struct fts_user_language * user_lang,const char ** error_r)221 fts_user_language_init_tokenizers(struct mail_user *user,
222 struct fts_user_language *user_lang,
223 const char **error_r)
224 {
225 if (fts_user_create_tokenizer(user, user_lang->lang,
226 &user_lang->index_tokenizer, FALSE,
227 error_r) < 0)
228 return -1;
229
230 if (fts_user_create_tokenizer(user, user_lang->lang,
231 &user_lang->search_tokenizer, TRUE,
232 error_r) < 0)
233 return -1;
234 return 0;
235 }
236
237 struct fts_user_language *
fts_user_language_find(struct mail_user * user,const struct fts_language * lang)238 fts_user_language_find(struct mail_user *user,
239 const struct fts_language *lang)
240 {
241 struct fts_user_language *user_lang;
242 struct fts_user *fuser = FTS_USER_CONTEXT(user);
243
244 i_assert(fuser != NULL);
245 array_foreach_elem(&fuser->languages, user_lang) {
246 if (strcmp(user_lang->lang->name, lang->name) == 0)
247 return user_lang;
248 }
249 return NULL;
250 }
251
fts_user_language_create(struct mail_user * user,struct fts_user * fuser,const struct fts_language * lang,const char ** error_r)252 static int fts_user_language_create(struct mail_user *user,
253 struct fts_user *fuser,
254 const struct fts_language *lang,
255 const char **error_r)
256 {
257 struct fts_user_language *user_lang;
258
259 user_lang = p_new(user->pool, struct fts_user_language, 1);
260 user_lang->lang = lang;
261 array_push_back(&fuser->languages, &user_lang);
262
263 if (fts_user_language_init_tokenizers(user, user_lang, error_r) < 0)
264 return -1;
265 if (fts_user_create_filters(user, lang, &user_lang->filter, error_r) < 0)
266 return -1;
267 return 0;
268 }
269
fts_user_languages_fill_all(struct mail_user * user,struct fts_user * fuser,const char ** error_r)270 static int fts_user_languages_fill_all(struct mail_user *user,
271 struct fts_user *fuser,
272 const char **error_r)
273 {
274 const struct fts_language *lang;
275
276 array_foreach_elem(fts_language_list_get_all(fuser->lang_list), lang) {
277 if (fts_user_language_create(user, fuser, lang, error_r) < 0)
278 return -1;
279 }
280 return 0;
281 }
282
283 static int
fts_user_init_data_language(struct mail_user * user,struct fts_user * fuser,const char ** error_r)284 fts_user_init_data_language(struct mail_user *user, struct fts_user *fuser,
285 const char **error_r)
286 {
287 struct fts_user_language *user_lang;
288 const char *error;
289
290 user_lang = p_new(user->pool, struct fts_user_language, 1);
291 user_lang->lang = &fts_language_data;
292
293 if (fts_user_language_init_tokenizers(user, user_lang, error_r) < 0)
294 return -1;
295
296 if (fts_filter_create(fts_filter_lowercase, NULL, user_lang->lang, NULL,
297 &user_lang->filter, &error) < 0)
298 i_unreached();
299 i_assert(user_lang->filter != NULL);
300
301 p_array_init(&fuser->data_languages, user->pool, 1);
302 array_push_back(&fuser->data_languages, &user_lang);
303 array_push_back(&fuser->languages, &user_lang);
304
305 fuser->data_lang = user_lang;
306 return 0;
307 }
308
fts_user_get_language_list(struct mail_user * user)309 struct fts_language_list *fts_user_get_language_list(struct mail_user *user)
310 {
311 struct fts_user *fuser = FTS_USER_CONTEXT(user);
312
313 i_assert(fuser != NULL);
314 return fuser->lang_list;
315 }
316
ARRAY_TYPE(fts_user_language)317 const ARRAY_TYPE(fts_user_language) *
318 fts_user_get_all_languages(struct mail_user *user)
319 {
320 struct fts_user *fuser = FTS_USER_CONTEXT(user);
321
322 i_assert(fuser != NULL);
323 return &fuser->languages;
324 }
325
ARRAY_TYPE(fts_user_language)326 const ARRAY_TYPE(fts_user_language) *
327 fts_user_get_data_languages(struct mail_user *user)
328 {
329 struct fts_user *fuser = FTS_USER_CONTEXT(user);
330
331 i_assert(fuser != NULL);
332 return &fuser->data_languages;
333 }
334
fts_user_get_data_lang(struct mail_user * user)335 struct fts_user_language *fts_user_get_data_lang(struct mail_user *user)
336 {
337 struct fts_user *fuser = FTS_USER_CONTEXT(user);
338
339 i_assert(fuser != NULL);
340 return fuser->data_lang;
341 }
342
fts_user_autoindex_exclude(struct mailbox * box)343 bool fts_user_autoindex_exclude(struct mailbox *box)
344 {
345 struct fts_user *fuser = FTS_USER_CONTEXT(box->storage->user);
346
347 return mailbox_match_plugin_exclude(fuser->autoindex_exclude, box);
348 }
349
fts_user_language_free(struct fts_user_language * user_lang)350 static void fts_user_language_free(struct fts_user_language *user_lang)
351 {
352 if (user_lang->filter != NULL)
353 fts_filter_unref(&user_lang->filter);
354 if (user_lang->index_tokenizer != NULL)
355 fts_tokenizer_unref(&user_lang->index_tokenizer);
356 if (user_lang->search_tokenizer != NULL)
357 fts_tokenizer_unref(&user_lang->search_tokenizer);
358 }
359
fts_user_free(struct fts_user * fuser)360 static void fts_user_free(struct fts_user *fuser)
361 {
362 struct fts_user_language *user_lang;
363
364 if (fuser->lang_list != NULL)
365 fts_language_list_deinit(&fuser->lang_list);
366
367 if (array_is_created(&fuser->languages)) {
368 array_foreach_elem(&fuser->languages, user_lang)
369 fts_user_language_free(user_lang);
370 }
371 mailbox_match_plugin_deinit(&fuser->autoindex_exclude);
372 }
373
374 static int
fts_mail_user_init_libfts(struct mail_user * user,struct fts_user * fuser,const char ** error_r)375 fts_mail_user_init_libfts(struct mail_user *user, struct fts_user *fuser,
376 const char **error_r)
377 {
378 p_array_init(&fuser->languages, user->pool, 4);
379
380 if (fts_user_init_languages(user, fuser, error_r) < 0 ||
381 fts_user_init_data_language(user, fuser, error_r) < 0)
382 return -1;
383 if (fts_user_languages_fill_all(user, fuser, error_r) < 0)
384 return -1;
385 return 0;
386 }
387
/* Attach FTS state to user.

   If the user already has an FTS context, only its reference count is
   increased and 0 is returned. Otherwise a new context is allocated from
   the user's pool. When initialize_libfts is TRUE, the languages,
   tokenizers and filters are set up as well.

   Returns 0 on success. Returns -1 with *error_r set if the libfts setup
   fails; in that case no context is attached to the user. */
int fts_mail_user_init(struct mail_user *user, bool initialize_libfts,
		       const char **error_r)
{
	struct fts_user *fuser = FTS_USER_CONTEXT(user);

	if (fuser != NULL) {
		/* another fts plugin has already created the context */
		fuser->refcount++;
		return 0;
	}

	fuser = p_new(user->pool, struct fts_user, 1);
	fuser->refcount = 1;

	if (initialize_libfts &&
	    fts_mail_user_init_libfts(user, fuser, error_r) < 0) {
		/* release whatever was created before the failure */
		fts_user_free(fuser);
		return -1;
	}

	fuser->autoindex_exclude =
		mailbox_match_plugin_init(user, "fts_autoindex_exclude");
	MODULE_CONTEXT_SET(user, fts_user_module, fuser);
	return 0;
}
413
fts_mail_user_deinit(struct mail_user * user)414 void fts_mail_user_deinit(struct mail_user *user)
415 {
416 struct fts_user *fuser = FTS_USER_CONTEXT(user);
417
418 if (fuser != NULL) {
419 i_assert(fuser->refcount > 0);
420 if (--fuser->refcount == 0)
421 fts_user_free(fuser);
422 }
423 }
424