1 /*-
2 * Copyright 2018 Vsevolod Stakhov
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "map_helpers.h"
18 #include "map_private.h"
19 #include "khash.h"
20 #include "radix.h"
21 #include "rspamd.h"
22 #include "cryptobox.h"
23 #include "mempool_vars_internal.h"
24 #include "contrib/fastutf8/fastutf8.h"
25 #include "contrib/cdb/cdb.h"
26
27 #ifdef WITH_HYPERSCAN
28 #include "hs.h"
29 #endif
30 #ifndef WITH_PCRE2
31 #include <pcre.h>
32 #else
33 #include <pcre2.h>
34 #endif
35
36
/* Seed used to initialise the fast hash state of hash and radix helpers (keys are folded into it on insert) */
static const guint64 map_hash_seed = 0xdeadbabeULL;
/* Default value handed to the list parser by rspamd_radix_read for elements that have a key only */
static const gchar * const hash_fill = "1";
39
/* Value stored for a single map element by the hash, radix and regexp helpers */
struct rspamd_map_helper_value {
	gsize hits; /* Hit counter: passed to the traverse callbacks and optionally reset by them */
	gconstpointer key; /* Key string as stored in the helper's hash table */
	gchar value[]; /* Null terminated */
};
45
/* Hash of an rspamd_ftok_t key; presumably case insensitive, judging by rspamd_icase_hash - confirm */
#define rspamd_map_ftok_hash(t) (rspamd_icase_hash((t).begin, (t).len, rspamd_hash_seed ()))
/* Two keys are equal if their lengths match and rspamd_lc_cmp reports no difference */
#define rspamd_map_ftok_equal(a, b) ((a).len == (b).len && rspamd_lc_cmp((a).begin, (b).begin, (a).len) == 0)

/* Instantiates the `rspamd_map_hash` khash type: rspamd_ftok_t key -> struct rspamd_map_helper_value * */
KHASH_INIT (rspamd_map_hash, rspamd_ftok_t,
		struct rspamd_map_helper_value *, true,
		rspamd_map_ftok_hash, rspamd_map_ftok_equal);
52
/* State of a radix (IP list) map; the structure itself is allocated from `pool` */
struct rspamd_radix_map_helper {
	rspamd_mempool_t *pool; /* Owns the helper, the key copies and the values */
	khash_t(rspamd_map_hash) *htb; /* Textual key -> value, used to detect duplicates and to traverse */
	radix_compressed_t *trie; /* Radix trie created with the same pool */
	struct rspamd_map *map; /* Parent map, NULL is accepted by the constructor */
	rspamd_cryptobox_fast_hash_state_t hst; /* Running hash of inserted keys, becomes the map digest */
};
60
/* State of a plain key/value map; the structure itself is allocated from `pool` */
struct rspamd_hash_map_helper {
	rspamd_mempool_t *pool; /* Owns the helper, the key copies and the values */
	khash_t(rspamd_map_hash) *htb; /* Key -> value */
	struct rspamd_map *map; /* Parent map, NULL is accepted by the constructor */
	rspamd_cryptobox_fast_hash_state_t hst; /* Running hash of inserted keys, becomes the map digest */
};
67
/*
 * State of a CDB backed map.
 * NOTE(review): the code using this structure is outside of this part of the
 * file, field descriptions below are inferred from names and types - confirm.
 */
struct rspamd_cdb_map_helper {
	GQueue cdbs; /* presumably the list of opened cdb files */
	struct rspamd_map *map; /* Parent map */
	rspamd_cryptobox_fast_hash_state_t hst; /* presumably the digest state, as in other helpers */
	gsize total_size; /* presumably the cumulative size of all cdb files */
};
74
/* State of a regexp (or glob) map; the structure itself is allocated from `pool` */
struct rspamd_regexp_map_helper {
	rspamd_cryptobox_hash_state_t hst; /* Running hash of inserted regexp keys */
	guchar re_digest[rspamd_cryptobox_HASHBYTES]; /* Digest used to name hyperscan cache files; filled outside of this part of the file */
	rspamd_mempool_t *pool; /* Owns the helper, the key copies and the values */
	struct rspamd_map *map; /* Parent map */
	GPtrArray *regexps; /* Compiled rspamd_regexp_t objects, same order as `values` */
	GPtrArray *values; /* struct rspamd_map_helper_value for each regexp */
	khash_t(rspamd_map_hash) *htb; /* Regexp source -> value, used to detect duplicates and to traverse */
	enum rspamd_regexp_map_flags map_flags; /* Creation flags; the UTF flag is added when a regexp requires it */
#ifdef WITH_HYPERSCAN
	hs_database_t *hs_db; /* Compiled or deserialized hyperscan database */
	hs_scratch_t *hs_scratch; /* Hyperscan scratch space */
	gchar **patterns; /* One g_free'able string per regexp; presumably hyperscan pattern sources */
	gint *flags; /* presumably per-pattern hyperscan flags - confirm */
	gint *ids; /* presumably per-pattern hyperscan ids - confirm */
#endif
};
92
93 /**
94 * FSM for parsing lists
95 */
96
/*
 * Copies the token [c, p) into a newly g_malloc'ed, NUL terminated buffer `key`
 * and stores the result of g_strstrip () in `stripped_key`; `key` is the
 * pointer that has to be released with g_free ().
 * Leading whitespace is skipped by advancing `c`. The bound is tested before
 * `*c` is read, so `c` is never dereferenced once it has reached `p` (which may
 * be the end of the input buffer).
 * Expects `c`, `p`, `key` and `stripped_key` to be defined by the caller.
 */
#define MAP_STORE_KEY do { \
	while (p > c && g_ascii_isspace (*c)) { c ++; } \
	key = g_malloc (p - c + 1); \
	rspamd_strlcpy (key, c, p - c + 1); \
	stripped_key = g_strstrip (key); \
} while (0)

/*
 * Same as MAP_STORE_KEY, but fills `value` and `stripped_value`.
 */
#define MAP_STORE_VALUE do { \
	while (p > c && g_ascii_isspace (*c)) { c ++; } \
	value = g_malloc (p - c + 1); \
	rspamd_strlcpy (value, c, p - c + 1); \
	stripped_value = g_strstrip (value); \
} while (0)
110
111 gchar *
rspamd_parse_kv_list(gchar * chunk,gint len,struct map_cb_data * data,rspamd_map_insert_func func,const gchar * default_value,gboolean final)112 rspamd_parse_kv_list (
113 gchar * chunk,
114 gint len,
115 struct map_cb_data *data,
116 rspamd_map_insert_func func,
117 const gchar *default_value,
118 gboolean final)
119 {
120 enum {
121 map_skip_spaces_before_key = 0,
122 map_read_key,
123 map_read_key_quoted,
124 map_read_key_slashed,
125 map_skip_spaces_after_key,
126 map_backslash_quoted,
127 map_backslash_slashed,
128 map_read_key_after_slash,
129 map_read_value,
130 map_read_comment_start,
131 map_skip_comment,
132 map_read_eol,
133 };
134
135 gchar *c, *p, *key = NULL, *value = NULL, *stripped_key, *stripped_value, *end;
136 struct rspamd_map *map = data->map;
137 guint line_number = 0;
138
139 p = chunk;
140 c = p;
141 end = p + len;
142
143 while (p < end) {
144 switch (data->state) {
145 case map_skip_spaces_before_key:
146 if (g_ascii_isspace (*p)) {
147 p ++;
148 }
149 else {
150 if (*p == '"') {
151 p++;
152 c = p;
153 data->state = map_read_key_quoted;
154 }
155 else if (*p == '/') {
156 /* Note that c is on '/' here as '/' is a part of key */
157 c = p;
158 p++;
159 data->state = map_read_key_slashed;
160 }
161 else {
162 c = p;
163 data->state = map_read_key;
164 }
165 }
166 break;
167 case map_read_key:
168 /* read key */
169 /* Check here comments, eol and end of buffer */
170 if (*p == '#' && (p == c || *(p - 1) != '\\')) {
171 if (p - c > 0) {
172 /* Store a single key */
173 MAP_STORE_KEY;
174 func (data->cur_data, stripped_key, default_value);
175 msg_debug_map ("insert key only pair: %s -> %s; line: %d",
176 stripped_key, default_value, line_number);
177 g_free (key);
178 }
179
180 key = NULL;
181 data->state = map_read_comment_start;
182 }
183 else if (*p == '\r' || *p == '\n') {
184 if (p - c > 0) {
185 /* Store a single key */
186 MAP_STORE_KEY;
187 func (data->cur_data, stripped_key, default_value);
188 msg_debug_map ("insert key only pair: %s -> %s; line: %d",
189 stripped_key, default_value, line_number);
190 g_free (key);
191 }
192
193 data->state = map_read_eol;
194 key = NULL;
195 }
196 else if (g_ascii_isspace (*p)) {
197 if (p - c > 0) {
198 MAP_STORE_KEY;
199 data->state = map_skip_spaces_after_key;
200 }
201 else {
202 msg_err_map ("empty or invalid key found on line %d", line_number);
203 data->state = map_skip_comment;
204 }
205 }
206 else {
207 p++;
208 }
209 break;
210 case map_read_key_quoted:
211 if (*p == '\\') {
212 data->state = map_backslash_quoted;
213 p ++;
214 }
215 else if (*p == '"') {
216 /* Allow empty keys in this case */
217 if (p - c >= 0) {
218 MAP_STORE_KEY;
219 data->state = map_skip_spaces_after_key;
220 }
221 else {
222 g_assert_not_reached ();
223 }
224 p ++;
225 }
226 else {
227 p ++;
228 }
229 break;
230 case map_read_key_slashed:
231 if (*p == '\\') {
232 data->state = map_backslash_slashed;
233 p ++;
234 }
235 else if (*p == '/') {
236 /* Allow empty keys in this case */
237 if (p - c >= 0) {
238 data->state = map_read_key_after_slash;
239 }
240 else {
241 g_assert_not_reached ();
242 }
243 }
244 else {
245 p ++;
246 }
247 break;
248 case map_read_key_after_slash:
249 /*
250 * This state is equal to reading of key but '/' is not
251 * treated specially
252 */
253 if (*p == '#') {
254 if (p - c > 0) {
255 /* Store a single key */
256 MAP_STORE_KEY;
257 func (data->cur_data, stripped_key, default_value);
258 msg_debug_map ("insert key only pair: %s -> %s; line: %d",
259 stripped_key, default_value, line_number);
260 g_free (key);
261 key = NULL;
262 }
263
264 data->state = map_read_comment_start;
265 }
266 else if (*p == '\r' || *p == '\n') {
267 if (p - c > 0) {
268 /* Store a single key */
269 MAP_STORE_KEY;
270 func (data->cur_data, stripped_key, default_value);
271
272 msg_debug_map ("insert key only pair: %s -> %s; line: %d",
273 stripped_key, default_value, line_number);
274 g_free (key);
275 key = NULL;
276 }
277
278 data->state = map_read_eol;
279 key = NULL;
280 }
281 else if (g_ascii_isspace (*p)) {
282 if (p - c > 0) {
283 MAP_STORE_KEY;
284 data->state = map_skip_spaces_after_key;
285 }
286 else {
287 msg_err_map ("empty or invalid key found on line %d", line_number);
288 data->state = map_skip_comment;
289 }
290 }
291 else {
292 p ++;
293 }
294 break;
295 case map_backslash_quoted:
296 p ++;
297 data->state = map_read_key_quoted;
298 break;
299 case map_backslash_slashed:
300 p ++;
301 data->state = map_read_key_slashed;
302 break;
303 case map_skip_spaces_after_key:
304 if (*p == ' ' || *p == '\t') {
305 p ++;
306 }
307 else {
308 c = p;
309 data->state = map_read_value;
310 }
311 break;
312 case map_read_value:
313 if (key == NULL) {
314 /* Ignore line */
315 msg_err_map ("empty or invalid key found on line %d", line_number);
316 data->state = map_skip_comment;
317 }
318 else {
319 if (*p == '#') {
320 if (p - c > 0) {
321 /* Store a single key */
322 MAP_STORE_VALUE;
323 func (data->cur_data, stripped_key, stripped_value);
324 msg_debug_map ("insert key value pair: %s -> %s; line: %d",
325 stripped_key, stripped_value, line_number);
326 g_free (key);
327 g_free (value);
328 key = NULL;
329 value = NULL;
330 } else {
331 func (data->cur_data, stripped_key, default_value);
332 msg_debug_map ("insert key only pair: %s -> %s; line: %d",
333 stripped_key, default_value, line_number);
334 g_free (key);
335 key = NULL;
336 }
337
338 data->state = map_read_comment_start;
339 } else if (*p == '\r' || *p == '\n') {
340 if (p - c > 0) {
341 /* Store a single key */
342 MAP_STORE_VALUE;
343 func (data->cur_data, stripped_key, stripped_value);
344 msg_debug_map ("insert key value pair: %s -> %s",
345 stripped_key, stripped_value);
346 g_free (key);
347 g_free (value);
348 key = NULL;
349 value = NULL;
350 } else {
351 func (data->cur_data, stripped_key, default_value);
352 msg_debug_map ("insert key only pair: %s -> %s",
353 stripped_key, default_value);
354 g_free (key);
355 key = NULL;
356 }
357
358 data->state = map_read_eol;
359 key = NULL;
360 }
361 else {
362 p++;
363 }
364 }
365 break;
366 case map_read_comment_start:
367 if (*p == '#') {
368 data->state = map_skip_comment;
369 p ++;
370 key = NULL;
371 value = NULL;
372 }
373 else {
374 g_assert_not_reached ();
375 }
376 break;
377 case map_skip_comment:
378 if (*p == '\r' || *p == '\n') {
379 data->state = map_read_eol;
380 }
381 else {
382 p ++;
383 }
384 break;
385 case map_read_eol:
386 /* Skip \r\n and whitespaces */
387 if (*p == '\r' || *p == '\n') {
388 if (*p == '\n') {
389 /* We don't care about \r only line separators, they are too rare */
390 line_number ++;
391 }
392 p++;
393 }
394 else {
395 data->state = map_skip_spaces_before_key;
396 }
397 break;
398 default:
399 g_assert_not_reached ();
400 break;
401 }
402 }
403
404 if (final) {
405 /* Examine the state */
406 switch (data->state) {
407 case map_read_key:
408 if (p - c > 0) {
409 /* Store a single key */
410 MAP_STORE_KEY;
411 func (data->cur_data, stripped_key, default_value);
412 msg_debug_map ("insert key only pair: %s -> %s",
413 stripped_key, default_value);
414 g_free (key);
415 key = NULL;
416 }
417 break;
418 case map_read_value:
419 if (key == NULL) {
420 /* Ignore line */
421 msg_err_map ("empty or invalid key found on line %d", line_number);
422 data->state = map_skip_comment;
423 }
424 else {
425 if (p - c > 0) {
426 /* Store a single key */
427 MAP_STORE_VALUE;
428 func (data->cur_data, stripped_key, stripped_value);
429 msg_debug_map ("insert key value pair: %s -> %s",
430 stripped_key, stripped_value);
431 g_free (key);
432 g_free (value);
433 key = NULL;
434 value = NULL;
435 } else {
436 func (data->cur_data, stripped_key, default_value);
437 msg_debug_map ("insert key only pair: %s -> %s",
438 stripped_key, default_value);
439 g_free (key);
440 key = NULL;
441 }
442 }
443 break;
444 }
445
446 data->state = map_skip_spaces_before_key;
447 }
448
449 return c;
450 }
451
452 /**
453 * Radix tree helper function
454 */
455 void
rspamd_map_helper_insert_radix(gpointer st,gconstpointer key,gconstpointer value)456 rspamd_map_helper_insert_radix (gpointer st, gconstpointer key, gconstpointer value)
457 {
458 struct rspamd_radix_map_helper *r = (struct rspamd_radix_map_helper *)st;
459 struct rspamd_map_helper_value *val;
460 gsize vlen;
461 khiter_t k;
462 gconstpointer nk;
463 rspamd_ftok_t tok;
464 gint res;
465 struct rspamd_map *map;
466
467 map = r->map;
468 tok.begin = key;
469 tok.len = strlen (key);
470
471 k = kh_get (rspamd_map_hash, r->htb, tok);
472
473 if (k == kh_end (r->htb)) {
474 nk = rspamd_mempool_strdup (r->pool, key);
475 tok.begin = nk;
476 k = kh_put (rspamd_map_hash, r->htb, tok, &res);
477 }
478 else {
479 val = kh_value (r->htb, k);
480
481 if (strcmp (value, val->value) == 0) {
482 /* Same element, skip */
483 return;
484 }
485 else {
486 msg_warn_map ("duplicate radix entry found for map %s: %s (old value: '%s', new: '%s')",
487 map->name, key, val->value, value);
488 }
489
490 nk = kh_key (r->htb, k).begin;
491 val->key = nk;
492 kh_value (r->htb, k) = val;
493
494 return; /* do not touch radix in case of exact duplicate */
495 }
496
497 vlen = strlen (value);
498 val = rspamd_mempool_alloc0 (r->pool, sizeof (*val) +
499 vlen + 1);
500 memcpy (val->value, value, vlen);
501
502 nk = kh_key (r->htb, k).begin;
503 val->key = nk;
504 kh_value (r->htb, k) = val;
505 rspamd_radix_add_iplist (key, ",", r->trie, val, FALSE,
506 r->map->name);
507 rspamd_cryptobox_fast_hash_update (&r->hst, nk, tok.len);
508 }
509
510 void
rspamd_map_helper_insert_radix_resolve(gpointer st,gconstpointer key,gconstpointer value)511 rspamd_map_helper_insert_radix_resolve (gpointer st, gconstpointer key, gconstpointer value)
512 {
513 struct rspamd_radix_map_helper *r = (struct rspamd_radix_map_helper *)st;
514 struct rspamd_map_helper_value *val;
515 gsize vlen;
516 khiter_t k;
517 gconstpointer nk;
518 rspamd_ftok_t tok;
519 gint res;
520 struct rspamd_map *map;
521
522 map = r->map;
523 tok.begin = key;
524 tok.len = strlen (key);
525
526 k = kh_get (rspamd_map_hash, r->htb, tok);
527
528 if (k == kh_end (r->htb)) {
529 nk = rspamd_mempool_strdup (r->pool, key);
530 tok.begin = nk;
531 k = kh_put (rspamd_map_hash, r->htb, tok, &res);
532 }
533 else {
534 val = kh_value (r->htb, k);
535
536 if (strcmp (value, val->value) == 0) {
537 /* Same element, skip */
538 return;
539 }
540 else {
541 msg_warn_map ("duplicate radix entry found for map %s: %s (old value: '%s', new: '%s')",
542 map->name, key, val->value, value);
543 }
544
545 nk = kh_key (r->htb, k).begin;
546 val->key = nk;
547 kh_value (r->htb, k) = val;
548
549 return; /* do not touch radix in case of exact duplicate */
550 }
551
552 vlen = strlen (value);
553 val = rspamd_mempool_alloc0 (r->pool, sizeof (*val) +
554 vlen + 1);
555 memcpy (val->value, value, vlen);
556 nk = kh_key (r->htb, k).begin;
557 val->key = nk;
558 kh_value (r->htb, k) = val;
559 rspamd_radix_add_iplist (key, ",", r->trie, val, TRUE,
560 r->map->name);
561 rspamd_cryptobox_fast_hash_update (&r->hst, nk, tok.len);
562 }
563
564 void
rspamd_map_helper_insert_hash(gpointer st,gconstpointer key,gconstpointer value)565 rspamd_map_helper_insert_hash (gpointer st, gconstpointer key, gconstpointer value)
566 {
567 struct rspamd_hash_map_helper *ht = st;
568 struct rspamd_map_helper_value *val;
569 khiter_t k;
570 gconstpointer nk;
571 gsize vlen;
572 gint r;
573 rspamd_ftok_t tok;
574 struct rspamd_map *map;
575
576 tok.begin = key;
577 tok.len = strlen (key);
578 map = ht->map;
579
580 k = kh_get (rspamd_map_hash, ht->htb, tok);
581
582 if (k == kh_end (ht->htb)) {
583 nk = rspamd_mempool_strdup (ht->pool, key);
584 tok.begin = nk;
585 k = kh_put (rspamd_map_hash, ht->htb, tok, &r);
586 }
587 else {
588 val = kh_value (ht->htb, k);
589
590 if (strcmp (value, val->value) == 0) {
591 /* Same element, skip */
592 return;
593 }
594 else {
595 msg_warn_map ("duplicate hash entry found for map %s: %s (old value: '%s', new: '%s')",
596 map->name, key, val->value, value);
597 }
598 }
599
600 /* Null termination due to alloc0 */
601 vlen = strlen (value);
602 val = rspamd_mempool_alloc0 (ht->pool, sizeof (*val) + vlen + 1);
603 memcpy (val->value, value, vlen);
604
605 tok = kh_key (ht->htb, k);
606 nk = tok.begin;
607 val->key = nk;
608 kh_value (ht->htb, k) = val;
609
610 rspamd_cryptobox_fast_hash_update (&ht->hst, nk, tok.len);
611 }
612
613 void
rspamd_map_helper_insert_re(gpointer st,gconstpointer key,gconstpointer value)614 rspamd_map_helper_insert_re (gpointer st, gconstpointer key, gconstpointer value)
615 {
616 struct rspamd_regexp_map_helper *re_map = st;
617 struct rspamd_map *map;
618 rspamd_regexp_t *re;
619 gchar *escaped;
620 GError *err = NULL;
621 gint pcre_flags;
622 gsize escaped_len;
623 struct rspamd_map_helper_value *val;
624 khiter_t k;
625 rspamd_ftok_t tok;
626 gconstpointer nk;
627 gsize vlen;
628 gint r;
629
630 map = re_map->map;
631
632 tok.begin = key;
633 tok.len = strlen (key);
634
635 k = kh_get (rspamd_map_hash, re_map->htb, tok);
636
637 if (k == kh_end (re_map->htb)) {
638 nk = rspamd_mempool_strdup (re_map->pool, key);
639 tok.begin = nk;
640 k = kh_put (rspamd_map_hash, re_map->htb, tok, &r);
641 }
642 else {
643 val = kh_value (re_map->htb, k);
644
645 /* Always warn about regexp duplicate as it's likely a bad mistake */
646 msg_warn_map ("duplicate re entry found for map %s: %s (old value: '%s', new: '%s')",
647 map->name, key, val->value, value);
648
649 if (strcmp (val->value, value) == 0) {
650 /* Same value, skip */
651 return;
652 }
653
654 /* Replace value but do not touch regexp */
655 nk = kh_key (re_map->htb, k).begin;
656 val->key = nk;
657 kh_value (re_map->htb, k) = val;
658
659 return;
660 }
661
662 /* Check regexp stuff */
663 if (re_map->map_flags & RSPAMD_REGEXP_MAP_FLAG_GLOB) {
664 escaped = rspamd_str_regexp_escape (key, strlen (key), &escaped_len,
665 RSPAMD_REGEXP_ESCAPE_GLOB|RSPAMD_REGEXP_ESCAPE_UTF);
666 re = rspamd_regexp_new (escaped, NULL, &err);
667 g_free (escaped);
668 }
669 else {
670 re = rspamd_regexp_new (key, NULL, &err);
671 }
672
673 if (re == NULL) {
674 msg_err_map ("cannot parse regexp %s: %e", key, err);
675
676 if (err) {
677 g_error_free (err);
678 }
679
680 return;
681 }
682
683 vlen = strlen (value);
684 val = rspamd_mempool_alloc0 (re_map->pool, sizeof (*val) +
685 vlen + 1);
686 memcpy (val->value, value, vlen); /* Null terminated due to alloc0 previously */
687 nk = kh_key (re_map->htb, k).begin;
688 val->key = nk;
689 kh_value (re_map->htb, k) = val;
690 rspamd_cryptobox_hash_update (&re_map->hst, nk, tok.len);
691
692 pcre_flags = rspamd_regexp_get_pcre_flags (re);
693
694 #ifndef WITH_PCRE2
695 if (pcre_flags & PCRE_FLAG(UTF8)) {
696 re_map->map_flags |= RSPAMD_REGEXP_MAP_FLAG_UTF;
697 }
698 #else
699 if (pcre_flags & PCRE_FLAG(UTF)) {
700 re_map->map_flags |= RSPAMD_REGEXP_MAP_FLAG_UTF;
701 }
702 #endif
703
704 g_ptr_array_add (re_map->regexps, re);
705 g_ptr_array_add (re_map->values, val);
706 }
707
708 static void
rspamd_map_helper_traverse_regexp(void * data,rspamd_map_traverse_cb cb,gpointer cbdata,gboolean reset_hits)709 rspamd_map_helper_traverse_regexp (void *data,
710 rspamd_map_traverse_cb cb,
711 gpointer cbdata,
712 gboolean reset_hits)
713 {
714 rspamd_ftok_t tok;
715 struct rspamd_map_helper_value *val;
716 struct rspamd_regexp_map_helper *re_map = data;
717
718 kh_foreach (re_map->htb, tok, val, {
719 if (!cb (tok.begin, val->value, val->hits, cbdata)) {
720 break;
721 }
722
723 if (reset_hits) {
724 val->hits = 0;
725 }
726 });
727 }
728
729 struct rspamd_hash_map_helper *
rspamd_map_helper_new_hash(struct rspamd_map * map)730 rspamd_map_helper_new_hash (struct rspamd_map *map)
731 {
732 struct rspamd_hash_map_helper *htb;
733 rspamd_mempool_t *pool;
734
735 if (map) {
736 pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
737 map->tag, 0);
738 }
739 else {
740 pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
741 NULL, 0);
742 }
743
744 htb = rspamd_mempool_alloc0_type(pool, struct rspamd_hash_map_helper);
745 htb->htb = kh_init (rspamd_map_hash);
746 htb->pool = pool;
747 htb->map = map;
748 rspamd_cryptobox_fast_hash_init (&htb->hst, map_hash_seed);
749
750 return htb;
751 }
752
753 void
rspamd_map_helper_destroy_hash(struct rspamd_hash_map_helper * r)754 rspamd_map_helper_destroy_hash (struct rspamd_hash_map_helper *r)
755 {
756 if (r == NULL || r->pool == NULL) {
757 return;
758 }
759
760 rspamd_mempool_t *pool = r->pool;
761 kh_destroy (rspamd_map_hash, r->htb);
762 memset (r, 0, sizeof (*r));
763 rspamd_mempool_delete (pool);
764 }
765
766 static void
rspamd_map_helper_traverse_hash(void * data,rspamd_map_traverse_cb cb,gpointer cbdata,gboolean reset_hits)767 rspamd_map_helper_traverse_hash (void *data,
768 rspamd_map_traverse_cb cb,
769 gpointer cbdata,
770 gboolean reset_hits)
771 {
772 rspamd_ftok_t tok;
773 struct rspamd_map_helper_value *val;
774 struct rspamd_hash_map_helper *ht = data;
775
776 kh_foreach (ht->htb, tok, val, {
777 if (!cb (tok.begin, val->value, val->hits, cbdata)) {
778 break;
779 }
780
781 if (reset_hits) {
782 val->hits = 0;
783 }
784 });
785 }
786
787 struct rspamd_radix_map_helper *
rspamd_map_helper_new_radix(struct rspamd_map * map)788 rspamd_map_helper_new_radix (struct rspamd_map *map)
789 {
790 struct rspamd_radix_map_helper *r;
791 rspamd_mempool_t *pool;
792 const gchar *name = "unnamed";
793
794 if (map) {
795 pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
796 map->tag, 0);
797 name = map->name;
798 }
799 else {
800 pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
801 NULL, 0);
802 }
803
804 r = rspamd_mempool_alloc0_type (pool, struct rspamd_radix_map_helper);
805 r->trie = radix_create_compressed_with_pool (pool, name);
806 r->htb = kh_init (rspamd_map_hash);
807 r->pool = pool;
808 r->map = map;
809 rspamd_cryptobox_fast_hash_init (&r->hst, map_hash_seed);
810
811 return r;
812 }
813
814 void
rspamd_map_helper_destroy_radix(struct rspamd_radix_map_helper * r)815 rspamd_map_helper_destroy_radix (struct rspamd_radix_map_helper *r)
816 {
817 if (r == NULL || !r->pool) {
818 return;
819 }
820
821 kh_destroy (rspamd_map_hash, r->htb);
822 rspamd_mempool_t *pool = r->pool;
823 memset (r, 0, sizeof (*r));
824 rspamd_mempool_delete (pool);
825 }
826
827 static void
rspamd_map_helper_traverse_radix(void * data,rspamd_map_traverse_cb cb,gpointer cbdata,gboolean reset_hits)828 rspamd_map_helper_traverse_radix (void *data,
829 rspamd_map_traverse_cb cb,
830 gpointer cbdata,
831 gboolean reset_hits)
832 {
833 rspamd_ftok_t tok;
834 struct rspamd_map_helper_value *val;
835 struct rspamd_radix_map_helper *r = data;
836
837 kh_foreach (r->htb, tok, val, {
838 if (!cb (tok.begin, val->value, val->hits, cbdata)) {
839 break;
840 }
841
842 if (reset_hits) {
843 val->hits = 0;
844 }
845 });
846 }
847
848 struct rspamd_regexp_map_helper *
rspamd_map_helper_new_regexp(struct rspamd_map * map,enum rspamd_regexp_map_flags flags)849 rspamd_map_helper_new_regexp (struct rspamd_map *map,
850 enum rspamd_regexp_map_flags flags)
851 {
852 struct rspamd_regexp_map_helper *re_map;
853 rspamd_mempool_t *pool;
854
855 pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
856 map->tag, 0);
857
858 re_map = rspamd_mempool_alloc0_type (pool, struct rspamd_regexp_map_helper);
859 re_map->pool = pool;
860 re_map->values = g_ptr_array_new ();
861 re_map->regexps = g_ptr_array_new ();
862 re_map->map = map;
863 re_map->map_flags = flags;
864 re_map->htb = kh_init (rspamd_map_hash);
865 rspamd_cryptobox_hash_init (&re_map->hst, NULL, 0);
866
867 return re_map;
868 }
869
870
871 void
rspamd_map_helper_destroy_regexp(struct rspamd_regexp_map_helper * re_map)872 rspamd_map_helper_destroy_regexp (struct rspamd_regexp_map_helper *re_map)
873 {
874 rspamd_regexp_t *re;
875 guint i;
876
877 if (!re_map || !re_map->regexps) {
878 return;
879 }
880
881 #ifdef WITH_HYPERSCAN
882 if (re_map->hs_scratch) {
883 hs_free_scratch (re_map->hs_scratch);
884 }
885 if (re_map->hs_db) {
886 hs_free_database (re_map->hs_db);
887 }
888 if (re_map->patterns) {
889 for (i = 0; i < re_map->regexps->len; i ++) {
890 g_free (re_map->patterns[i]);
891 }
892
893 g_free (re_map->patterns);
894 }
895 if (re_map->flags) {
896 g_free (re_map->flags);
897 }
898 if (re_map->ids) {
899 g_free (re_map->ids);
900 }
901 #endif
902
903 for (i = 0; i < re_map->regexps->len; i ++) {
904 re = g_ptr_array_index (re_map->regexps, i);
905 rspamd_regexp_unref (re);
906 }
907
908 g_ptr_array_free (re_map->regexps, TRUE);
909 g_ptr_array_free (re_map->values, TRUE);
910 kh_destroy (rspamd_map_hash, re_map->htb);
911
912 rspamd_mempool_t *pool = re_map->pool;
913 memset (re_map, 0, sizeof (*re_map));
914 rspamd_mempool_delete (pool);
915 }
916
917 gchar *
rspamd_kv_list_read(gchar * chunk,gint len,struct map_cb_data * data,gboolean final)918 rspamd_kv_list_read (
919 gchar * chunk,
920 gint len,
921 struct map_cb_data *data,
922 gboolean final)
923 {
924 if (data->cur_data == NULL) {
925 data->cur_data = rspamd_map_helper_new_hash (data->map);
926 }
927
928 return rspamd_parse_kv_list (
929 chunk,
930 len,
931 data,
932 rspamd_map_helper_insert_hash,
933 "",
934 final);
935 }
936
937 void
rspamd_kv_list_fin(struct map_cb_data * data,void ** target)938 rspamd_kv_list_fin (struct map_cb_data *data, void **target)
939 {
940 struct rspamd_map *map = data->map;
941 struct rspamd_hash_map_helper *htb;
942
943 if (data->cur_data) {
944 htb = (struct rspamd_hash_map_helper *)data->cur_data;
945 msg_info_map ("read hash of %d elements from %s", kh_size (htb->htb),
946 map->name);
947 data->map->traverse_function = rspamd_map_helper_traverse_hash;
948 data->map->nelts = kh_size (htb->htb);
949 data->map->digest = rspamd_cryptobox_fast_hash_final (&htb->hst);
950 }
951
952 if (target) {
953 *target = data->cur_data;
954 }
955
956 if (data->prev_data) {
957 htb = (struct rspamd_hash_map_helper *)data->prev_data;
958 rspamd_map_helper_destroy_hash (htb);
959 }
960 }
961
962 void
rspamd_kv_list_dtor(struct map_cb_data * data)963 rspamd_kv_list_dtor (struct map_cb_data *data)
964 {
965 struct rspamd_hash_map_helper *htb;
966
967 if (data->cur_data) {
968 htb = (struct rspamd_hash_map_helper *)data->cur_data;
969 rspamd_map_helper_destroy_hash (htb);
970 }
971 }
972
973 gchar *
rspamd_radix_read(gchar * chunk,gint len,struct map_cb_data * data,gboolean final)974 rspamd_radix_read (
975 gchar * chunk,
976 gint len,
977 struct map_cb_data *data,
978 gboolean final)
979 {
980 struct rspamd_radix_map_helper *r;
981 struct rspamd_map *map = data->map;
982
983 if (data->cur_data == NULL) {
984 r = rspamd_map_helper_new_radix (map);
985 data->cur_data = r;
986 }
987
988 return rspamd_parse_kv_list (
989 chunk,
990 len,
991 data,
992 rspamd_map_helper_insert_radix,
993 hash_fill,
994 final);
995 }
996
997 void
rspamd_radix_fin(struct map_cb_data * data,void ** target)998 rspamd_radix_fin (struct map_cb_data *data, void **target)
999 {
1000 struct rspamd_map *map = data->map;
1001 struct rspamd_radix_map_helper *r;
1002
1003 if (data->cur_data) {
1004 r = (struct rspamd_radix_map_helper *)data->cur_data;
1005 msg_info_map ("read radix trie of %z elements: %s",
1006 radix_get_size (r->trie), radix_get_info (r->trie));
1007 data->map->traverse_function = rspamd_map_helper_traverse_radix;
1008 data->map->nelts = kh_size (r->htb);
1009 data->map->digest = rspamd_cryptobox_fast_hash_final (&r->hst);
1010 }
1011
1012 if (target) {
1013 *target = data->cur_data;
1014 }
1015
1016 if (data->prev_data) {
1017 r = (struct rspamd_radix_map_helper *)data->prev_data;
1018 rspamd_map_helper_destroy_radix (r);
1019 }
1020 }
1021
1022 void
rspamd_radix_dtor(struct map_cb_data * data)1023 rspamd_radix_dtor (struct map_cb_data *data)
1024 {
1025 struct rspamd_radix_map_helper *r;
1026
1027 if (data->cur_data) {
1028 r = (struct rspamd_radix_map_helper *)data->cur_data;
1029 rspamd_map_helper_destroy_radix (r);
1030 }
1031 }
1032
1033 #ifdef WITH_HYPERSCAN
/* Data of the config pool destructor that removes obsolete hyperscan cache files */
struct rspamd_re_maps_cache_dtor_cbdata {
	struct rspamd_config *cfg; /* Config whose pool runs the destructor */
	GHashTable *valid_re_hashes; /* Own reference to the set of valid cache file basenames */
	gchar *dirname; /* Directory of the cache files, released with g_free */
};
1039
1040 static void
rspamd_re_maps_cache_cleanup_dtor(gpointer ud)1041 rspamd_re_maps_cache_cleanup_dtor (gpointer ud)
1042 {
1043 struct rspamd_re_maps_cache_dtor_cbdata *cbd =
1044 (struct rspamd_re_maps_cache_dtor_cbdata *)ud;
1045 GPtrArray *cache_files;
1046 GError *err = NULL;
1047 struct rspamd_config *cfg;
1048
1049 cfg = cbd->cfg;
1050
1051 if (cfg->cur_worker != NULL) {
1052 /* Skip dtor, limit it to main process only */
1053 return;
1054 }
1055
1056 cache_files = rspamd_glob_path (cbd->dirname, "*.hsmc", FALSE, &err);
1057
1058 if (!cache_files) {
1059 msg_err_config ("cannot glob files in %s: %e", cbd->dirname, err);
1060 g_error_free (err);
1061 }
1062 else {
1063 const gchar *fname;
1064 guint i;
1065
1066 PTR_ARRAY_FOREACH (cache_files, i, fname) {
1067 gchar *basename = g_path_get_basename (fname);
1068
1069 if (g_hash_table_lookup (cbd->valid_re_hashes, basename) == NULL) {
1070 gchar *dir;
1071
1072 dir = g_path_get_dirname (fname);
1073
1074 /* Sanity check to avoid removal of something bad */
1075 if (strcmp (dir, cbd->dirname) != 0) {
1076 msg_err_config ("bogus file found: %s in %s, skip deleting",
1077 fname, dir);
1078 }
1079 else {
1080 if (unlink (fname) == -1) {
1081 msg_err_config ("cannot delete obsolete file %s in %s: %s",
1082 fname, dir, strerror (errno));
1083 }
1084 else {
1085 msg_info_config ("deleted obsolete file %s in %s",
1086 fname, dir);
1087 }
1088 }
1089
1090 g_free (dir);
1091 }
1092 else {
1093 msg_debug_config ("valid re cache file %s", fname);
1094 }
1095
1096 g_free (basename);
1097 }
1098
1099 g_ptr_array_free (cache_files, TRUE);
1100 }
1101
1102 g_hash_table_unref (cbd->valid_re_hashes);
1103 g_free (cbd->dirname);
1104 }
1105
1106 static void
rspamd_re_map_cache_update(const gchar * fname,struct rspamd_config * cfg)1107 rspamd_re_map_cache_update (const gchar *fname, struct rspamd_config *cfg)
1108 {
1109 GHashTable *valid_re_hashes;
1110
1111 valid_re_hashes = rspamd_mempool_get_variable (cfg->cfg_pool,
1112 RSPAMD_MEMPOOL_RE_MAPS_CACHE);
1113
1114 if (!valid_re_hashes) {
1115 valid_re_hashes = g_hash_table_new_full (g_str_hash, g_str_equal,
1116 g_free, NULL);
1117 rspamd_mempool_set_variable (cfg->cfg_pool,
1118 RSPAMD_MEMPOOL_RE_MAPS_CACHE,
1119 valid_re_hashes, (rspamd_mempool_destruct_t)g_hash_table_unref);
1120
1121 /* We also add a cleanup dtor for all hashes */
1122 static struct rspamd_re_maps_cache_dtor_cbdata cbd;
1123
1124 cbd.valid_re_hashes = g_hash_table_ref (valid_re_hashes);
1125 cbd.cfg = cfg;
1126 cbd.dirname = g_path_get_dirname (fname);
1127 rspamd_mempool_add_destructor (cfg->cfg_pool,
1128 rspamd_re_maps_cache_cleanup_dtor, &cbd);
1129 }
1130
1131 g_hash_table_insert (valid_re_hashes, g_path_get_basename (fname), "1");
1132 }
1133
1134 static gboolean
rspamd_try_load_re_map_cache(struct rspamd_regexp_map_helper * re_map)1135 rspamd_try_load_re_map_cache (struct rspamd_regexp_map_helper *re_map)
1136 {
1137 gchar fp[PATH_MAX];
1138 gpointer data;
1139 gsize len;
1140 struct rspamd_map *map;
1141
1142 map = re_map->map;
1143
1144 if (!map->cfg->hs_cache_dir) {
1145 return FALSE;
1146 }
1147
1148 rspamd_snprintf (fp, sizeof (fp), "%s/%*xs.hsmc",
1149 map->cfg->hs_cache_dir,
1150 (gint)rspamd_cryptobox_HASHBYTES / 2, re_map->re_digest);
1151
1152 if ((data = rspamd_file_xmap (fp, PROT_READ, &len, TRUE)) != NULL) {
1153 if (hs_deserialize_database (data, len, &re_map->hs_db) == HS_SUCCESS) {
1154 rspamd_re_map_cache_update (fp, map->cfg);
1155 munmap (data, len);
1156
1157 msg_info_map ("loaded hypersan cache from %s (%Hz length) for %s",
1158 fp, len, map->name);
1159
1160 return TRUE;
1161 }
1162
1163 msg_info_map ("invalid hypersan cache in %s (%Hz length) for %s, removing file",
1164 fp, len, map->name);
1165 munmap (data, len);
1166 /* Remove stale file */
1167 (void)unlink (fp);
1168 }
1169
1170 return FALSE;
1171 }
1172
1173 static gboolean
rspamd_try_save_re_map_cache(struct rspamd_regexp_map_helper * re_map)1174 rspamd_try_save_re_map_cache (struct rspamd_regexp_map_helper *re_map)
1175 {
1176 gchar fp[PATH_MAX], np[PATH_MAX];
1177 gsize len;
1178 gint fd;
1179 char *bytes = NULL;
1180 struct rspamd_map *map;
1181
1182 map = re_map->map;
1183
1184 if (!map->cfg->hs_cache_dir) {
1185 return FALSE;
1186 }
1187
1188 rspamd_snprintf (fp, sizeof (fp), "%s/%*xs.hsmc.tmp",
1189 re_map->map->cfg->hs_cache_dir,
1190 (gint)rspamd_cryptobox_HASHBYTES / 2, re_map->re_digest);
1191
1192 if ((fd = rspamd_file_xopen (fp, O_WRONLY | O_CREAT | O_EXCL, 00644, 0)) != -1) {
1193 if (hs_serialize_database (re_map->hs_db, &bytes, &len) == HS_SUCCESS) {
1194 if (write (fd, bytes, len) == -1) {
1195 msg_warn_map ("cannot write hyperscan cache to %s: %s",
1196 fp, strerror (errno));
1197 unlink (fp);
1198 free (bytes);
1199 }
1200 else {
1201 free (bytes);
1202 fsync (fd);
1203
1204 rspamd_snprintf (np, sizeof (np), "%s/%*xs.hsmc",
1205 re_map->map->cfg->hs_cache_dir,
1206 (gint)rspamd_cryptobox_HASHBYTES / 2, re_map->re_digest);
1207
1208 if (rename (fp, np) == -1) {
1209 msg_warn_map ("cannot rename hyperscan cache from %s to %s: %s",
1210 fp, np, strerror (errno));
1211 unlink (fp);
1212 }
1213 else {
1214 msg_info_map ("written cached hyperscan data for %s to %s (%Hz length)",
1215 map->name, np, len);
1216
1217 rspamd_re_map_cache_update (np, map->cfg);
1218 }
1219 }
1220 }
1221 else {
1222 msg_warn_map ("cannot serialize hyperscan cache to %s: %s",
1223 fp, strerror (errno));
1224 unlink (fp);
1225 }
1226
1227
1228 close (fd);
1229 }
1230
1231 return FALSE;
1232 }
1233
1234 static gboolean
rspamd_re_map_cache_cleanup_old(struct rspamd_regexp_map_helper * old_re_map)1235 rspamd_re_map_cache_cleanup_old (struct rspamd_regexp_map_helper *old_re_map)
1236 {
1237 gchar fp[PATH_MAX];
1238 struct rspamd_map *map;
1239
1240 map = old_re_map->map;
1241
1242 if (!map->cfg->hs_cache_dir) {
1243 return FALSE;
1244 }
1245
1246 rspamd_snprintf (fp, sizeof (fp), "%s/%*xs.hsmc",
1247 map->cfg->hs_cache_dir,
1248 (gint)rspamd_cryptobox_HASHBYTES / 2, old_re_map->re_digest);
1249
1250 msg_info_map ("unlink stale cache file for %s: %s", map->name, fp);
1251
1252 if (unlink (fp) == -1) {
1253 msg_warn_map ("cannot unlink stale cache file for %s (%s): %s",
1254 map->name, fp, strerror (errno));
1255 return FALSE;
1256 }
1257
1258 GHashTable *valid_re_hashes;
1259
1260 valid_re_hashes = rspamd_mempool_get_variable (map->cfg->cfg_pool,
1261 RSPAMD_MEMPOOL_RE_MAPS_CACHE);
1262
1263 if (valid_re_hashes) {
1264 g_hash_table_remove (valid_re_hashes, fp);
1265 }
1266
1267 return TRUE;
1268 }
1269
1270 #endif
1271
/**
 * Finish the construction of a regexp map helper.
 *
 * When built with hyperscan support this converts every regexp of the map
 * into an escaped hyperscan pattern with the corresponding flags, then either
 * loads a cached database or compiles a new one (and tries to cache it), and
 * finally allocates the hyperscan scratch space.
 *
 * If compilation or scratch allocation fails, `re_map->hs_db` is reset to
 * NULL; the match functions in this file only use hyperscan when both
 * `hs_db` and `hs_scratch` are set. Without hyperscan support the function
 * does nothing.
 *
 * @param re_map regexp map helper with all regexps already inserted
 */
static void
rspamd_re_map_finalize (struct rspamd_regexp_map_helper *re_map)
{
#ifdef WITH_HYPERSCAN
	guint i;
	hs_platform_info_t plt;
	hs_compile_error_t *err;
	struct rspamd_map *map;
	rspamd_regexp_t *re;
	gint pcre_flags;

	/* `map` is also referenced implicitly by the msg_*_map logging macros */
	map = re_map->map;

#ifndef __aarch64__
	if (!(map->cfg->libs_ctx->crypto_ctx->cpu_config & CPUID_SSSE3)) {
		msg_info_map ("disable hyperscan for map %s, ssse3 instructions are not supported by CPU",
				map->name);
		return;
	}
#endif

	if (hs_populate_platform (&plt) != HS_SUCCESS) {
		msg_err_map ("cannot populate hyperscan platform");
		return;
	}

	re_map->patterns = g_new (gchar *, re_map->regexps->len);
	re_map->flags = g_new (gint, re_map->regexps->len);
	re_map->ids = g_new (gint, re_map->regexps->len);

	for (i = 0; i < re_map->regexps->len; i ++) {
		const gchar *pat;
		gchar *escaped;
		gint pat_flags;
		/*
		 * Flags are accumulated in a local and stored once at the end:
		 * the arrays come from g_new and are not zeroed, and assigning
		 * the base flag after OR-ing HS_FLAG_UTF8 would drop the latter.
		 */
		gint hs_flags = HS_FLAG_SINGLEMATCH;

		re = g_ptr_array_index (re_map->regexps, i);
		pcre_flags = rspamd_regexp_get_pcre_flags (re);
		pat = rspamd_regexp_get_pattern (re);
		pat_flags = rspamd_regexp_get_flags (re);

		if (pat_flags & RSPAMD_REGEXP_FLAG_UTF) {
			escaped = rspamd_str_regexp_escape (pat, strlen (pat), NULL,
					RSPAMD_REGEXP_ESCAPE_RE|RSPAMD_REGEXP_ESCAPE_UTF);
			hs_flags |= HS_FLAG_UTF8;
		}
		else {
			escaped = rspamd_str_regexp_escape (pat, strlen (pat), NULL,
					RSPAMD_REGEXP_ESCAPE_RE);
		}

		re_map->patterns[i] = escaped;

		/* Translate PCRE compile flags to their hyperscan counterparts */
#ifndef WITH_PCRE2
		if (pcre_flags & PCRE_FLAG(UTF8)) {
			hs_flags |= HS_FLAG_UTF8;
		}
#else
		if (pcre_flags & PCRE_FLAG(UTF)) {
			hs_flags |= HS_FLAG_UTF8;
		}
#endif
		if (pcre_flags & PCRE_FLAG(CASELESS)) {
			hs_flags |= HS_FLAG_CASELESS;
		}
		if (pcre_flags & PCRE_FLAG(MULTILINE)) {
			hs_flags |= HS_FLAG_MULTILINE;
		}
		if (pcre_flags & PCRE_FLAG(DOTALL)) {
			hs_flags |= HS_FLAG_DOTALL;
		}
		if (rspamd_regexp_get_maxhits (re) == 1) {
			/* Already part of the base flags, kept as in the original */
			hs_flags |= HS_FLAG_SINGLEMATCH;
		}

		re_map->flags[i] = hs_flags;
		/* Hyperscan id is the index of the regexp in the map arrays */
		re_map->ids[i] = i;
	}

	if (re_map->regexps->len > 0 && re_map->patterns) {

		if (!rspamd_try_load_re_map_cache (re_map)) {
			gdouble ts1 = rspamd_get_ticks (FALSE);

			if (hs_compile_multi ((const gchar **) re_map->patterns,
					re_map->flags,
					re_map->ids,
					re_map->regexps->len,
					HS_MODE_BLOCK,
					&plt,
					&re_map->hs_db,
					&err) != HS_SUCCESS) {

				msg_err_map ("cannot create tree of regexp when processing '%s': %s",
						err->expression >= 0 ?
								re_map->patterns[err->expression] :
								"unknown regexp", err->message);
				re_map->hs_db = NULL;
				hs_free_compile_error (err);

				return;
			}

			ts1 = (rspamd_get_ticks (FALSE) - ts1) * 1000.0;
			msg_info_map ("hyperscan compiled %d regular expressions from %s in %.1f ms",
					re_map->regexps->len, re_map->map->name, ts1);
			/* Caching is best effort: a failure here is only logged */
			rspamd_try_save_re_map_cache (re_map);
		}
		else {
			msg_info_map ("hyperscan read %d cached regular expressions from %s",
					re_map->regexps->len, re_map->map->name);
		}

		if (hs_alloc_scratch (re_map->hs_db, &re_map->hs_scratch) != HS_SUCCESS) {
			msg_err_map ("cannot allocate scratch space for hyperscan");
			hs_free_database (re_map->hs_db);
			re_map->hs_db = NULL;
		}
	}
	else {
		msg_err_map ("regexp map is empty");
	}
#endif
}
1395
1396 gchar *
rspamd_regexp_list_read_single(gchar * chunk,gint len,struct map_cb_data * data,gboolean final)1397 rspamd_regexp_list_read_single (
1398 gchar *chunk,
1399 gint len,
1400 struct map_cb_data *data,
1401 gboolean final)
1402 {
1403 struct rspamd_regexp_map_helper *re_map;
1404
1405 if (data->cur_data == NULL) {
1406 re_map = rspamd_map_helper_new_regexp (data->map, 0);
1407 data->cur_data = re_map;
1408 }
1409
1410 return rspamd_parse_kv_list (
1411 chunk,
1412 len,
1413 data,
1414 rspamd_map_helper_insert_re,
1415 hash_fill,
1416 final);
1417 }
1418
1419 gchar *
rspamd_glob_list_read_single(gchar * chunk,gint len,struct map_cb_data * data,gboolean final)1420 rspamd_glob_list_read_single (
1421 gchar *chunk,
1422 gint len,
1423 struct map_cb_data *data,
1424 gboolean final)
1425 {
1426 struct rspamd_regexp_map_helper *re_map;
1427
1428 if (data->cur_data == NULL) {
1429 re_map = rspamd_map_helper_new_regexp (data->map, RSPAMD_REGEXP_MAP_FLAG_GLOB);
1430 data->cur_data = re_map;
1431 }
1432
1433 return rspamd_parse_kv_list (
1434 chunk,
1435 len,
1436 data,
1437 rspamd_map_helper_insert_re,
1438 hash_fill,
1439 final);
1440 }
1441
1442 gchar *
rspamd_regexp_list_read_multiple(gchar * chunk,gint len,struct map_cb_data * data,gboolean final)1443 rspamd_regexp_list_read_multiple (
1444 gchar *chunk,
1445 gint len,
1446 struct map_cb_data *data,
1447 gboolean final)
1448 {
1449 struct rspamd_regexp_map_helper *re_map;
1450
1451 if (data->cur_data == NULL) {
1452 re_map = rspamd_map_helper_new_regexp (data->map,
1453 RSPAMD_REGEXP_MAP_FLAG_MULTIPLE);
1454 data->cur_data = re_map;
1455 }
1456
1457 return rspamd_parse_kv_list (
1458 chunk,
1459 len,
1460 data,
1461 rspamd_map_helper_insert_re,
1462 hash_fill,
1463 final);
1464 }
1465
1466 gchar *
rspamd_glob_list_read_multiple(gchar * chunk,gint len,struct map_cb_data * data,gboolean final)1467 rspamd_glob_list_read_multiple (
1468 gchar *chunk,
1469 gint len,
1470 struct map_cb_data *data,
1471 gboolean final)
1472 {
1473 struct rspamd_regexp_map_helper *re_map;
1474
1475 if (data->cur_data == NULL) {
1476 re_map = rspamd_map_helper_new_regexp (data->map,
1477 RSPAMD_REGEXP_MAP_FLAG_GLOB|RSPAMD_REGEXP_MAP_FLAG_MULTIPLE);
1478 data->cur_data = re_map;
1479 }
1480
1481 return rspamd_parse_kv_list (
1482 chunk,
1483 len,
1484 data,
1485 rspamd_map_helper_insert_re,
1486 hash_fill,
1487 final);
1488 }
1489
1490
1491 void
rspamd_regexp_list_fin(struct map_cb_data * data,void ** target)1492 rspamd_regexp_list_fin (struct map_cb_data *data, void **target)
1493 {
1494 struct rspamd_regexp_map_helper *re_map = NULL, *old_re_map;
1495 struct rspamd_map *map = data->map;
1496
1497 if (data->cur_data) {
1498 re_map = data->cur_data;
1499 rspamd_cryptobox_hash_final (&re_map->hst, re_map->re_digest);
1500 memcpy (&data->map->digest, re_map->re_digest, sizeof (data->map->digest));
1501 rspamd_re_map_finalize (re_map);
1502 msg_info_map ("read regexp list of %ud elements",
1503 re_map->regexps->len);
1504 data->map->traverse_function = rspamd_map_helper_traverse_regexp;
1505 data->map->nelts = kh_size (re_map->htb);
1506 }
1507
1508 if (target) {
1509 *target = data->cur_data;
1510 }
1511
1512 if (data->prev_data) {
1513 old_re_map = data->prev_data;
1514
1515 #ifdef WITH_HYPERSCAN
1516 if (re_map && memcmp (re_map->re_digest, old_re_map->re_digest,
1517 sizeof (re_map->re_digest)) != 0) {
1518 /* Cleanup old stuff */
1519 rspamd_re_map_cache_cleanup_old (old_re_map);
1520 }
1521 #endif
1522
1523 rspamd_map_helper_destroy_regexp (old_re_map);
1524 }
1525 }
1526 void
rspamd_regexp_list_dtor(struct map_cb_data * data)1527 rspamd_regexp_list_dtor (struct map_cb_data *data)
1528 {
1529 if (data->cur_data) {
1530 rspamd_map_helper_destroy_regexp (data->cur_data);
1531 }
1532 }
1533
1534 #ifdef WITH_HYPERSCAN
1535 static int
rspamd_match_hs_single_handler(unsigned int id,unsigned long long from,unsigned long long to,unsigned int flags,void * context)1536 rspamd_match_hs_single_handler (unsigned int id, unsigned long long from,
1537 unsigned long long to,
1538 unsigned int flags, void *context)
1539 {
1540 guint *i = context;
1541 /* Always return non-zero as we need a single match here */
1542
1543 *i = id;
1544
1545 return 1;
1546 }
1547 #endif
1548
1549 gconstpointer
rspamd_match_regexp_map_single(struct rspamd_regexp_map_helper * map,const gchar * in,gsize len)1550 rspamd_match_regexp_map_single (struct rspamd_regexp_map_helper *map,
1551 const gchar *in, gsize len)
1552 {
1553 guint i;
1554 rspamd_regexp_t *re;
1555 gint res = 0;
1556 gpointer ret = NULL;
1557 struct rspamd_map_helper_value *val;
1558 gboolean validated = FALSE;
1559
1560 g_assert (in != NULL);
1561
1562 if (map == NULL || len == 0 || map->regexps == NULL) {
1563 return NULL;
1564 }
1565
1566 if (map->map_flags & RSPAMD_REGEXP_MAP_FLAG_UTF) {
1567 if (rspamd_fast_utf8_validate (in, len) == 0) {
1568 validated = TRUE;
1569 }
1570 }
1571 else {
1572 validated = TRUE;
1573 }
1574
1575 #ifdef WITH_HYPERSCAN
1576 if (map->hs_db && map->hs_scratch) {
1577
1578 if (validated) {
1579
1580 res = hs_scan (map->hs_db, in, len, 0, map->hs_scratch,
1581 rspamd_match_hs_single_handler, (void *)&i);
1582
1583 if (res == HS_SCAN_TERMINATED) {
1584 res = 1;
1585 val = g_ptr_array_index (map->values, i);
1586
1587 ret = val->value;
1588 val->hits ++;
1589 }
1590
1591 return ret;
1592 }
1593 }
1594 #endif
1595
1596 if (!res) {
1597 /* PCRE version */
1598 for (i = 0; i < map->regexps->len; i ++) {
1599 re = g_ptr_array_index (map->regexps, i);
1600
1601 if (rspamd_regexp_search (re, in, len, NULL, NULL, !validated, NULL)) {
1602 val = g_ptr_array_index (map->values, i);
1603
1604 ret = val->value;
1605 val->hits ++;
1606 break;
1607 }
1608 }
1609 }
1610
1611 return ret;
1612 }
1613
1614 #ifdef WITH_HYPERSCAN
/* Context handed to rspamd_match_hs_multiple_handler via hs_scan */
struct rspamd_multiple_cbdata {
	GPtrArray *ar; /* Receives the value of every matched expression */
	struct rspamd_regexp_map_helper *map; /* Map whose values are looked up by id */
};
1619
1620 static int
rspamd_match_hs_multiple_handler(unsigned int id,unsigned long long from,unsigned long long to,unsigned int flags,void * context)1621 rspamd_match_hs_multiple_handler (unsigned int id, unsigned long long from,
1622 unsigned long long to,
1623 unsigned int flags, void *context)
1624 {
1625 struct rspamd_multiple_cbdata *cbd = context;
1626 struct rspamd_map_helper_value *val;
1627
1628
1629 if (id < cbd->map->values->len) {
1630 val = g_ptr_array_index (cbd->map->values, id);
1631 val->hits ++;
1632 g_ptr_array_add (cbd->ar, val->value);
1633 }
1634
1635 /* Always return zero as we need all matches here */
1636 return 0;
1637 }
1638 #endif
1639
1640 GPtrArray*
rspamd_match_regexp_map_all(struct rspamd_regexp_map_helper * map,const gchar * in,gsize len)1641 rspamd_match_regexp_map_all (struct rspamd_regexp_map_helper *map,
1642 const gchar *in, gsize len)
1643 {
1644 guint i;
1645 rspamd_regexp_t *re;
1646 GPtrArray *ret;
1647 gint res = 0;
1648 gboolean validated = FALSE;
1649 struct rspamd_map_helper_value *val;
1650
1651 if (map == NULL || map->regexps == NULL || len == 0) {
1652 return NULL;
1653 }
1654
1655 g_assert (in != NULL);
1656
1657 if (map->map_flags & RSPAMD_REGEXP_MAP_FLAG_UTF) {
1658 if (rspamd_fast_utf8_validate (in, len) == 0) {
1659 validated = TRUE;
1660 }
1661 }
1662 else {
1663 validated = TRUE;
1664 }
1665
1666 ret = g_ptr_array_new ();
1667
1668 #ifdef WITH_HYPERSCAN
1669 if (map->hs_db && map->hs_scratch) {
1670
1671 if (validated) {
1672 struct rspamd_multiple_cbdata cbd;
1673
1674 cbd.ar = ret;
1675 cbd.map = map;
1676
1677 if (hs_scan (map->hs_db, in, len, 0, map->hs_scratch,
1678 rspamd_match_hs_multiple_handler, &cbd) == HS_SUCCESS) {
1679 res = 1;
1680 }
1681 }
1682 }
1683 #endif
1684
1685 if (!res) {
1686 /* PCRE version */
1687 for (i = 0; i < map->regexps->len; i ++) {
1688 re = g_ptr_array_index (map->regexps, i);
1689
1690 if (rspamd_regexp_search (re, in, len, NULL, NULL,
1691 !validated, NULL)) {
1692 val = g_ptr_array_index (map->values, i);
1693 val->hits ++;
1694 g_ptr_array_add (ret, val->value);
1695 }
1696 }
1697 }
1698
1699 if (ret->len > 0) {
1700 return ret;
1701 }
1702
1703 g_ptr_array_free (ret, TRUE);
1704
1705 return NULL;
1706 }
1707
1708 gconstpointer
rspamd_match_hash_map(struct rspamd_hash_map_helper * map,const gchar * in,gsize len)1709 rspamd_match_hash_map (struct rspamd_hash_map_helper *map, const gchar *in,
1710 gsize len)
1711 {
1712 khiter_t k;
1713 struct rspamd_map_helper_value *val;
1714 rspamd_ftok_t tok;
1715
1716 if (map == NULL || map->htb == NULL) {
1717 return NULL;
1718 }
1719
1720 tok.begin = in;
1721 tok.len = len;
1722
1723 k = kh_get (rspamd_map_hash, map->htb, tok);
1724
1725 if (k != kh_end (map->htb)) {
1726 val = kh_value (map->htb, k);
1727 val->hits ++;
1728
1729 return val->value;
1730 }
1731
1732 return NULL;
1733 }
1734
1735 gconstpointer
rspamd_match_radix_map(struct rspamd_radix_map_helper * map,const guchar * in,gsize inlen)1736 rspamd_match_radix_map (struct rspamd_radix_map_helper *map,
1737 const guchar *in, gsize inlen)
1738 {
1739 struct rspamd_map_helper_value *val;
1740
1741 if (map == NULL || map->trie == NULL) {
1742 return NULL;
1743 }
1744
1745 val = (struct rspamd_map_helper_value *)radix_find_compressed (map->trie,
1746 in, inlen);
1747
1748 if (val != (gconstpointer)RADIX_NO_VALUE) {
1749 val->hits ++;
1750
1751 return val->value;
1752 }
1753
1754 return NULL;
1755 }
1756
1757 gconstpointer
rspamd_match_radix_map_addr(struct rspamd_radix_map_helper * map,const rspamd_inet_addr_t * addr)1758 rspamd_match_radix_map_addr (struct rspamd_radix_map_helper *map,
1759 const rspamd_inet_addr_t *addr)
1760 {
1761 struct rspamd_map_helper_value *val;
1762
1763 if (map == NULL || map->trie == NULL) {
1764 return NULL;
1765 }
1766
1767 val = (struct rspamd_map_helper_value *)radix_find_compressed_addr (map->trie, addr);
1768
1769 if (val != (gconstpointer)RADIX_NO_VALUE) {
1770 val->hits ++;
1771
1772 return val->value;
1773 }
1774
1775 return NULL;
1776 }
1777
1778
1779 /*
 * CDB stuff
1781 */
1782
1783 struct rspamd_cdb_map_helper *
rspamd_map_helper_new_cdb(struct rspamd_map * map)1784 rspamd_map_helper_new_cdb (struct rspamd_map *map)
1785 {
1786 struct rspamd_cdb_map_helper *n;
1787
1788 n = g_malloc0 (sizeof (*n));
1789 n->cdbs = (GQueue)G_QUEUE_INIT;
1790 n->map = map;
1791
1792 rspamd_cryptobox_fast_hash_init (&n->hst, map_hash_seed);
1793
1794 return n;
1795 }
1796
1797 void
rspamd_map_helper_destroy_cdb(struct rspamd_cdb_map_helper * c)1798 rspamd_map_helper_destroy_cdb (struct rspamd_cdb_map_helper *c)
1799 {
1800 if (c == NULL) {
1801 return;
1802 }
1803
1804 GList *cur = c->cdbs.head;
1805
1806 while (cur) {
1807 struct cdb *cdb = (struct cdb *)cur->data;
1808
1809 cdb_free (cdb);
1810 g_free (cdb->filename);
1811 close (cdb->cdb_fd);
1812 g_free (cdb);
1813
1814 cur = g_list_next (cur);
1815 }
1816
1817 g_queue_clear (&c->cdbs);
1818
1819 g_free (c);
1820 }
1821
1822 gchar *
rspamd_cdb_list_read(gchar * chunk,gint len,struct map_cb_data * data,gboolean final)1823 rspamd_cdb_list_read (gchar *chunk,
1824 gint len,
1825 struct map_cb_data *data,
1826 gboolean final)
1827 {
1828 struct rspamd_cdb_map_helper *cdb_data;
1829 struct cdb *found = NULL;
1830 struct rspamd_map *map = data->map;
1831
1832 g_assert (map->no_file_read);
1833
1834 if (data->cur_data == NULL) {
1835 cdb_data = rspamd_map_helper_new_cdb (data->map);
1836 data->cur_data = cdb_data;
1837 }
1838 else {
1839 cdb_data = (struct rspamd_cdb_map_helper *)data->cur_data;
1840 }
1841
1842 GList *cur = cdb_data->cdbs.head;
1843
1844 while (cur) {
1845 struct cdb *elt = (struct cdb *)cur->data;
1846
1847 if (strcmp (elt->filename, chunk) == 0) {
1848 found = elt;
1849 break;
1850 }
1851
1852 cur = g_list_next (cur);
1853 }
1854
1855 if (found == NULL) {
1856 /* New cdb */
1857 gint fd;
1858 struct cdb *cdb;
1859
1860 fd = rspamd_file_xopen (chunk, O_RDONLY, 0, TRUE);
1861
1862 if (fd == -1) {
1863 msg_err_map ("cannot open cdb map from %s: %s", chunk, strerror (errno));
1864
1865 return NULL;
1866 }
1867
1868 cdb = g_malloc0 (sizeof (struct cdb));
1869
1870 if (cdb_init (cdb, fd) == -1) {
1871 g_free (cdb);
1872 msg_err_map ("cannot init cdb map from %s: %s", chunk, strerror (errno));
1873
1874 return NULL;
1875 }
1876
1877 cdb->filename = g_strdup (chunk);
1878 g_queue_push_tail (&cdb_data->cdbs, cdb);
1879 cdb_data->total_size += cdb->cdb_fsize;
1880 rspamd_cryptobox_fast_hash_update (&cdb_data->hst, chunk, len);
1881 }
1882
1883 return chunk + len;
1884 }
1885
1886 void
rspamd_cdb_list_fin(struct map_cb_data * data,void ** target)1887 rspamd_cdb_list_fin (struct map_cb_data *data, void **target)
1888 {
1889 struct rspamd_map *map = data->map;
1890 struct rspamd_cdb_map_helper *cdb_data;
1891
1892 if (data->cur_data) {
1893 cdb_data = (struct rspamd_cdb_map_helper *)data->cur_data;
1894 msg_info_map ("read cdb of %Hz size", cdb_data->total_size);
1895 data->map->traverse_function = NULL;
1896 data->map->nelts = 0;
1897 data->map->digest = rspamd_cryptobox_fast_hash_final (&cdb_data->hst);
1898 }
1899
1900 if (target) {
1901 *target = data->cur_data;
1902 }
1903
1904 if (data->prev_data) {
1905 cdb_data = (struct rspamd_cdb_map_helper *)data->prev_data;
1906 rspamd_map_helper_destroy_cdb (cdb_data);
1907 }
1908 }
1909 void
rspamd_cdb_list_dtor(struct map_cb_data * data)1910 rspamd_cdb_list_dtor (struct map_cb_data *data)
1911 {
1912 if (data->cur_data) {
1913 rspamd_map_helper_destroy_cdb (data->cur_data);
1914 }
1915 }
1916
1917 gconstpointer
rspamd_match_cdb_map(struct rspamd_cdb_map_helper * map,const gchar * in,gsize inlen)1918 rspamd_match_cdb_map (struct rspamd_cdb_map_helper *map,
1919 const gchar *in, gsize inlen)
1920 {
1921 if (map == NULL || map->cdbs.head == NULL) {
1922 return NULL;
1923 }
1924
1925 GList *cur = map->cdbs.head;
1926 static rspamd_ftok_t found;
1927
1928 while (cur) {
1929 struct cdb *cdb = (struct cdb *)cur->data;
1930
1931 if (cdb_find (cdb, in, inlen) > 0) {
1932 /* Extract and push value to lua as string */
1933 unsigned vlen;
1934 gconstpointer vpos;
1935
1936 vpos = cdb->cdb_mem + cdb_datapos (cdb);
1937 vlen = cdb_datalen (cdb);
1938 found.len = vlen;
1939 found.begin = vpos;
1940
1941 return &found; /* Do not reuse! */
1942 }
1943
1944 cur = g_list_next (cur);
1945 }
1946
1947 return NULL;
1948 }
1949