1 /*-
2  * Copyright 2016 Vsevolod Stakhov
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *   http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "config.h"
18 #include "libutil/multipattern.h"
19 #include "libutil/str_util.h"
20 #include "libcryptobox/cryptobox.h"
21 
22 #ifdef WITH_HYPERSCAN
23 #include "logger.h"
24 #include "unix-std.h"
25 #include "hs.h"
26 #endif
27 #include "acism.h"
28 #include "libutil/regexp.h"
29 #include <stdalign.h>
30 
/* Number of hyperscan scratch slots kept per multipattern */
#define MAX_SCRATCH 4

/* Cached result of the hyperscan platform probe */
enum rspamd_hs_check_state {
	RSPAMD_HS_UNCHECKED = 0,   /* probe has not been performed yet */
	RSPAMD_HS_SUPPORTED = 1,   /* hs_valid_platform () reported success */
	RSPAMD_HS_UNSUPPORTED = 2  /* hs_valid_platform () reported failure */
};

/* Directory for serialized hyperscan databases (set by library_init) */
static const char *hs_cache_dir = NULL;
/* Probe state, filled lazily by rspamd_hs_check () */
static enum rspamd_hs_check_state hs_suitable_cpu = RSPAMD_HS_UNCHECKED;
41 
42 
/*
 * Set of patterns matched at once. Depending on rspamd_hs_check () either
 * the hyperscan members or the trie/regexp members are in use.
 */
struct RSPAMD_ALIGNED(64) rspamd_multipattern {
#ifdef WITH_HYPERSCAN
	/* Running hash over the filtered patterns; names the cache file */
	rspamd_cryptobox_hash_state_t hash_state;
	/* Compiled (or deserialized) hyperscan database */
	hs_database_t *db;
	/* Scratch spaces allocated on compile */
	hs_scratch_t *scratch[MAX_SCRATCH];
	/* Filtered patterns (gchar *), one per added pattern */
	GArray *hs_pats;
	/* Pattern ids (gint), equal to the insertion index */
	GArray *hs_ids;
	/* Hyperscan compile flags (gint) per pattern */
	GArray *hs_flags;
	/* Bitmask of scratch slots currently taken by a lookup */
	guint scratch_used;
#endif
	/* Aho-Corasick trie for plain patterns */
	ac_trie_t *t;
	/* Filtered patterns (ac_trie_pat_t) for the non-hyperscan path */
	GArray *pats;
	/* Regexps (rspamd_regexp_t *) for the glob/re fallback */
	GArray *res;

	/* Set once rspamd_multipattern_compile () has succeeded */
	gboolean compiled;
	/* Number of patterns added so far */
	guint cnt;
	/* Flags given at creation time */
	enum rspamd_multipattern_flags flags;
};
61 
62 static GQuark
rspamd_multipattern_quark(void)63 rspamd_multipattern_quark (void)
64 {
65 	return g_quark_from_static_string ("multipattern");
66 }
67 
68 static inline gboolean
rspamd_hs_check(void)69 rspamd_hs_check (void)
70 {
71 #ifdef WITH_HYPERSCAN
72 	if (G_UNLIKELY (hs_suitable_cpu == RSPAMD_HS_UNCHECKED)) {
73 		if (hs_valid_platform () == HS_SUCCESS) {
74 			hs_suitable_cpu = RSPAMD_HS_SUPPORTED;
75 		}
76 		else {
77 			hs_suitable_cpu = RSPAMD_HS_UNSUPPORTED;
78 		}
79 	}
80 #endif
81 
82 	return hs_suitable_cpu == RSPAMD_HS_SUPPORTED;
83 }
84 
85 void
rspamd_multipattern_library_init(const gchar * cache_dir)86 rspamd_multipattern_library_init (const gchar *cache_dir)
87 {
88 	hs_cache_dir = cache_dir;
89 #ifdef WITH_HYPERSCAN
90 	rspamd_hs_check ();
91 #endif
92 }
93 
94 #ifdef WITH_HYPERSCAN
95 static gchar *
rspamd_multipattern_escape_tld_hyperscan(const gchar * pattern,gsize slen,gsize * dst_len)96 rspamd_multipattern_escape_tld_hyperscan (const gchar *pattern, gsize slen,
97 		gsize *dst_len)
98 {
99 	gsize len;
100 	const gchar *p, *prefix, *suffix;
101 	gchar *res;
102 
103 	/*
104 	 * We understand the following cases
105 	 * 1) blah -> .blah\b
106 	 * 2) *.blah -> ..*\\.blah\b|$
107 	 * 3) ???
108 	 */
109 
110 	if (pattern[0] == '*') {
111 		p = strchr (pattern, '.');
112 
113 		if (p == NULL) {
114 			/* XXX: bad */
115 			p = pattern;
116 		}
117 		else {
118 			p ++;
119 		}
120 
121 		prefix = "\\.";
122 		len = slen + strlen (prefix);
123 	}
124 	else {
125 		prefix = "\\.";
126 		p = pattern;
127 		len = slen + strlen (prefix);
128 	}
129 
130 	suffix = "(:?\\b|$)";
131 	len += strlen (suffix);
132 
133 	res = g_malloc (len + 1);
134 	slen = rspamd_strlcpy (res, prefix, len + 1);
135 	slen += rspamd_strlcpy (res + slen, p, len + 1 - slen);
136 	slen += rspamd_strlcpy (res + slen, suffix, len + 1 - slen);
137 
138 	*dst_len = slen;
139 
140 	return res;
141 }
142 
143 #endif
144 static gchar *
rspamd_multipattern_escape_tld_acism(const gchar * pattern,gsize len,gsize * dst_len)145 rspamd_multipattern_escape_tld_acism (const gchar *pattern, gsize len,
146 		gsize *dst_len)
147 {
148 	gsize dlen, slen;
149 	const gchar *p, *prefix;
150 	gchar *res;
151 
152 	/*
153 	 * We understand the following cases
154 	 * 1) blah -> \\.blah
155 	 * 2) *.blah -> \\..*\\.blah
156 	 * 3) ???
157 	 */
158 	slen = len;
159 
160 	if (pattern[0] == '*') {
161 		dlen = slen;
162 		p = memchr (pattern, '.', len);
163 
164 		if (p == NULL) {
165 			/* XXX: bad */
166 			p = pattern;
167 		}
168 		else {
169 			p ++;
170 		}
171 
172 		dlen -= p - pattern;
173 		prefix = ".";
174 		dlen ++;
175 	}
176 	else {
177 		dlen = slen + 1;
178 		prefix = ".";
179 		p = pattern;
180 	}
181 
182 	res = g_malloc (dlen + 1);
183 	slen = strlen (prefix);
184 	memcpy (res, prefix, slen);
185 	rspamd_strlcpy (res + slen, p, dlen - slen + 1);
186 
187 	*dst_len = dlen;
188 
189 	return res;
190 }
191 
192 /*
193  * Escapes special characters from specific pattern
194  */
195 static gchar *
rspamd_multipattern_pattern_filter(const gchar * pattern,gsize len,enum rspamd_multipattern_flags flags,gsize * dst_len)196 rspamd_multipattern_pattern_filter (const gchar *pattern, gsize len,
197 		enum rspamd_multipattern_flags flags,
198 		gsize *dst_len)
199 {
200 	gchar *ret = NULL;
201 	gint gl_flags = RSPAMD_REGEXP_ESCAPE_ASCII;
202 
203 	if (flags & RSPAMD_MULTIPATTERN_UTF8) {
204 		gl_flags |= RSPAMD_REGEXP_ESCAPE_UTF;
205 	}
206 
207 #ifdef WITH_HYPERSCAN
208 	if (rspamd_hs_check ()) {
209 		if (flags & RSPAMD_MULTIPATTERN_TLD) {
210 			gchar *tmp;
211 			gsize tlen;
212 			tmp = rspamd_multipattern_escape_tld_hyperscan (pattern, len, &tlen);
213 
214 			ret = rspamd_str_regexp_escape (tmp, tlen, dst_len,
215 					gl_flags|RSPAMD_REGEXP_ESCAPE_RE);
216 			g_free (tmp);
217 		}
218 		else if (flags & RSPAMD_MULTIPATTERN_RE) {
219 			ret = rspamd_str_regexp_escape (pattern, len, dst_len, gl_flags |
220 					RSPAMD_REGEXP_ESCAPE_RE);
221 		}
222 		else if (flags & RSPAMD_MULTIPATTERN_GLOB) {
223 			ret = rspamd_str_regexp_escape (pattern, len, dst_len,
224 					gl_flags | RSPAMD_REGEXP_ESCAPE_GLOB);
225 		}
226 		else {
227 			ret = rspamd_str_regexp_escape (pattern, len, dst_len, gl_flags);
228 		}
229 
230 		return ret;
231 	}
232 #endif
233 
234 	if (flags & RSPAMD_MULTIPATTERN_TLD) {
235 		ret = rspamd_multipattern_escape_tld_acism (pattern, len, dst_len);
236 	}
237 	else if (flags & RSPAMD_MULTIPATTERN_RE) {
238 		ret = rspamd_str_regexp_escape (pattern, len, dst_len, gl_flags |
239 															   RSPAMD_REGEXP_ESCAPE_RE);
240 	}
241 	else if (flags & RSPAMD_MULTIPATTERN_GLOB) {
242 		ret = rspamd_str_regexp_escape (pattern, len, dst_len,
243 				gl_flags | RSPAMD_REGEXP_ESCAPE_GLOB);
244 	}
245 	else {
246 		ret = malloc (len + 1);
247 		*dst_len = rspamd_strlcpy (ret, pattern, len + 1);
248 	}
249 
250 	return ret;
251 }
252 
253 struct rspamd_multipattern *
rspamd_multipattern_create(enum rspamd_multipattern_flags flags)254 rspamd_multipattern_create (enum rspamd_multipattern_flags flags)
255 {
256 	struct rspamd_multipattern *mp;
257 
258 	/* Align due to blake2b state */
259 	(void) !posix_memalign((void **)&mp, _Alignof (struct rspamd_multipattern),
260 			sizeof (*mp));
261 	g_assert (mp != NULL);
262 	memset (mp, 0, sizeof (*mp));
263 	mp->flags = flags;
264 
265 #ifdef WITH_HYPERSCAN
266 	if (rspamd_hs_check ()) {
267 		mp->hs_pats = g_array_new (FALSE, TRUE, sizeof (gchar *));
268 		mp->hs_flags = g_array_new (FALSE, TRUE, sizeof (gint));
269 		mp->hs_ids = g_array_new (FALSE, TRUE, sizeof (gint));
270 		rspamd_cryptobox_hash_init (&mp->hash_state, NULL, 0);
271 
272 		return mp;
273 	}
274 #endif
275 
276 	mp->pats = g_array_new (FALSE, TRUE, sizeof (ac_trie_pat_t));
277 
278 	return mp;
279 }
280 
281 struct rspamd_multipattern *
rspamd_multipattern_create_sized(guint npatterns,enum rspamd_multipattern_flags flags)282 rspamd_multipattern_create_sized (guint npatterns,
283 		enum rspamd_multipattern_flags flags)
284 {
285 	struct rspamd_multipattern *mp;
286 
287 	/* Align due to blake2b state */
288 	(void) !posix_memalign((void **)&mp, _Alignof (struct rspamd_multipattern), sizeof (*mp));
289 	g_assert (mp != NULL);
290 	memset (mp, 0, sizeof (*mp));
291 	mp->flags = flags;
292 
293 #ifdef WITH_HYPERSCAN
294 	if (rspamd_hs_check ()) {
295 		mp->hs_pats = g_array_sized_new (FALSE, TRUE, sizeof (gchar *), npatterns);
296 		mp->hs_flags = g_array_sized_new (FALSE, TRUE, sizeof (gint), npatterns);
297 		mp->hs_ids = g_array_sized_new (FALSE, TRUE, sizeof (gint), npatterns);
298 		rspamd_cryptobox_hash_init (&mp->hash_state, NULL, 0);
299 
300 		return mp;
301 	}
302 #endif
303 
304 	mp->pats = g_array_sized_new (FALSE, TRUE, sizeof (ac_trie_pat_t), npatterns);
305 
306 	return mp;
307 }
308 
309 void
rspamd_multipattern_add_pattern(struct rspamd_multipattern * mp,const gchar * pattern,gint flags)310 rspamd_multipattern_add_pattern (struct rspamd_multipattern *mp,
311 		const gchar *pattern, gint flags)
312 {
313 	g_assert (pattern != NULL);
314 
315 	rspamd_multipattern_add_pattern_len (mp, pattern, strlen (pattern), flags);
316 }
317 
318 void
rspamd_multipattern_add_pattern_len(struct rspamd_multipattern * mp,const gchar * pattern,gsize patlen,gint flags)319 rspamd_multipattern_add_pattern_len (struct rspamd_multipattern *mp,
320 		const gchar *pattern, gsize patlen, gint flags)
321 {
322 	gsize dlen;
323 
324 	g_assert (pattern != NULL);
325 	g_assert (mp != NULL);
326 	g_assert (!mp->compiled);
327 
328 #ifdef WITH_HYPERSCAN
329 	if (rspamd_hs_check ()) {
330 		gchar *np;
331 		gint fl = HS_FLAG_SOM_LEFTMOST;
332 		gint adjusted_flags = mp->flags | flags;
333 
334 		if (adjusted_flags & RSPAMD_MULTIPATTERN_ICASE) {
335 			fl |= HS_FLAG_CASELESS;
336 		}
337 		if (adjusted_flags & RSPAMD_MULTIPATTERN_UTF8) {
338 			if (adjusted_flags & RSPAMD_MULTIPATTERN_TLD) {
339 				fl |= HS_FLAG_UTF8;
340 			}
341 			else {
342 				fl |= HS_FLAG_UTF8 | HS_FLAG_UCP;
343 			}
344 		}
345 		if (adjusted_flags & RSPAMD_MULTIPATTERN_DOTALL) {
346 			fl |= HS_FLAG_DOTALL;
347 		}
348 		if (adjusted_flags & RSPAMD_MULTIPATTERN_SINGLEMATCH) {
349 			fl |= HS_FLAG_SINGLEMATCH;
350 			fl &= ~HS_FLAG_SOM_LEFTMOST; /* According to hyperscan docs */
351 		}
352 		if (adjusted_flags & RSPAMD_MULTIPATTERN_NO_START) {
353 			fl &= ~HS_FLAG_SOM_LEFTMOST;
354 		}
355 
356 		g_array_append_val (mp->hs_flags, fl);
357 		np = rspamd_multipattern_pattern_filter (pattern, patlen, flags, &dlen);
358 		g_array_append_val (mp->hs_pats, np);
359 		fl = mp->cnt;
360 		g_array_append_val (mp->hs_ids, fl);
361 		rspamd_cryptobox_hash_update (&mp->hash_state, np, dlen);
362 
363 		mp->cnt ++;
364 
365 		return;
366 	}
367 #endif
368 	ac_trie_pat_t pat;
369 
370 	pat.ptr = rspamd_multipattern_pattern_filter (pattern, patlen, flags, &dlen);
371 	pat.len = dlen;
372 
373 	g_array_append_val (mp->pats, pat);
374 
375 	mp->cnt ++;
376 }
377 
378 struct rspamd_multipattern *
rspamd_multipattern_create_full(const gchar ** patterns,guint npatterns,enum rspamd_multipattern_flags flags)379 rspamd_multipattern_create_full (const gchar **patterns,
380 		guint npatterns, enum rspamd_multipattern_flags flags)
381 {
382 	struct rspamd_multipattern *mp;
383 	guint i;
384 
385 	g_assert (npatterns > 0);
386 	g_assert (patterns != NULL);
387 
388 	mp = rspamd_multipattern_create_sized (npatterns, flags);
389 
390 	for (i = 0; i < npatterns; i++) {
391 		rspamd_multipattern_add_pattern (mp, patterns[i], flags);
392 	}
393 
394 	return mp;
395 }
396 
397 #ifdef WITH_HYPERSCAN
398 static gboolean
rspamd_multipattern_try_load_hs(struct rspamd_multipattern * mp,const guchar * hash)399 rspamd_multipattern_try_load_hs (struct rspamd_multipattern *mp,
400 		const guchar *hash)
401 {
402 	gchar fp[PATH_MAX];
403 	gpointer map;
404 	gsize len;
405 
406 	if (hs_cache_dir == NULL) {
407 		return FALSE;
408 	}
409 
410 	rspamd_snprintf (fp, sizeof (fp), "%s/%*xs.hsmp", hs_cache_dir,
411 			(gint)rspamd_cryptobox_HASHBYTES / 2, hash);
412 
413 	if ((map = rspamd_file_xmap (fp, PROT_READ, &len, TRUE)) != NULL) {
414 		if (hs_deserialize_database (map, len, &mp->db) == HS_SUCCESS) {
415 			munmap (map, len);
416 			return TRUE;
417 		}
418 
419 		munmap (map, len);
420 		/* Remove stale file */
421 		(void)unlink (fp);
422 	}
423 
424 	return FALSE;
425 }
426 
427 static void
rspamd_multipattern_try_save_hs(struct rspamd_multipattern * mp,const guchar * hash)428 rspamd_multipattern_try_save_hs (struct rspamd_multipattern *mp,
429 		const guchar *hash)
430 {
431 	gchar fp[PATH_MAX], np[PATH_MAX];
432 	char *bytes = NULL;
433 	gsize len;
434 	gint fd;
435 
436 	if (hs_cache_dir == NULL) {
437 		return;
438 	}
439 
440 	rspamd_snprintf (fp, sizeof (fp), "%s/%*xs.hsmp.tmp", hs_cache_dir,
441 			(gint)rspamd_cryptobox_HASHBYTES / 2, hash);
442 
443 	if ((fd = rspamd_file_xopen (fp, O_WRONLY | O_CREAT | O_EXCL, 00644, 0)) != -1) {
444 		if (hs_serialize_database (mp->db, &bytes, &len) == HS_SUCCESS) {
445 			if (write (fd, bytes, len) == -1) {
446 				msg_warn ("cannot write hyperscan cache to %s: %s",
447 						fp, strerror (errno));
448 				unlink (fp);
449 				free (bytes);
450 			}
451 			else {
452 				free (bytes);
453 				fsync (fd);
454 
455 				rspamd_snprintf (np, sizeof (np), "%s/%*xs.hsmp", hs_cache_dir,
456 						(gint)rspamd_cryptobox_HASHBYTES / 2, hash);
457 
458 				if (rename (fp, np) == -1) {
459 					msg_warn ("cannot rename hyperscan cache from %s to %s: %s",
460 							fp, np, strerror (errno));
461 					unlink (fp);
462 				}
463 			}
464 		}
465 		else {
466 			msg_warn ("cannot serialize hyperscan cache to %s: %s",
467 					fp, strerror (errno));
468 			unlink (fp);
469 		}
470 
471 
472 		close (fd);
473 	}
474 }
475 #endif
476 
477 gboolean
rspamd_multipattern_compile(struct rspamd_multipattern * mp,GError ** err)478 rspamd_multipattern_compile (struct rspamd_multipattern *mp, GError **err)
479 {
480 	g_assert (mp != NULL);
481 	g_assert (!mp->compiled);
482 
483 #ifdef WITH_HYPERSCAN
484 	if (rspamd_hs_check ()) {
485 		guint i;
486 		hs_platform_info_t plt;
487 		hs_compile_error_t *hs_errors;
488 		guchar hash[rspamd_cryptobox_HASHBYTES];
489 
490 		if (mp->cnt > 0) {
491 			g_assert (hs_populate_platform (&plt) == HS_SUCCESS);
492 			rspamd_cryptobox_hash_update (&mp->hash_state, (void *)&plt, sizeof (plt));
493 			rspamd_cryptobox_hash_final (&mp->hash_state, hash);
494 
495 			if (!rspamd_multipattern_try_load_hs (mp, hash)) {
496 				if (hs_compile_multi ((const char *const *)mp->hs_pats->data,
497 						(const unsigned int *)mp->hs_flags->data,
498 						(const unsigned int *)mp->hs_ids->data,
499 						mp->cnt,
500 						HS_MODE_BLOCK,
501 						&plt,
502 						&mp->db,
503 						&hs_errors) != HS_SUCCESS) {
504 
505 					g_set_error (err, rspamd_multipattern_quark (), EINVAL,
506 							"cannot create tree of regexp when processing '%s': %s",
507 							g_array_index (mp->hs_pats, char *, hs_errors->expression),
508 							hs_errors->message);
509 					hs_free_compile_error (hs_errors);
510 
511 					return FALSE;
512 				}
513 			}
514 
515 			rspamd_multipattern_try_save_hs (mp, hash);
516 
517 			for (i = 0; i < MAX_SCRATCH; i ++) {
518 				g_assert (hs_alloc_scratch (mp->db, &mp->scratch[i]) == HS_SUCCESS);
519 			}
520 		}
521 
522 		mp->compiled = TRUE;
523 
524 		return TRUE;
525 	}
526 #endif
527 
528 	if (mp->cnt > 0) {
529 
530 		if (mp->flags & (RSPAMD_MULTIPATTERN_GLOB|RSPAMD_MULTIPATTERN_RE)) {
531 			/* Fallback to pcre... */
532 			rspamd_regexp_t *re;
533 			mp->res = g_array_sized_new (FALSE, TRUE,
534 					sizeof (rspamd_regexp_t *), mp->cnt);
535 
536 			for (guint i = 0; i < mp->cnt; i ++) {
537 				const ac_trie_pat_t *pat;
538 				const gchar *pat_flags = NULL;
539 
540 				if (mp->flags & RSPAMD_MULTIPATTERN_UTF8) {
541 					pat_flags = "u";
542 				}
543 
544 				pat = &g_array_index (mp->pats, ac_trie_pat_t, i);
545 				re = rspamd_regexp_new (pat->ptr, pat_flags, err);
546 
547 				if (re == NULL) {
548 					return FALSE;
549 				}
550 
551 				g_array_append_val (mp->res, re);
552 			}
553 		}
554 		else {
555 			mp->t = acism_create ((const ac_trie_pat_t *) mp->pats->data, mp->cnt);
556 		}
557 	}
558 
559 	mp->compiled = TRUE;
560 
561 	return TRUE;
562 }
563 
564 struct rspamd_multipattern_cbdata {
565 	struct rspamd_multipattern *mp;
566 	const gchar *in;
567 	gsize len;
568 	rspamd_multipattern_cb_t cb;
569 	gpointer ud;
570 	guint nfound;
571 	gint ret;
572 };
573 
574 #ifdef WITH_HYPERSCAN
575 static gint
rspamd_multipattern_hs_cb(unsigned int id,unsigned long long from,unsigned long long to,unsigned int flags,void * ud)576 rspamd_multipattern_hs_cb (unsigned int id,
577 		unsigned long long from,
578 		unsigned long long to,
579 		unsigned int flags,
580 		void *ud)
581 {
582 	struct rspamd_multipattern_cbdata *cbd = ud;
583 	gint ret = 0;
584 
585 	if (to > 0) {
586 
587 		if (from == HS_OFFSET_PAST_HORIZON) {
588 			from = 0;
589 		}
590 
591 		ret = cbd->cb (cbd->mp, id, from, to, cbd->in, cbd->len, cbd->ud);
592 
593 		cbd->nfound ++;
594 		cbd->ret = ret;
595 	}
596 
597 	return ret;
598 }
599 #endif
600 
601 static gint
rspamd_multipattern_acism_cb(int strnum,int textpos,void * context)602 rspamd_multipattern_acism_cb (int strnum, int textpos, void *context)
603 {
604 	struct rspamd_multipattern_cbdata *cbd = context;
605 	gint ret;
606 	ac_trie_pat_t pat;
607 
608 	pat = g_array_index (cbd->mp->pats, ac_trie_pat_t, strnum);
609 	ret = cbd->cb (cbd->mp, strnum, textpos - pat.len,
610 			textpos, cbd->in, cbd->len, cbd->ud);
611 
612 	cbd->nfound ++;
613 	cbd->ret = ret;
614 
615 	return ret;
616 }
617 
618 gint
rspamd_multipattern_lookup(struct rspamd_multipattern * mp,const gchar * in,gsize len,rspamd_multipattern_cb_t cb,gpointer ud,guint * pnfound)619 rspamd_multipattern_lookup (struct rspamd_multipattern *mp,
620 		const gchar *in, gsize len, rspamd_multipattern_cb_t cb,
621 		gpointer ud, guint *pnfound)
622 {
623 	struct rspamd_multipattern_cbdata cbd;
624 	gint ret = 0;
625 
626 	g_assert (mp != NULL);
627 
628 	if (mp->cnt == 0 || !mp->compiled || len == 0) {
629 		return 0;
630 	}
631 
632 	cbd.mp = mp;
633 	cbd.in = in;
634 	cbd.len = len;
635 	cbd.cb = cb;
636 	cbd.ud = ud;
637 	cbd.nfound = 0;
638 	cbd.ret = 0;
639 
640 #ifdef WITH_HYPERSCAN
641 	if (rspamd_hs_check ()) {
642 		hs_scratch_t *scr = NULL;
643 		guint i;
644 
645 		for (i = 0; i < MAX_SCRATCH; i ++) {
646 			if (!(mp->scratch_used & (1 << i))) {
647 				mp->scratch_used |= (1 << i);
648 				scr = mp->scratch[i];
649 				break;
650 			}
651 		}
652 
653 		g_assert (scr != NULL);
654 
655 		ret = hs_scan (mp->db, in, len, 0, scr,
656 				rspamd_multipattern_hs_cb, &cbd);
657 
658 		mp->scratch_used &= ~(1 << i);
659 
660 		if (ret == HS_SUCCESS) {
661 			ret = 0;
662 		}
663 		else if (ret == HS_SCAN_TERMINATED) {
664 			ret = cbd.ret;
665 		}
666 
667 		if (pnfound) {
668 			*pnfound = cbd.nfound;
669 		}
670 
671 		return ret;
672 	}
673 #endif
674 
675 	gint state = 0;
676 
677 	if (mp->flags & (RSPAMD_MULTIPATTERN_GLOB|RSPAMD_MULTIPATTERN_RE)) {
678 		/* Terribly inefficient, but who cares - just use hyperscan */
679 		for (guint i = 0; i < mp->cnt; i ++) {
680 			rspamd_regexp_t *re = g_array_index (mp->res, rspamd_regexp_t *, i);
681 			const gchar *start = NULL, *end = NULL;
682 
683 			while (rspamd_regexp_search (re,
684 					in,
685 					len,
686 					&start,
687 					&end,
688 					TRUE,
689 					NULL)) {
690 				if (rspamd_multipattern_acism_cb (i, end - in, &cbd)) {
691 					goto out;
692 				}
693 			}
694 		}
695 out:
696 		ret = cbd.ret;
697 
698 		if (pnfound) {
699 			*pnfound = cbd.nfound;
700 		}
701 	}
702 	else {
703 		/* Plain trie */
704 		ret = acism_lookup (mp->t, in, len, rspamd_multipattern_acism_cb, &cbd,
705 				&state, mp->flags & RSPAMD_MULTIPATTERN_ICASE);
706 
707 		if (pnfound) {
708 			*pnfound = cbd.nfound;
709 		}
710 	}
711 
712 	return ret;
713 }
714 
715 
716 void
rspamd_multipattern_destroy(struct rspamd_multipattern * mp)717 rspamd_multipattern_destroy (struct rspamd_multipattern *mp)
718 {
719 	guint i;
720 
721 	if (mp) {
722 #ifdef WITH_HYPERSCAN
723 		if (rspamd_hs_check ()) {
724 			gchar *p;
725 
726 			if (mp->compiled && mp->cnt > 0) {
727 				for (i = 0; i < MAX_SCRATCH; i ++) {
728 					hs_free_scratch (mp->scratch[i]);
729 				}
730 
731 				hs_free_database (mp->db);
732 			}
733 
734 			for (i = 0; i < mp->cnt; i ++) {
735 				p = g_array_index (mp->hs_pats, gchar *, i);
736 				g_free (p);
737 			}
738 
739 			g_array_free (mp->hs_pats, TRUE);
740 			g_array_free (mp->hs_ids, TRUE);
741 			g_array_free (mp->hs_flags, TRUE);
742 			free (mp); /* Due to posix_memalign */
743 
744 			return;
745 		}
746 #endif
747 		ac_trie_pat_t pat;
748 
749 		if (mp->compiled && mp->cnt > 0) {
750 			acism_destroy (mp->t);
751 		}
752 
753 		for (i = 0; i < mp->cnt; i ++) {
754 			pat = g_array_index (mp->pats, ac_trie_pat_t, i);
755 			g_free ((gchar *)pat.ptr);
756 		}
757 
758 		g_array_free (mp->pats, TRUE);
759 
760 		g_free (mp);
761 	}
762 }
763 
764 const gchar*
rspamd_multipattern_get_pattern(struct rspamd_multipattern * mp,guint index)765 rspamd_multipattern_get_pattern (struct rspamd_multipattern *mp,
766 		guint index)
767 {
768 	g_assert (mp != NULL);
769 	g_assert (index < mp->cnt);
770 
771 #ifdef WITH_HYPERSCAN
772 	if (rspamd_hs_check ()) {
773 		return g_array_index (mp->hs_pats, gchar *, index);
774 	}
775 #endif
776 
777 	ac_trie_pat_t pat;
778 
779 	pat = g_array_index (mp->pats, ac_trie_pat_t, index);
780 
781 	return pat.ptr;
782 }
783 
784 guint
rspamd_multipattern_get_npatterns(struct rspamd_multipattern * mp)785 rspamd_multipattern_get_npatterns (struct rspamd_multipattern *mp)
786 {
787 	g_assert (mp != NULL);
788 
789 	return mp->cnt;
790 }
791 
792 gboolean
rspamd_multipattern_has_hyperscan(void)793 rspamd_multipattern_has_hyperscan (void)
794 {
795 	return rspamd_hs_check ();
796 }
797