1 /*-
2  * Copyright 2016 Vsevolod Stakhov
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *   http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include <contrib/libucl/ucl.h>
17 #include "config.h"
18 #include "util.h"
19 #include "cfg_file.h"
20 #include "rspamd.h"
21 #include "message.h"
22 #include "mime_expressions.h"
23 #include "libserver/html/html.h"
24 #include "lua/lua_common.h"
25 #include "utlist.h"
26 
27 gboolean rspamd_compare_encoding (struct rspamd_task *task,
28 								  GArray * args,
29 								  void *unused);
30 gboolean rspamd_header_exists (struct rspamd_task *task,
31 							   GArray * args,
32 							   void *unused);
33 gboolean rspamd_parts_distance (struct rspamd_task *task,
34 								GArray * args,
35 								void *unused);
36 gboolean rspamd_recipients_distance (struct rspamd_task *task,
37 									 GArray * args,
38 									 void *unused);
39 gboolean rspamd_has_only_html_part (struct rspamd_task *task,
40 									GArray * args,
41 									void *unused);
42 gboolean rspamd_is_recipients_sorted (struct rspamd_task *task,
43 									  GArray * args,
44 									  void *unused);
45 gboolean rspamd_compare_transfer_encoding (struct rspamd_task *task,
46 										   GArray * args,
47 										   void *unused);
48 gboolean rspamd_is_html_balanced (struct rspamd_task *task,
49 								  GArray * args,
50 								  void *unused);
51 gboolean rspamd_has_html_tag (struct rspamd_task *task,
52 							  GArray * args,
53 							  void *unused);
54 gboolean rspamd_has_fake_html (struct rspamd_task *task,
55 							   GArray * args,
56 							   void *unused);
57 static gboolean rspamd_raw_header_exists (struct rspamd_task *task,
58 										  GArray * args,
59 										  void *unused);
60 static gboolean rspamd_check_smtp_data (struct rspamd_task *task,
61 										GArray * args,
62 										void *unused);
63 static gboolean rspamd_content_type_is_type (struct rspamd_task * task,
64 											 GArray * args,
65 											 void *unused);
66 static gboolean rspamd_content_type_is_subtype (struct rspamd_task *task,
67 												GArray * args,
68 												void *unused);
69 static gboolean rspamd_content_type_has_param (struct rspamd_task * task,
70 											   GArray * args,
71 											   void *unused);
72 static gboolean rspamd_content_type_compare_param (struct rspamd_task * task,
73 												   GArray * args,
74 												   void *unused);
75 static gboolean rspamd_has_content_part (struct rspamd_task *task,
76 										 GArray * args,
77 										 void *unused);
78 static gboolean rspamd_has_content_part_len (struct rspamd_task *task,
79 											 GArray * args,
80 											 void *unused);
81 static gboolean rspamd_is_empty_body (struct rspamd_task *task,
82 									  GArray * args,
83 									  void *unused);
84 static gboolean rspamd_has_flag_expr (struct rspamd_task *task,
85 									  GArray * args,
86 									  void *unused);
87 static gboolean rspamd_has_symbol_expr (struct rspamd_task *task,
88 									  GArray * args,
89 									  void *unused);
90 
91 static rspamd_expression_atom_t * rspamd_mime_expr_parse (const gchar *line, gsize len,
92 		rspamd_mempool_t *pool, gpointer ud, GError **err);
93 static gdouble rspamd_mime_expr_process (void *ud, rspamd_expression_atom_t *atom);
94 static gint rspamd_mime_expr_priority (rspamd_expression_atom_t *atom);
95 static void rspamd_mime_expr_destroy (rspamd_expression_atom_t *atom);
96 
97 /**
98  * Regexp structure
99  */
100 struct rspamd_regexp_atom {
101 	enum rspamd_re_type type;                       /**< regexp type										*/
102 	gchar *regexp_text;                             /**< regexp text representation							*/
103 	rspamd_regexp_t *regexp;                        /**< regexp structure									*/
104 	union {
105 		const gchar *header;                        /**< header name for header regexps						*/
106 		const gchar *selector;                      /**< selector name for lua selector regexp				*/
107 	} extra;
108 	gboolean is_test;                               /**< true if this expression must be tested				*/
109 	gboolean is_strong;                             /**< true if headers search must be case sensitive		*/
110 	gboolean is_multiple;                           /**< true if we need to match all inclusions of atom	*/
111 };
112 
113 /**
114  * Rspamd expression function
115  */
116 struct rspamd_function_atom {
117 	gchar *name;	/**< name of function								*/
118 	GArray *args;	/**< its args										*/
119 };
120 
/**
 * Kind of a mime atom; selects the active member of rspamd_mime_atom.d
 */
enum rspamd_mime_atom_type {
	/* Regular expression, optionally bound to a header or a selector */
	MIME_ATOM_REGEXP = 0,
	/* Function call written as `name(args)` */
	MIME_ATOM_INTERNAL_FUNCTION,
	/* Global Lua function referenced by its bare name */
	MIME_ATOM_LUA_FUNCTION,
	/* New style: `lua:name`, resolved through the config object */
	MIME_ATOM_LOCAL_LUA_FUNCTION,
};
127 
128 struct rspamd_mime_atom {
129 	gchar *str;
130 	union {
131 		struct rspamd_regexp_atom *re;
132 		struct rspamd_function_atom *func;
133 		const gchar *lua_function;
134 		gint lua_cbref;
135 	} d;
136 	enum rspamd_mime_atom_type type;
137 };
138 
139 /*
140  * List of internal functions of rspamd
141  * Sorted by name to use bsearch
142  */
143 static struct _fl {
144 	const gchar *name;
145 	rspamd_internal_func_t func;
146 	void *user_data;
147 } rspamd_functions_list[] = {
148 		{"check_smtp_data", rspamd_check_smtp_data, NULL},
149 		{"compare_encoding", rspamd_compare_encoding, NULL},
150 		{"compare_parts_distance", rspamd_parts_distance, NULL},
151 		{"compare_recipients_distance", rspamd_recipients_distance, NULL},
152 		{"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL},
153 		{"content_type_compare_param", rspamd_content_type_compare_param, NULL},
154 		{"content_type_has_param", rspamd_content_type_has_param, NULL},
155 		{"content_type_is_subtype", rspamd_content_type_is_subtype, NULL},
156 		{"content_type_is_type", rspamd_content_type_is_type, NULL},
157 		{"has_content_part", rspamd_has_content_part, NULL},
158 		{"has_content_part_len", rspamd_has_content_part_len, NULL},
159 		{"has_fake_html", rspamd_has_fake_html, NULL},
160 		{"has_flag", rspamd_has_flag_expr, NULL},
161 		{"has_html_tag", rspamd_has_html_tag, NULL},
162 		{"has_only_html_part", rspamd_has_only_html_part, NULL},
163 		{"has_symbol", rspamd_has_symbol_expr, NULL},
164 		{"header_exists", rspamd_header_exists, NULL},
165 		{"is_empty_body", rspamd_is_empty_body, NULL},
166 		{"is_html_balanced", rspamd_is_html_balanced, NULL},
167 		{"is_recipients_sorted", rspamd_is_recipients_sorted, NULL},
168 		{"raw_header_exists", rspamd_raw_header_exists, NULL},
169 };
170 
171 const struct rspamd_atom_subr mime_expr_subr = {
172 	.parse = rspamd_mime_expr_parse,
173 	.process = rspamd_mime_expr_process,
174 	.priority = rspamd_mime_expr_priority,
175 	.destroy = rspamd_mime_expr_destroy
176 };
177 
178 static struct _fl *list_ptr = &rspamd_functions_list[0];
179 static guint32 functions_number = sizeof (rspamd_functions_list) /
180 	sizeof (struct _fl);
181 static gboolean list_allocated = FALSE;
182 
183 /* Bsearch routine */
184 static gint
fl_cmp(const void * s1,const void * s2)185 fl_cmp (const void *s1, const void *s2)
186 {
187 	struct _fl *fl1 = (struct _fl *)s1;
188 	struct _fl *fl2 = (struct _fl *)s2;
189 	return strcmp (fl1->name, fl2->name);
190 }
191 
192 static GQuark
rspamd_mime_expr_quark(void)193 rspamd_mime_expr_quark (void)
194 {
195 	return g_quark_from_static_string ("mime-expressions");
196 }
197 
198 #define TYPE_CHECK(str, type, len) (sizeof(type) - 1 == (len) && rspamd_lc_cmp((str), (type), (len)) == 0)
199 static gboolean
rspamd_parse_long_option(const gchar * start,gsize len,struct rspamd_regexp_atom * a)200 rspamd_parse_long_option (const gchar *start, gsize len,
201 		struct rspamd_regexp_atom *a)
202 {
203 	gboolean ret = FALSE;
204 
205 	if (TYPE_CHECK (start, "body", len)) {
206 		ret = TRUE;
207 		a->type = RSPAMD_RE_BODY;
208 	}
209 	else if (TYPE_CHECK (start, "part", len) ||
210 			TYPE_CHECK (start, "mime", len)) {
211 		ret = TRUE;
212 		a->type = RSPAMD_RE_MIME;
213 	}
214 	else if (TYPE_CHECK (start, "raw_part", len) ||
215 			TYPE_CHECK (start, "raw_mime", len) ||
216 			TYPE_CHECK (start, "mime_raw", len)) {
217 		ret = TRUE;
218 		a->type = RSPAMD_RE_RAWMIME;
219 	}
220 	else if (TYPE_CHECK (start, "header", len)) {
221 		ret = TRUE;
222 		a->type = RSPAMD_RE_HEADER;
223 	}
224 	else if (TYPE_CHECK (start, "mime_header", len) ||
225 			TYPE_CHECK (start, "header_mime", len)) {
226 		ret = TRUE;
227 		a->type = RSPAMD_RE_MIMEHEADER;
228 	}
229 	else if (TYPE_CHECK (start, "raw_header", len) ||
230 			TYPE_CHECK (start, "header_raw", len)) {
231 		ret = TRUE;
232 		a->type = RSPAMD_RE_RAWHEADER;
233 	}
234 	else if (TYPE_CHECK (start, "all_header", len) ||
235 			TYPE_CHECK (start, "header_all", len) ||
236 			TYPE_CHECK (start, "all_headers", len)) {
237 		ret = TRUE;
238 		a->type = RSPAMD_RE_ALLHEADER;
239 	}
240 	else if (TYPE_CHECK (start, "url", len)) {
241 		ret = TRUE;
242 		a->type = RSPAMD_RE_URL;
243 	}
244 	else if (TYPE_CHECK (start, "email", len)) {
245 		ret = TRUE;
246 		a->type = RSPAMD_RE_EMAIL;
247 	}
248 	else if (TYPE_CHECK (start, "sa_body", len)) {
249 		ret = TRUE;
250 		a->type = RSPAMD_RE_SABODY;
251 	}
252 	else if (TYPE_CHECK (start, "sa_raw_body", len) ||
253 			TYPE_CHECK (start, "sa_body_raw", len)) {
254 		ret = TRUE;
255 		a->type = RSPAMD_RE_SARAWBODY;
256 	}
257 	else if (TYPE_CHECK (start, "words", len)) {
258 		ret = TRUE;
259 		a->type = RSPAMD_RE_WORDS;
260 	}
261 	else if (TYPE_CHECK (start, "raw_words", len)) {
262 		ret = TRUE;
263 		a->type = RSPAMD_RE_RAWWORDS;
264 	}
265 	else if (TYPE_CHECK (start, "stem_words", len)) {
266 		ret = TRUE;
267 		a->type = RSPAMD_RE_STEMWORDS;
268 	}
269 	else if (TYPE_CHECK (start, "selector", len)) {
270 		ret = TRUE;
271 		a->type = RSPAMD_RE_SELECTOR;
272 	}
273 
274 	return ret;
275 }
276 
277 /*
278  * Rspamd regexp utility functions
279  */
280 static struct rspamd_regexp_atom *
rspamd_mime_expr_parse_regexp_atom(rspamd_mempool_t * pool,const gchar * line,struct rspamd_config * cfg)281 rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line,
282 		struct rspamd_config *cfg)
283 {
284 	const gchar *begin, *end, *p, *src, *start, *brace;
285 	gchar *dbegin, *dend, *extra = NULL;
286 	struct rspamd_regexp_atom *result;
287 	GError *err = NULL;
288 	GString *re_flags;
289 
290 	if (line == NULL) {
291 		msg_err_pool ("cannot parse NULL line");
292 		return NULL;
293 	}
294 
295 	src = line;
296 	result = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_regexp_atom));
297 	/* Skip whitespaces */
298 	while (g_ascii_isspace (*line)) {
299 		line++;
300 	}
301 	if (*line == '\0') {
302 		msg_warn_pool ("got empty regexp");
303 		return NULL;
304 	}
305 
306 	result->type = RSPAMD_RE_MAX;
307 
308 	start = line;
309 	/* First try to find header name */
310 	begin = strchr (line, '/');
311 	if (begin != NULL) {
312 		p = begin;
313 		end = NULL;
314 		while (p != line) {
315 			if (*p == '=') {
316 				end = p;
317 				break;
318 			}
319 			p--;
320 		}
321 
322 		if (end) {
323 			extra = rspamd_mempool_alloc (pool, end - line + 1);
324 			rspamd_strlcpy (extra, line, end - line + 1);
325 			line = end;
326 		}
327 	}
328 	else {
329 		extra = rspamd_mempool_strdup (pool, line);
330 		result->type = RSPAMD_RE_MAX;
331 		line = start;
332 	}
333 	/* Find begin of regexp */
334 	while (*line && *line != '/') {
335 		line++;
336 	}
337 	if (*line != '\0') {
338 		begin = line + 1;
339 	}
340 	else if (extra == NULL) {
341 		/* Assume that line without // is just a header name */
342 		extra = rspamd_mempool_strdup (pool, line);
343 		result->type = RSPAMD_RE_HEADER;
344 		return result;
345 	}
346 	else {
347 		/* We got header name earlier but have not found // expression, so it is invalid regexp */
348 		msg_warn_pool (
349 			"got no header name (eg. header=) but without corresponding regexp, %s",
350 			src);
351 		return NULL;
352 	}
353 	/* Find end */
354 	end = begin;
355 	while (*end && (*end != '/' || *(end - 1) == '\\')) {
356 		end++;
357 	}
358 	if (end == begin || *end != '/') {
359 		msg_warn_pool ("no trailing / in regexp %s", src);
360 		return NULL;
361 	}
362 	/* Parse flags */
363 	p = end + 1;
364 	re_flags = g_string_sized_new (32);
365 
366 	while (p != NULL) {
367 		switch (*p) {
368 		case 'i':
369 		case 'm':
370 		case 's':
371 		case 'x':
372 		case 'u':
373 		case 'O':
374 		case 'r':
375 		case 'L':
376 			/* Handled by rspamd_regexp_t */
377 			g_string_append_c (re_flags, *p);
378 			p++;
379 			break;
380 		case 'o':
381 			p++;
382 			break;
383 		/* Type flags */
384 		case 'H':
385 			result->type = RSPAMD_RE_HEADER;
386 			p++;
387 			break;
388 		case 'R':
389 			result->type = RSPAMD_RE_ALLHEADER;
390 			p++;
391 			break;
392 		case 'B':
393 			result->type = RSPAMD_RE_MIMEHEADER;
394 			p++;
395 			break;
396 		case 'C':
397 			result->type = RSPAMD_RE_SABODY;
398 			p++;
399 			break;
400 		case 'D':
401 			result->type = RSPAMD_RE_SARAWBODY;
402 			p++;
403 			break;
404 		case 'M':
405 			result->type = RSPAMD_RE_BODY;
406 			p++;
407 			break;
408 		case 'P':
409 			result->type = RSPAMD_RE_MIME;
410 			p++;
411 			break;
412 		case 'Q':
413 			result->type = RSPAMD_RE_RAWMIME;
414 			p++;
415 			break;
416 		case 'U':
417 			result->type = RSPAMD_RE_URL;
418 			p++;
419 			break;
420 		case 'X':
421 			result->type = RSPAMD_RE_RAWHEADER;
422 			p++;
423 			break;
424 		case '$':
425 			result->type = RSPAMD_RE_SELECTOR;
426 			p++;
427 			break;
428 		case '{':
429 			/* Long definition */
430 			if ((brace = strchr (p + 1, '}')) != NULL) {
431 				if (!rspamd_parse_long_option (p + 1, brace - (p + 1), result)) {
432 					msg_warn_pool ("invalid long regexp type: %*s in '%s'",
433 							(int)(brace - (p + 1)), p + 1, src);
434 					p = NULL;
435 				}
436 				else {
437 					p = brace + 1;
438 				}
439 			}
440 			else {
441 				p = NULL;
442 			}
443 			break;
444 		/* Other flags */
445 		case 'T':
446 			result->is_test = TRUE;
447 			p++;
448 			break;
449 		case 'S':
450 			result->is_strong = TRUE;
451 			p++;
452 			break;
453 		case 'A':
454 			result->is_multiple = TRUE;
455 			p++;
456 			break;
457 		/* Stop flags parsing */
458 		default:
459 			p = NULL;
460 			break;
461 		}
462 	}
463 
464 	if (result->type >= RSPAMD_RE_MAX) {
465 		if (extra) {
466 			/* Assume header regexp */
467 			result->extra.header = extra;
468 			result->type = RSPAMD_RE_HEADER;
469 		}
470 		else {
471 			msg_err_pool ("could not read regexp: %s, unknown type", src);
472 			return NULL;
473 		}
474 	}
475 
476 	if ((result->type == RSPAMD_RE_HEADER ||
477 			result->type == RSPAMD_RE_RAWHEADER ||
478 			result->type == RSPAMD_RE_MIMEHEADER)) {
479 		if (extra == NULL) {
480 			msg_err_pool ("header regexp: '%s' has no header part", src);
481 			return NULL;
482 		}
483 		else {
484 			result->extra.header = extra;
485 		}
486 	}
487 
488 	if (result->type == RSPAMD_RE_SELECTOR) {
489 		if (extra == NULL) {
490 			msg_err_pool ("selector regexp: '%s' has no selector part", src);
491 			return NULL;
492 		}
493 		else {
494 			result->extra.selector = extra;
495 		}
496 	}
497 
498 
499 	result->regexp_text = rspamd_mempool_strdup (pool, start);
500 	dbegin = result->regexp_text + (begin - start);
501 	dend = result->regexp_text + (end - start);
502 	*dend = '\0';
503 
504 	result->regexp = rspamd_regexp_new (dbegin, re_flags->str,
505 			&err);
506 
507 	g_string_free (re_flags, TRUE);
508 
509 	if (result->regexp == NULL || err != NULL) {
510 		msg_warn_pool ("could not read regexp: %s while reading regexp %e",
511 				src, err);
512 
513 		if (err) {
514 			g_error_free (err);
515 		}
516 
517 		return NULL;
518 	}
519 
520 	if (result->is_multiple) {
521 		rspamd_regexp_set_maxhits (result->regexp, 0);
522 	}
523 	else {
524 		rspamd_regexp_set_maxhits (result->regexp, 1);
525 	}
526 
527 	rspamd_regexp_set_ud (result->regexp, result);
528 
529 	*dend = '/';
530 
531 	return result;
532 }
533 
534 struct rspamd_function_atom *
rspamd_mime_expr_parse_function_atom(rspamd_mempool_t * pool,const gchar * input)535 rspamd_mime_expr_parse_function_atom (rspamd_mempool_t *pool, const gchar *input)
536 {
537 	const gchar *obrace, *ebrace, *p, *c;
538 	gchar t, *databuf;
539 	guint len;
540 	struct rspamd_function_atom *res;
541 	struct expression_argument arg;
542 	GError *err = NULL;
543 	enum {
544 		start_read_argument = 0,
545 		in_string,
546 		in_regexp,
547 		got_backslash,
548 		got_comma
549 	} state, prev_state = 0;
550 
551 	obrace = strchr (input, '(');
552 	ebrace = strrchr (input, ')');
553 
554 	g_assert (obrace != NULL && ebrace != NULL);
555 
556 	res = rspamd_mempool_alloc0 (pool, sizeof (*res));
557 	res->name = rspamd_mempool_alloc (pool, obrace - input + 1);
558 	rspamd_strlcpy (res->name, input, obrace - input + 1);
559 	res->args = g_array_new (FALSE, FALSE, sizeof (struct expression_argument));
560 
561 	p = obrace + 1;
562 	c = p;
563 	state = start_read_argument;
564 
565 	/* Read arguments */
566 	while (p <= ebrace) {
567 		t = *p;
568 		switch (state) {
569 		case start_read_argument:
570 			if (t == '/') {
571 				state = in_regexp;
572 				c = p;
573 			}
574 			else if (!g_ascii_isspace (t)) {
575 				state = in_string;
576 
577 				if (t == '\'' || t == '\"') {
578 					c = p + 1;
579 				}
580 				else {
581 					c = p;
582 				}
583 			}
584 			p ++;
585 			break;
586 		case in_regexp:
587 			if (t == '\\') {
588 				state = got_backslash;
589 				prev_state = in_regexp;
590 			}
591 			else if (t == ',' || p == ebrace) {
592 				len = p - c + 1;
593 				databuf = rspamd_mempool_alloc (pool, len);
594 				rspamd_strlcpy (databuf, c, len);
595 				arg.type = EXPRESSION_ARGUMENT_REGEXP;
596 				arg.data = rspamd_regexp_cache_create (NULL, databuf, NULL, &err);
597 
598 				if (arg.data == NULL) {
599 					/* Fallback to string */
600 					msg_warn ("cannot parse slashed argument %s as regexp: %s",
601 							databuf, err->message);
602 					g_error_free (err);
603 					arg.type = EXPRESSION_ARGUMENT_NORMAL;
604 					arg.data = databuf;
605 				}
606 
607 				g_array_append_val (res->args, arg);
608 				state = got_comma;
609 			}
610 			p ++;
611 			break;
612 		case in_string:
613 			if (t == '\\') {
614 				state = got_backslash;
615 				prev_state = in_string;
616 			}
617 			else if (t == ',' || p == ebrace) {
618 				if (*(p - 1) == '\'' || *(p - 1) == '\"') {
619 					len = p - c;
620 				}
621 				else {
622 					len = p - c + 1;
623 				}
624 
625 				databuf = rspamd_mempool_alloc (pool, len);
626 				rspamd_strlcpy (databuf, c, len);
627 				arg.type = EXPRESSION_ARGUMENT_NORMAL;
628 				arg.data = databuf;
629 				g_array_append_val (res->args, arg);
630 				state = got_comma;
631 			}
632 			p ++;
633 			break;
634 		case got_backslash:
635 			state = prev_state;
636 			p ++;
637 			break;
638 		case got_comma:
639 			state = start_read_argument;
640 			break;
641 		}
642 	}
643 
644 	return res;
645 }
646 
647 static rspamd_expression_atom_t *
rspamd_mime_expr_parse(const gchar * line,gsize len,rspamd_mempool_t * pool,gpointer ud,GError ** err)648 rspamd_mime_expr_parse (const gchar *line, gsize len,
649 		rspamd_mempool_t *pool, gpointer ud, GError **err)
650 {
651 	rspamd_expression_atom_t *a = NULL;
652 	struct rspamd_mime_atom *mime_atom = NULL;
653 	const gchar *p, *end, *c = NULL;
654 	struct rspamd_mime_expr_ud *real_ud = (struct rspamd_mime_expr_ud *)ud;
655 	struct rspamd_config *cfg;
656 	rspamd_regexp_t *own_re;
657 	gchar t;
658 	gint type = MIME_ATOM_REGEXP, obraces = 0, ebraces = 0;
659 	enum {
660 		in_header = 0,
661 		got_slash,
662 		in_regexp,
663 		got_backslash,
664 		got_second_slash,
665 		in_flags,
666 		in_flags_brace,
667 		got_obrace,
668 		in_function,
669 		in_local_function,
670 		got_ebrace,
671 		end_atom,
672 		bad_atom
673 	} state = 0, prev_state = 0;
674 
675 	p = line;
676 	end = p + len;
677 	cfg = real_ud->cfg;
678 
679 	while (p < end) {
680 		t = *p;
681 
682 		switch (state) {
683 		case in_header:
684 			if (t == '/') {
685 				/* Regexp */
686 				state = got_slash;
687 			}
688 			else if (t == '(') {
689 				/* Function */
690 				state = got_obrace;
691 			}
692 			else if (!g_ascii_isalnum (t) && t != '_' && t != '-' && t != '=') {
693 				if (t == ':') {
694 					if (p - line == 3 && memcmp (line, "lua", 3) == 0) {
695 						type = MIME_ATOM_LOCAL_LUA_FUNCTION;
696 						state = in_local_function;
697 						c = p + 1;
698 					}
699 				}
700 				else {
701 					/* Likely lua function, identified by just a string */
702 					type = MIME_ATOM_LUA_FUNCTION;
703 					state = end_atom;
704 					/* Do not increase p */
705 					continue;
706 				}
707 			}
708 			else if (g_ascii_isspace (t)) {
709 				state = bad_atom;
710 			}
711 			p ++;
712 			break;
713 		case got_slash:
714 			state = in_regexp;
715 			break;
716 		case in_regexp:
717 			if (t == '\\') {
718 				state = got_backslash;
719 				prev_state = in_regexp;
720 			}
721 			else if (t == '/') {
722 				state = got_second_slash;
723 			}
724 			p ++;
725 			break;
726 		case got_second_slash:
727 			state = in_flags;
728 			break;
729 		case in_flags:
730 			if (t == '{') {
731 				state = in_flags_brace;
732 				p ++;
733 			}
734 			else if (!g_ascii_isalpha (t) && t != '$') {
735 				state = end_atom;
736 			}
737 			else {
738 				p ++;
739 			}
740 			break;
741 		case in_flags_brace:
742 			if (t == '}') {
743 				state = in_flags;
744 			}
745 			p ++;
746 			break;
747 		case got_backslash:
748 			state = prev_state;
749 			p ++;
750 			break;
751 		case got_obrace:
752 			state = in_function;
753 			type = MIME_ATOM_INTERNAL_FUNCTION;
754 			obraces ++;
755 			break;
756 		case in_function:
757 			if (t == '\\') {
758 				state = got_backslash;
759 				prev_state = in_function;
760 			}
761 			else if (t == '(') {
762 				obraces ++;
763 			}
764 			else if (t == ')') {
765 				ebraces ++;
766 				if (ebraces == obraces) {
767 					state = got_ebrace;
768 				}
769 			}
770 			p ++;
771 			break;
772 		case in_local_function:
773 			if (!(g_ascii_isalnum (t) || t == '-' || t == '_')) {
774 				g_assert (c != NULL);
775 				state = end_atom;
776 			}
777 			else {
778 				p++;
779 			}
780 			break;
781 		case got_ebrace:
782 			state = end_atom;
783 			break;
784 		case bad_atom:
785 			g_set_error (err, rspamd_mime_expr_quark(), 100, "cannot parse"
786 					" mime atom '%s' when reading symbol '%c' at offset %d, "
787 					"near %*.s", line, t, (gint)(p - line),
788 					(gint)MIN (end - p, 10), p);
789 			return NULL;
790 		case end_atom:
791 			goto set;
792 		}
793 	}
794 set:
795 
796 	if (p - line == 0 || (state != got_ebrace && state != got_second_slash &&
797 			state != in_flags && state != end_atom)) {
798 		g_set_error (err, rspamd_mime_expr_quark(), 200, "incomplete or empty"
799 				" mime atom");
800 		return NULL;
801 	}
802 
803 	mime_atom = rspamd_mempool_alloc (pool, sizeof (*mime_atom));
804 	mime_atom->type = type;
805 	mime_atom->str = rspamd_mempool_alloc (pool, p - line + 1);
806 	rspamd_strlcpy (mime_atom->str, line, p - line + 1);
807 
808 	if (type == MIME_ATOM_REGEXP) {
809 		mime_atom->d.re = rspamd_mime_expr_parse_regexp_atom (pool,
810 				mime_atom->str, cfg);
811 		if (mime_atom->d.re == NULL) {
812 			g_set_error (err, rspamd_mime_expr_quark(), 200,
813 					"cannot parse regexp '%s'",
814 					mime_atom->str);
815 			goto err;
816 		}
817 		else {
818 			gint lua_cbref = -1;
819 
820 			/* Check regexp condition */
821 			if (real_ud->conf_obj != NULL) {
822 				const ucl_object_t *re_conditions = ucl_object_lookup (real_ud->conf_obj,
823 						"re_conditions");
824 
825 				if (re_conditions != NULL) {
826 					if (ucl_object_type (re_conditions) != UCL_OBJECT) {
827 						g_set_error (err, rspamd_mime_expr_quark (), 320,
828 								"re_conditions is not a table for '%s'",
829 								mime_atom->str);
830 						goto err;
831 					}
832 
833 					const ucl_object_t *function_obj = ucl_object_lookup (re_conditions,
834 							mime_atom->str);
835 
836 					if (function_obj != NULL) {
837 						if (ucl_object_type (function_obj) != UCL_USERDATA) {
838 							g_set_error (err, rspamd_mime_expr_quark (), 320,
839 									"condition for '%s' is invalid, must be function",
840 									mime_atom->str);
841 							goto err;
842 						}
843 
844 						struct ucl_lua_funcdata *fd = function_obj->value.ud;
845 
846 						lua_cbref = fd->idx;
847 					}
848 				}
849 			}
850 
851 			if (lua_cbref != -1) {
852 				msg_info_config ("added condition for regexp %s", mime_atom->str);
853 			}
854 
855 			/* Register new item in the cache */
856 			if (mime_atom->d.re->type == RSPAMD_RE_HEADER ||
857 					mime_atom->d.re->type == RSPAMD_RE_RAWHEADER ||
858 					mime_atom->d.re->type == RSPAMD_RE_MIMEHEADER) {
859 
860 				if (mime_atom->d.re->extra.header != NULL) {
861 					own_re = mime_atom->d.re->regexp;
862 					mime_atom->d.re->regexp = rspamd_re_cache_add (cfg->re_cache,
863 							mime_atom->d.re->regexp,
864 							mime_atom->d.re->type,
865 							mime_atom->d.re->extra.header,
866 							strlen (mime_atom->d.re->extra.header) + 1,
867 							lua_cbref);
868 					/* Pass ownership to the cache */
869 					rspamd_regexp_unref (own_re);
870 				}
871 				else {
872 					/* We have header regexp, but no header name is detected */
873 					g_set_error (err,
874 							rspamd_mime_expr_quark (),
875 							200,
876 							"no header name in header regexp: '%s'",
877 							mime_atom->str);
878 					rspamd_regexp_unref (mime_atom->d.re->regexp);
879 					goto err;
880 				}
881 
882 			}
883 			else if (mime_atom->d.re->type == RSPAMD_RE_SELECTOR) {
884 				if (mime_atom->d.re->extra.selector != NULL) {
885 					own_re = mime_atom->d.re->regexp;
886 					mime_atom->d.re->regexp = rspamd_re_cache_add (cfg->re_cache,
887 							mime_atom->d.re->regexp,
888 							mime_atom->d.re->type,
889 							mime_atom->d.re->extra.selector,
890 							strlen (mime_atom->d.re->extra.selector) + 1,
891 							lua_cbref);
892 					/* Pass ownership to the cache */
893 					rspamd_regexp_unref (own_re);
894 				}
895 				else {
896 					/* We have selector regexp, but no selector name is detected */
897 					g_set_error (err,
898 							rspamd_mime_expr_quark (),
899 							200,
900 							"no selector name in selector regexp: '%s'",
901 							mime_atom->str);
902 					rspamd_regexp_unref (mime_atom->d.re->regexp);
903 					goto err;
904 				}
905 			}
906 			else {
907 				own_re = mime_atom->d.re->regexp;
908 				mime_atom->d.re->regexp = rspamd_re_cache_add (cfg->re_cache,
909 						mime_atom->d.re->regexp,
910 						mime_atom->d.re->type,
911 						NULL,
912 						0,
913 						lua_cbref);
914 				/* Pass ownership to the cache */
915 				rspamd_regexp_unref (own_re);
916 			}
917 		}
918 	}
919 	else if (type == MIME_ATOM_LUA_FUNCTION) {
920 		mime_atom->d.lua_function = mime_atom->str;
921 
922 		lua_getglobal (cfg->lua_state, mime_atom->str);
923 
924 		if (lua_type (cfg->lua_state, -1) != LUA_TFUNCTION) {
925 			g_set_error (err, rspamd_mime_expr_quark(), 200,
926 					"no such lua function '%s'",
927 					mime_atom->str);
928 			lua_pop (cfg->lua_state, 1);
929 
930 			goto err;
931 		}
932 
933 		lua_pop (cfg->lua_state, 1);
934 	}
935 	else if (type == MIME_ATOM_LOCAL_LUA_FUNCTION) {
936 		/* p pointer is set to the start of Lua function name */
937 
938 		if (real_ud->conf_obj == NULL) {
939 			g_set_error (err, rspamd_mime_expr_quark(), 300,
940 					"no config object for '%s'",
941 					mime_atom->str);
942 			goto err;
943 		}
944 
945 		const ucl_object_t *functions = ucl_object_lookup (real_ud->conf_obj,
946 				"functions");
947 
948 		if (functions == NULL) {
949 			g_set_error (err, rspamd_mime_expr_quark(), 310,
950 					"no functions defined for '%s'",
951 					mime_atom->str);
952 			goto err;
953 		}
954 
955 		if (ucl_object_type (functions) != UCL_OBJECT) {
956 			g_set_error (err, rspamd_mime_expr_quark(), 320,
957 					"functions is not a table for '%s'",
958 					mime_atom->str);
959 			goto err;
960 		}
961 
962 		const ucl_object_t *function_obj;
963 
964 		function_obj = ucl_object_lookup_len (functions, c,
965 				p - c);
966 
967 		if (function_obj == NULL) {
968 			g_set_error (err, rspamd_mime_expr_quark(), 320,
969 					"function %*.s is not found for '%s'",
970 					(int)(p - c), c, mime_atom->str);
971 			goto err;
972 		}
973 
974 		if (ucl_object_type (function_obj) != UCL_USERDATA) {
975 			g_set_error (err, rspamd_mime_expr_quark(), 320,
976 					"function %*.s has invalid type for '%s'",
977 					(int)(p - c), c, mime_atom->str);
978 			goto err;
979 		}
980 
981 		struct ucl_lua_funcdata *fd = function_obj->value.ud;
982 
983 		mime_atom->d.lua_cbref = fd->idx;
984 	}
985 	else {
986 		mime_atom->d.func = rspamd_mime_expr_parse_function_atom (pool,
987 				mime_atom->str);
988 		if (mime_atom->d.func == NULL) {
989 			g_set_error (err, rspamd_mime_expr_quark(), 200,
990 					"cannot parse function '%s'",
991 					mime_atom->str);
992 			goto err;
993 		}
994 	}
995 
996 	a = rspamd_mempool_alloc0 (pool, sizeof (*a));
997 	a->len = p - line;
998 	a->priority = 0;
999 	a->data = mime_atom;
1000 
1001 	return a;
1002 
1003 err:
1004 
1005 	return NULL;
1006 }
1007 
1008 static gint
rspamd_mime_expr_process_regexp(struct rspamd_regexp_atom * re,struct rspamd_task * task)1009 rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re,
1010 		struct rspamd_task *task)
1011 {
1012 	gint ret;
1013 
1014 	if (re == NULL) {
1015 		msg_info_task ("invalid regexp passed");
1016 		return 0;
1017 	}
1018 
1019 	if (re->type == RSPAMD_RE_HEADER || re->type == RSPAMD_RE_RAWHEADER) {
1020 		ret = rspamd_re_cache_process (task,
1021 				re->regexp,
1022 				re->type,
1023 				re->extra.header,
1024 				strlen (re->extra.header),
1025 				re->is_strong);
1026 	}
1027 	else if (re->type == RSPAMD_RE_SELECTOR) {
1028 		ret = rspamd_re_cache_process (task,
1029 				re->regexp,
1030 				re->type,
1031 				re->extra.selector,
1032 				strlen (re->extra.selector),
1033 				re->is_strong);
1034 	}
1035 	else {
1036 		ret = rspamd_re_cache_process (task,
1037 				re->regexp,
1038 				re->type,
1039 				NULL,
1040 				0,
1041 				re->is_strong);
1042 	}
1043 
1044 	if (re->is_test) {
1045 		msg_info_task ("test %s regexp '%s' returned %d",
1046 				rspamd_re_cache_type_to_string (re->type),
1047 				re->regexp_text, ret);
1048 	}
1049 
1050 	return ret;
1051 }
1052 
1053 
1054 static gint
rspamd_mime_expr_priority(rspamd_expression_atom_t * atom)1055 rspamd_mime_expr_priority (rspamd_expression_atom_t *atom)
1056 {
1057 	struct rspamd_mime_atom *mime_atom = atom->data;
1058 	gint ret = 0;
1059 
1060 	switch (mime_atom->type) {
1061 	case MIME_ATOM_INTERNAL_FUNCTION:
1062 		/* Prioritize internal functions slightly */
1063 		ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 8;
1064 		break;
1065 	case MIME_ATOM_LUA_FUNCTION:
1066 	case MIME_ATOM_LOCAL_LUA_FUNCTION:
1067 		ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 4;
1068 		break;
1069 	case MIME_ATOM_REGEXP:
1070 		switch (mime_atom->d.re->type) {
1071 		case RSPAMD_RE_HEADER:
1072 		case RSPAMD_RE_RAWHEADER:
1073 			ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 16;
1074 			break;
1075 		case RSPAMD_RE_URL:
1076 		case RSPAMD_RE_EMAIL:
1077 			ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 8;
1078 			break;
1079 		case RSPAMD_RE_SELECTOR:
1080 			ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 8;
1081 			break;
1082 		case RSPAMD_RE_MIME:
1083 		case RSPAMD_RE_RAWMIME:
1084 			ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 2;
1085 			break;
1086 		case RSPAMD_RE_WORDS:
1087 		case RSPAMD_RE_RAWWORDS:
1088 		case RSPAMD_RE_STEMWORDS:
1089 		default:
1090 			/* For expensive regexps */
1091 			ret = 0;
1092 			break;
1093 		}
1094 	}
1095 
1096 	return ret;
1097 }
1098 
1099 static void
rspamd_mime_expr_destroy(rspamd_expression_atom_t * atom)1100 rspamd_mime_expr_destroy (rspamd_expression_atom_t *atom)
1101 {
1102 	struct rspamd_mime_atom *mime_atom = atom->data;
1103 
1104 	if (mime_atom) {
1105 		if (mime_atom->type == MIME_ATOM_INTERNAL_FUNCTION) {
1106 			/* Need to cleanup arguments */
1107 			g_array_free (mime_atom->d.func->args, TRUE);
1108 		}
1109 	}
1110 }
1111 
1112 static gboolean
rspamd_mime_expr_process_function(struct rspamd_function_atom * func,struct rspamd_task * task,lua_State * L)1113 rspamd_mime_expr_process_function (struct rspamd_function_atom * func,
1114 	struct rspamd_task * task,
1115 	lua_State *L)
1116 {
1117 	struct _fl *selected, key;
1118 
1119 	key.name = func->name;
1120 
1121 	selected = bsearch (&key,
1122 			list_ptr,
1123 			functions_number,
1124 			sizeof (struct _fl),
1125 			fl_cmp);
1126 	if (selected == NULL) {
1127 		/* Try to check lua function */
1128 		return FALSE;
1129 	}
1130 
1131 	return selected->func (task, func->args, selected->user_data);
1132 }
1133 
1134 static gdouble
rspamd_mime_expr_process(void * ud,rspamd_expression_atom_t * atom)1135 rspamd_mime_expr_process (void *ud, rspamd_expression_atom_t *atom)
1136 {
1137 	struct rspamd_task *task = (struct rspamd_task *)ud;
1138 	struct rspamd_mime_atom *mime_atom;
1139 	lua_State *L;
1140 	gdouble ret = 0;
1141 
1142 	g_assert (task != NULL);
1143 	g_assert (atom != NULL);
1144 
1145 	mime_atom = atom->data;
1146 
1147 	if (mime_atom->type == MIME_ATOM_REGEXP) {
1148 		ret = rspamd_mime_expr_process_regexp (mime_atom->d.re, task);
1149 	}
1150 	else if (mime_atom->type == MIME_ATOM_LUA_FUNCTION) {
1151 		L = task->cfg->lua_state;
1152 		lua_getglobal (L, mime_atom->d.lua_function);
1153 		rspamd_lua_task_push (L, task);
1154 
1155 		if (lua_pcall (L, 1, 1, 0) != 0) {
1156 			msg_info_task ("lua call to global function '%s' for atom '%s' failed: %s",
1157 				mime_atom->d.lua_function,
1158 				mime_atom->str,
1159 				lua_tostring (L, -1));
1160 			lua_pop (L, 1);
1161 		}
1162 		else {
1163 			if (lua_type (L, -1) == LUA_TBOOLEAN) {
1164 				ret = lua_toboolean (L, -1);
1165 			}
1166 			else if (lua_type (L, -1) == LUA_TNUMBER) {
1167 				ret = lua_tonumber (L, 1);
1168 			}
1169 			else {
1170 				msg_err_task ("%s returned wrong return type: %s",
1171 						mime_atom->str, lua_typename (L, lua_type (L, -1)));
1172 			}
1173 			/* Remove result */
1174 			lua_pop (L, 1);
1175 		}
1176 	}
1177 	else if (mime_atom->type == MIME_ATOM_LOCAL_LUA_FUNCTION) {
1178 		gint err_idx;
1179 
1180 		L = task->cfg->lua_state;
1181 		lua_pushcfunction (L, &rspamd_lua_traceback);
1182 		err_idx = lua_gettop (L);
1183 
1184 		lua_rawgeti (L, LUA_REGISTRYINDEX, mime_atom->d.lua_cbref);
1185 		rspamd_lua_task_push (L, task);
1186 
1187 		if (lua_pcall (L, 1, 1, err_idx) != 0) {
1188 			msg_info_task ("lua call to local function for atom '%s' failed: %s",
1189 					mime_atom->str,
1190 					lua_tostring (L, -1));
1191 		}
1192 		else {
1193 			if (lua_type (L, -1) == LUA_TBOOLEAN) {
1194 				ret = lua_toboolean (L, -1);
1195 			}
1196 			else if (lua_type (L, -1) == LUA_TNUMBER) {
1197 				ret = lua_tonumber (L, 1);
1198 			}
1199 			else {
1200 				msg_err_task ("%s returned wrong return type: %s",
1201 						mime_atom->str, lua_typename (L, lua_type (L, -1)));
1202 			}
1203 		}
1204 
1205 		lua_settop (L, 0);
1206 	}
1207 	else {
1208 		ret = rspamd_mime_expr_process_function (mime_atom->d.func, task,
1209 				task->cfg->lua_state);
1210 	}
1211 
1212 	return ret;
1213 }
1214 
1215 void
register_expression_function(const gchar * name,rspamd_internal_func_t func,void * user_data)1216 register_expression_function (const gchar *name,
1217 	rspamd_internal_func_t func,
1218 	void *user_data)
1219 {
1220 	static struct _fl *new;
1221 
1222 	functions_number++;
1223 
1224 	new = g_new (struct _fl, functions_number);
1225 	memcpy (new, list_ptr, (functions_number - 1) * sizeof (struct _fl));
1226 	if (list_allocated) {
1227 		g_free (list_ptr);
1228 	}
1229 
1230 	list_allocated = TRUE;
1231 	new[functions_number - 1].name = name;
1232 	new[functions_number - 1].func = func;
1233 	new[functions_number - 1].user_data = user_data;
1234 	qsort (new, functions_number, sizeof (struct _fl), fl_cmp);
1235 	list_ptr = new;
1236 }
1237 
1238 gboolean
rspamd_compare_encoding(struct rspamd_task * task,GArray * args,void * unused)1239 rspamd_compare_encoding (struct rspamd_task *task, GArray * args, void *unused)
1240 {
1241 	struct expression_argument *arg;
1242 
1243 	if (args == NULL || task == NULL) {
1244 		return FALSE;
1245 	}
1246 
1247 	arg = &g_array_index (args, struct expression_argument, 0);
1248 	if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
1249 		msg_warn_task ("invalid argument to function is passed");
1250 		return FALSE;
1251 	}
1252 
1253 	/* XXX: really write this function */
1254 	return TRUE;
1255 }
1256 
1257 gboolean
rspamd_header_exists(struct rspamd_task * task,GArray * args,void * unused)1258 rspamd_header_exists (struct rspamd_task * task, GArray * args, void *unused)
1259 {
1260 	struct expression_argument *arg;
1261 	struct rspamd_mime_header *rh;
1262 
1263 	if (args == NULL || task == NULL) {
1264 		return FALSE;
1265 	}
1266 
1267 	arg = &g_array_index (args, struct expression_argument, 0);
1268 	if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
1269 		msg_warn_task ("invalid argument to function is passed");
1270 		return FALSE;
1271 	}
1272 
1273 	rh = rspamd_message_get_header_array(task,
1274 			(gchar *) arg->data, FALSE);
1275 
1276 	debug_task ("try to get header %s: %d", (gchar *)arg->data,
1277 			(rh != NULL));
1278 
1279 	if (rh) {
1280 		return TRUE;
1281 	}
1282 
1283 	return FALSE;
1284 }
1285 
1286 
1287 /*
1288  * This function is designed to find difference between text/html and text/plain parts
1289  * It takes one argument: difference threshold, if we have two text parts, compare
1290  * its hashes and check for threshold, if value is greater than threshold, return TRUE
1291  * and return FALSE otherwise.
1292  */
1293 gboolean
rspamd_parts_distance(struct rspamd_task * task,GArray * args,void * unused)1294 rspamd_parts_distance (struct rspamd_task * task, GArray * args, void *unused)
1295 {
1296 	gint threshold, threshold2 = -1;
1297 	struct expression_argument *arg;
1298 	gdouble *pdiff, diff;
1299 
1300 	if (args == NULL || args->len == 0) {
1301 		debug_task ("no threshold is specified, assume it 100");
1302 		threshold = 100;
1303 	}
1304 	else {
1305 		errno = 0;
1306 		arg = &g_array_index (args, struct expression_argument, 0);
1307 		if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
1308 			msg_warn_task ("invalid argument to function is passed");
1309 			return FALSE;
1310 		}
1311 
1312 		threshold = strtoul ((gchar *)arg->data, NULL, 10);
1313 		if (errno != 0) {
1314 			msg_info_task ("bad numeric value for threshold \"%s\", assume it 100",
1315 				(gchar *)arg->data);
1316 			threshold = 100;
1317 		}
1318 		if (args->len >= 2) {
1319 			arg = &g_array_index (args, struct expression_argument, 1);
1320 			if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
1321 				msg_warn_task ("invalid argument to function is passed");
1322 				return FALSE;
1323 			}
1324 
1325 			errno = 0;
1326 			threshold2 = strtoul ((gchar *)arg->data, NULL, 10);
1327 			if (errno != 0) {
1328 				msg_info_task ("bad numeric value for threshold \"%s\", ignore it",
1329 					(gchar *)arg->data);
1330 				threshold2 = -1;
1331 			}
1332 		}
1333 	}
1334 
1335 	if ((pdiff =
1336 		rspamd_mempool_get_variable (task->task_pool,
1337 		"parts_distance")) != NULL) {
1338 		diff = (1.0 - (*pdiff)) * 100.0;
1339 
1340 		if (diff != -1) {
1341 			if (threshold2 > 0) {
1342 				if (diff >= MIN (threshold, threshold2) &&
1343 					diff < MAX (threshold, threshold2)) {
1344 
1345 					return TRUE;
1346 				}
1347 			}
1348 			else {
1349 				if (diff <= threshold) {
1350 					return TRUE;
1351 				}
1352 			}
1353 			return FALSE;
1354 		}
1355 		else {
1356 			return FALSE;
1357 		}
1358 	}
1359 
1360 	return FALSE;
1361 }
1362 
1363 struct addr_list {
1364 	const gchar *name;
1365 	guint namelen;
1366 	const gchar *addr;
1367 	guint addrlen;
1368 };
1369 
1370 static gint
addr_list_cmp_func(const void * a,const void * b)1371 addr_list_cmp_func (const void *a, const void *b)
1372 {
1373 	const struct addr_list *addra = (struct addr_list *)a,
1374 			*addrb = (struct addr_list *)b;
1375 
1376 	if (addra->addrlen != addrb->addrlen) {
1377 		return addra->addrlen - addrb->addrlen;
1378 	}
1379 
1380 	return memcmp (addra->addr, addrb->addr, addra->addrlen);
1381 }
1382 
1383 #define COMPARE_RCPT_LEN 3
1384 #define MIN_RCPT_TO_COMPARE 7
1385 
1386 gboolean
rspamd_recipients_distance(struct rspamd_task * task,GArray * args,void * unused)1387 rspamd_recipients_distance (struct rspamd_task *task, GArray * args,
1388 	void *unused)
1389 {
1390 	struct expression_argument *arg;
1391 	struct rspamd_email_address *cur;
1392 	double threshold;
1393 	struct addr_list *ar;
1394 	gint num, i, hits = 0;
1395 
1396 	if (args == NULL) {
1397 		msg_warn_task ("no parameters to function");
1398 		return FALSE;
1399 	}
1400 
1401 	arg = &g_array_index (args, struct expression_argument, 0);
1402 	if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
1403 		msg_warn_task ("invalid argument to function is passed");
1404 		return FALSE;
1405 	}
1406 
1407 	errno = 0;
1408 	threshold = strtod ((gchar *)arg->data, NULL);
1409 
1410 	if (errno != 0) {
1411 		msg_warn_task ("invalid numeric value '%s': %s",
1412 			(gchar *)arg->data,
1413 			strerror (errno));
1414 		return FALSE;
1415 	}
1416 
1417 	if (!MESSAGE_FIELD (task, rcpt_mime)) {
1418 		return FALSE;
1419 	}
1420 
1421 	num = MESSAGE_FIELD (task, rcpt_mime)->len;
1422 
1423 	if (num < MIN_RCPT_TO_COMPARE) {
1424 		return FALSE;
1425 	}
1426 
1427 	ar = rspamd_mempool_alloc0 (task->task_pool, num * sizeof (struct addr_list));
1428 
1429 	/* Fill array */
1430 	num = 0;
1431 	PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, rcpt_mime), i, cur) {
1432 		if (cur->addr_len > COMPARE_RCPT_LEN) {
1433 			ar[num].name = cur->addr;
1434 			ar[num].namelen = cur->addr_len;
1435 			ar[num].addr = cur->domain;
1436 			ar[num].addrlen = cur->domain_len;
1437 			num ++;
1438 		}
1439 	}
1440 
1441 	qsort (ar, num, sizeof (*ar), addr_list_cmp_func);
1442 
1443 	/* Cycle all elements in array */
1444 	for (i = 0; i < num; i++) {
1445 		if (i < num - 1) {
1446 			if (ar[i].namelen == ar[i + 1].namelen) {
1447 				if (rspamd_lc_cmp (ar[i].name, ar[i + 1].name, COMPARE_RCPT_LEN) == 0) {
1448 					hits++;
1449 				}
1450 			}
1451 		}
1452 	}
1453 
1454 	if ((hits * num / 2.) / (double)num >= threshold) {
1455 		return TRUE;
1456 	}
1457 
1458 	return FALSE;
1459 }
1460 
1461 gboolean
rspamd_has_only_html_part(struct rspamd_task * task,GArray * args,void * unused)1462 rspamd_has_only_html_part (struct rspamd_task * task, GArray * args,
1463 	void *unused)
1464 {
1465 	struct rspamd_mime_text_part *p;
1466 	guint i, cnt_html = 0, cnt_txt = 0;
1467 
1468 	PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, p) {
1469 		p = g_ptr_array_index (MESSAGE_FIELD (task, text_parts), 0);
1470 
1471 		if (!IS_TEXT_PART_ATTACHMENT (p)) {
1472 			if (IS_TEXT_PART_HTML (p)) {
1473 				cnt_html++;
1474 			}
1475 			else {
1476 				cnt_txt++;
1477 			}
1478 		}
1479 	}
1480 
1481 	return (cnt_html > 0 && cnt_txt == 0);
1482 }
1483 
1484 static gboolean
is_recipient_list_sorted(GPtrArray * ar)1485 is_recipient_list_sorted (GPtrArray *ar)
1486 {
1487 	struct rspamd_email_address *addr;
1488 	gboolean res = TRUE;
1489 	rspamd_ftok_t cur, prev;
1490 	gint i;
1491 
1492 	/* Do not check to short address lists */
1493 	if (ar == NULL || ar->len < MIN_RCPT_TO_COMPARE) {
1494 		return FALSE;
1495 	}
1496 
1497 	prev.len = 0;
1498 	prev.begin = NULL;
1499 
1500 	PTR_ARRAY_FOREACH (ar, i, addr) {
1501 		cur.begin = addr->addr;
1502 		cur.len = addr->addr_len;
1503 
1504 		if (prev.len != 0) {
1505 			if (rspamd_ftok_casecmp (&cur, &prev) <= 0) {
1506 				res = FALSE;
1507 				break;
1508 			}
1509 		}
1510 
1511 		prev = cur;
1512 	}
1513 
1514 	return res;
1515 }
1516 
1517 gboolean
rspamd_is_recipients_sorted(struct rspamd_task * task,GArray * args,void * unused)1518 rspamd_is_recipients_sorted (struct rspamd_task * task,
1519 	GArray * args,
1520 	void *unused)
1521 {
1522 	/* Check all types of addresses */
1523 
1524 	if (MESSAGE_FIELD (task, rcpt_mime)) {
1525 		return is_recipient_list_sorted (MESSAGE_FIELD (task, rcpt_mime));
1526 	}
1527 
1528 	return FALSE;
1529 }
1530 
1531 gboolean
rspamd_compare_transfer_encoding(struct rspamd_task * task,GArray * args,void * unused)1532 rspamd_compare_transfer_encoding (struct rspamd_task * task,
1533 	GArray * args,
1534 	void *unused)
1535 {
1536 	struct expression_argument *arg;
1537 	guint i;
1538 	struct rspamd_mime_part *part;
1539 	enum rspamd_cte cte;
1540 
1541 	if (args == NULL) {
1542 		msg_warn_task ("no parameters to function");
1543 		return FALSE;
1544 	}
1545 
1546 	arg = &g_array_index (args, struct expression_argument, 0);
1547 	if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
1548 		msg_warn_task ("invalid argument to function is passed");
1549 		return FALSE;
1550 	}
1551 
1552 	cte = rspamd_cte_from_string (arg->data);
1553 
1554 	if (cte == RSPAMD_CTE_UNKNOWN) {
1555 		msg_warn_task ("unknown cte: %s", arg->data);
1556 		return FALSE;
1557 	}
1558 
1559 	PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
1560 		if (IS_PART_TEXT (part)) {
1561 			if (part->cte == cte) {
1562 				return TRUE;
1563 			}
1564 		}
1565 	}
1566 
1567 	return FALSE;
1568 }
1569 
1570 gboolean
rspamd_is_html_balanced(struct rspamd_task * task,GArray * args,void * unused)1571 rspamd_is_html_balanced (struct rspamd_task * task, GArray * args, void *unused)
1572 {
1573 	/* Totally broken but seems to be never used */
1574 	return TRUE;
1575 }
1576 
1577 gboolean
rspamd_has_html_tag(struct rspamd_task * task,GArray * args,void * unused)1578 rspamd_has_html_tag (struct rspamd_task * task, GArray * args, void *unused)
1579 {
1580 	struct rspamd_mime_text_part *p;
1581 	struct expression_argument *arg;
1582 	guint i;
1583 	gboolean res = FALSE;
1584 
1585 	if (args == NULL) {
1586 		msg_warn_task ("no parameters to function");
1587 		return FALSE;
1588 	}
1589 
1590 	arg = &g_array_index (args, struct expression_argument, 0);
1591 	if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
1592 		msg_warn_task ("invalid argument to function is passed");
1593 		return FALSE;
1594 	}
1595 
1596 	PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, p) {
1597 		if (IS_TEXT_PART_HTML (p) && p->html) {
1598 			res = rspamd_html_tag_seen (p->html, arg->data);
1599 		}
1600 
1601 		if (res) {
1602 			break;
1603 		}
1604 	}
1605 
1606 	return res;
1607 
1608 }
1609 
1610 gboolean
rspamd_has_fake_html(struct rspamd_task * task,GArray * args,void * unused)1611 rspamd_has_fake_html (struct rspamd_task * task, GArray * args, void *unused)
1612 {
1613 	struct rspamd_mime_text_part *p;
1614 	guint i;
1615 	gboolean res = FALSE;
1616 
1617 	PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, p) {
1618 		if (IS_TEXT_PART_HTML (p) && (rspamd_html_get_tags_count(p->html) < 2)) {
1619 			res = TRUE;
1620 		}
1621 
1622 		if (res) {
1623 			break;
1624 		}
1625 	}
1626 
1627 	return res;
1628 
1629 }
1630 
1631 static gboolean
rspamd_raw_header_exists(struct rspamd_task * task,GArray * args,void * unused)1632 rspamd_raw_header_exists (struct rspamd_task *task, GArray * args, void *unused)
1633 {
1634 	struct expression_argument *arg;
1635 
1636 	if (args == NULL || task == NULL) {
1637 		return FALSE;
1638 	}
1639 
1640 	arg = &g_array_index (args, struct expression_argument, 0);
1641 	if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
1642 		msg_warn_task ("invalid argument to function is passed");
1643 		return FALSE;
1644 	}
1645 
1646 	return rspamd_message_get_header_array(task, arg->data, FALSE) != NULL;
1647 }
1648 
1649 static gboolean
match_smtp_data(struct rspamd_task * task,struct expression_argument * arg,const gchar * what,gsize len)1650 match_smtp_data (struct rspamd_task *task,
1651 	struct expression_argument *arg,
1652 	const gchar *what, gsize len)
1653 {
1654 	rspamd_regexp_t *re;
1655 	gint r = 0;
1656 
1657 	if (arg->type == EXPRESSION_ARGUMENT_REGEXP) {
1658 		/* This is a regexp */
1659 		re = arg->data;
1660 		if (re == NULL) {
1661 			msg_warn_task ("cannot compile regexp for function");
1662 			return FALSE;
1663 		}
1664 
1665 
1666 		if (len > 0) {
1667 			r = rspamd_regexp_search (re, what, len, NULL, NULL, FALSE, NULL);
1668 		}
1669 
1670 		return r;
1671 	}
1672 	else if (arg->type == EXPRESSION_ARGUMENT_NORMAL &&
1673 			g_ascii_strncasecmp (arg->data, what, len) == 0) {
1674 		return TRUE;
1675 	}
1676 
1677 	return FALSE;
1678 }
1679 
1680 static gboolean
rspamd_check_smtp_data(struct rspamd_task * task,GArray * args,void * unused)1681 rspamd_check_smtp_data (struct rspamd_task *task, GArray * args, void *unused)
1682 {
1683 	struct expression_argument *arg;
1684 	struct rspamd_email_address *addr = NULL;
1685 	GPtrArray *rcpts = NULL;
1686 	const gchar *type, *str = NULL;
1687 	guint i;
1688 
1689 	if (args == NULL) {
1690 		msg_warn_task ("no parameters to function");
1691 		return FALSE;
1692 	}
1693 
1694 	arg = &g_array_index (args, struct expression_argument, 0);
1695 
1696 	if (!arg || !arg->data || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
1697 		msg_warn_task ("no parameters to function");
1698 		return FALSE;
1699 	}
1700 	else {
1701 		type = arg->data;
1702 		switch (*type) {
1703 		case 'f':
1704 		case 'F':
1705 			if (g_ascii_strcasecmp (type, "from") == 0) {
1706 				addr = rspamd_task_get_sender (task);
1707 			}
1708 			else {
1709 				msg_warn_task ("bad argument to function: %s", type);
1710 				return FALSE;
1711 			}
1712 			break;
1713 		case 'h':
1714 		case 'H':
1715 			if (g_ascii_strcasecmp (type, "helo") == 0) {
1716 				str = task->helo;
1717 			}
1718 			else {
1719 				msg_warn_task ("bad argument to function: %s", type);
1720 				return FALSE;
1721 			}
1722 			break;
1723 		case 'u':
1724 		case 'U':
1725 			if (g_ascii_strcasecmp (type, "user") == 0) {
1726 				str = task->user;
1727 			}
1728 			else {
1729 				msg_warn_task ("bad argument to function: %s", type);
1730 				return FALSE;
1731 			}
1732 			break;
1733 		case 's':
1734 		case 'S':
1735 			if (g_ascii_strcasecmp (type, "subject") == 0) {
1736 				str = MESSAGE_FIELD (task, subject);
1737 			}
1738 			else {
1739 				msg_warn_task ("bad argument to function: %s", type);
1740 				return FALSE;
1741 			}
1742 			break;
1743 		case 'r':
1744 		case 'R':
1745 			if (g_ascii_strcasecmp (type, "rcpt") == 0) {
1746 				rcpts = task->rcpt_envelope;
1747 			}
1748 			else {
1749 				msg_warn_task ("bad argument to function: %s", type);
1750 				return FALSE;
1751 			}
1752 			break;
1753 		default:
1754 			msg_warn_task ("bad argument to function: %s", type);
1755 			return FALSE;
1756 		}
1757 	}
1758 
1759 	if (str == NULL && addr == NULL && rcpts == NULL) {
1760 		/* Not enough data so regexp would NOT be found anyway */
1761 		return FALSE;
1762 	}
1763 
1764 	/* We would process only one more argument, others are ignored */
1765 	if (args->len >= 2) {
1766 		arg = &g_array_index (args, struct expression_argument, 1);
1767 
1768 		if (arg) {
1769 			if (str != NULL) {
1770 				return match_smtp_data (task, arg, str, strlen (str));
1771 			}
1772 			else if (addr != NULL && addr->addr) {
1773 				return match_smtp_data (task, arg, addr->addr, addr->addr_len);
1774 			}
1775 			else {
1776 				if (rcpts != NULL) {
1777 					for (i = 0; i < rcpts->len; i ++) {
1778 						addr = g_ptr_array_index (rcpts, i);
1779 
1780 						if (addr && addr->addr &&
1781 							match_smtp_data (task, arg,
1782 								addr->addr, addr->addr_len)) {
1783 							return TRUE;
1784 						}
1785 					}
1786 				}
1787 			}
1788 		}
1789 	}
1790 
1791 	return FALSE;
1792 }
1793 
1794 static inline gboolean
rspamd_check_ct_attr(const gchar * begin,gsize len,struct expression_argument * arg_pattern)1795 rspamd_check_ct_attr (const gchar *begin, gsize len,
1796 		struct expression_argument *arg_pattern)
1797 {
1798 	rspamd_regexp_t *re;
1799 	gboolean r = FALSE;
1800 
1801 	if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) {
1802 		re = arg_pattern->data;
1803 
1804 		if (len > 0) {
1805 			r = rspamd_regexp_search (re,
1806 					begin, len,
1807 					NULL, NULL, FALSE, NULL);
1808 		}
1809 
1810 		if (r) {
1811 			return TRUE;
1812 		}
1813 	}
1814 	else {
1815 		/* Just do strcasecmp */
1816 		gsize plen = strlen (arg_pattern->data);
1817 
1818 		if (plen == len &&
1819 			g_ascii_strncasecmp (arg_pattern->data, begin, len) == 0) {
1820 			return TRUE;
1821 		}
1822 	}
1823 
1824 	return FALSE;
1825 }
1826 
1827 static gboolean
rspamd_content_type_compare_param(struct rspamd_task * task,GArray * args,void * unused)1828 rspamd_content_type_compare_param (struct rspamd_task * task,
1829 	GArray * args,
1830 	void *unused)
1831 {
1832 
1833 	struct expression_argument *arg, *arg1, *arg_pattern;
1834 	gboolean recursive = FALSE;
1835 	struct rspamd_mime_part *cur_part;
1836 	guint i;
1837 	rspamd_ftok_t srch;
1838 	struct rspamd_content_type_param *found = NULL, *cur;
1839 	const gchar *param_name;
1840 
1841 	if (args == NULL || args->len < 2) {
1842 		msg_warn_task ("no parameters to function");
1843 		return FALSE;
1844 	}
1845 
1846 	arg = &g_array_index (args, struct expression_argument, 0);
1847 	g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
1848 	param_name = arg->data;
1849 	arg_pattern = &g_array_index (args, struct expression_argument, 1);
1850 
1851 	PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, cur_part) {
1852 		if (args->len >= 3) {
1853 			arg1 = &g_array_index (args, struct expression_argument, 2);
1854 			if (g_ascii_strncasecmp (arg1->data, "true",
1855 					sizeof ("true") - 1) == 0) {
1856 				recursive = TRUE;
1857 			}
1858 		}
1859 		else {
1860 			/*
1861 			 * If user did not specify argument, let's assume that he wants
1862 			 * recursive search if mime part is multipart/mixed
1863 			 */
1864 			if (IS_PART_MULTIPART (cur_part)) {
1865 				recursive = TRUE;
1866 			}
1867 		}
1868 
1869 		rspamd_ftok_t lit;
1870 		RSPAMD_FTOK_FROM_STR (&srch, param_name);
1871 		RSPAMD_FTOK_FROM_STR (&lit, "charset");
1872 
1873 		if (rspamd_ftok_equal (&srch, &lit)) {
1874 			if (rspamd_check_ct_attr (cur_part->ct->charset.begin,
1875 					cur_part->ct->charset.len, arg_pattern)) {
1876 				return TRUE;
1877 			}
1878 		}
1879 
1880 		RSPAMD_FTOK_FROM_STR (&lit, "boundary");
1881 		if (rspamd_ftok_equal (&srch, &lit)) {
1882 			if (rspamd_check_ct_attr (cur_part->ct->orig_boundary.begin,
1883 					cur_part->ct->orig_boundary.len, arg_pattern)) {
1884 				return TRUE;
1885 			}
1886 		}
1887 
1888 		if (cur_part->ct->attrs) {
1889 			found = g_hash_table_lookup (cur_part->ct->attrs, &srch);
1890 
1891 			if (found) {
1892 				DL_FOREACH (found, cur) {
1893 					if (rspamd_check_ct_attr (cur->value.begin,
1894 							cur->value.len, arg_pattern)) {
1895 						return TRUE;
1896 					}
1897 				}
1898 			}
1899 		}
1900 
1901 		if (!recursive) {
1902 			break;
1903 		}
1904 	}
1905 
1906 	return FALSE;
1907 }
1908 
1909 static gboolean
rspamd_content_type_has_param(struct rspamd_task * task,GArray * args,void * unused)1910 rspamd_content_type_has_param (struct rspamd_task * task,
1911 	GArray * args,
1912 	void *unused)
1913 {
1914 	struct expression_argument *arg, *arg1;
1915 	gboolean recursive = FALSE;
1916 	struct rspamd_mime_part *cur_part;
1917 	guint i;
1918 	rspamd_ftok_t srch;
1919 	struct rspamd_content_type_param *found = NULL;
1920 	const gchar *param_name;
1921 
1922 	if (args == NULL || args->len < 1) {
1923 		msg_warn_task ("no parameters to function");
1924 		return FALSE;
1925 	}
1926 
1927 	arg = &g_array_index (args, struct expression_argument, 0);
1928 	g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
1929 	param_name = arg->data;
1930 
1931 	PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, cur_part) {
1932 		if (args->len >= 2) {
1933 			arg1 = &g_array_index (args, struct expression_argument, 1);
1934 			if (g_ascii_strncasecmp (arg1->data, "true",
1935 					sizeof ("true") - 1) == 0) {
1936 				recursive = TRUE;
1937 			}
1938 		}
1939 		else {
1940 			/*
1941 			 * If user did not specify argument, let's assume that he wants
1942 			 * recursive search if mime part is multipart/mixed
1943 			 */
1944 			if (IS_PART_MULTIPART (cur_part)) {
1945 				recursive = TRUE;
1946 			}
1947 		}
1948 
1949 
1950 		rspamd_ftok_t lit;
1951 		RSPAMD_FTOK_FROM_STR (&srch, param_name);
1952 		RSPAMD_FTOK_FROM_STR (&lit, "charset");
1953 
1954 		if (rspamd_ftok_equal (&srch, &lit)) {
1955 			if (cur_part->ct->charset.len > 0) {
1956 				return TRUE;
1957 			}
1958 		}
1959 
1960 		RSPAMD_FTOK_FROM_STR (&lit, "boundary");
1961 		if (rspamd_ftok_equal (&srch, &lit)) {
1962 			if (cur_part->ct->boundary.len > 0) {
1963 				return TRUE;
1964 			}
1965 		}
1966 
1967 		if (cur_part->ct->attrs) {
1968 			found = g_hash_table_lookup (cur_part->ct->attrs, &srch);
1969 
1970 			if (found) {
1971 				return TRUE;
1972 			}
1973 		}
1974 
1975 		if (!recursive) {
1976 			break;
1977 		}
1978 	}
1979 
1980 	return FALSE;
1981 }
1982 
1983 static gboolean
rspamd_content_type_check(struct rspamd_task * task,GArray * args,gboolean check_subtype)1984 rspamd_content_type_check (struct rspamd_task *task,
1985 	GArray * args,
1986 	gboolean check_subtype)
1987 {
1988 	rspamd_ftok_t *param_data, srch;
1989 	rspamd_regexp_t *re;
1990 	struct expression_argument *arg1, *arg_pattern;
1991 	struct rspamd_content_type *ct;
1992 	gint r = 0;
1993 	guint i;
1994 	gboolean recursive = FALSE;
1995 	struct rspamd_mime_part *cur_part;
1996 
1997 	if (args == NULL || args->len < 1) {
1998 		msg_warn_task ("no parameters to function");
1999 		return FALSE;
2000 	}
2001 
2002 	arg_pattern = &g_array_index (args, struct expression_argument, 0);
2003 
2004 	PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, cur_part) {
2005 		ct = cur_part->ct;
2006 
2007 		if (args->len >= 2) {
2008 			arg1 = &g_array_index (args, struct expression_argument, 1);
2009 			if (g_ascii_strncasecmp (arg1->data, "true",
2010 					sizeof ("true") - 1) == 0) {
2011 				recursive = TRUE;
2012 			}
2013 		}
2014 		else {
2015 			/*
2016 			 * If user did not specify argument, let's assume that he wants
2017 			 * recursive search if mime part is multipart/mixed
2018 			 */
2019 			if (IS_PART_MULTIPART (cur_part)) {
2020 				recursive = TRUE;
2021 			}
2022 		}
2023 
2024 		if (check_subtype) {
2025 			param_data = &ct->subtype;
2026 		}
2027 		else {
2028 			param_data = &ct->type;
2029 		}
2030 
2031 		if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) {
2032 			re = arg_pattern->data;
2033 
2034 			if (param_data->len > 0) {
2035 				r = rspamd_regexp_search (re, param_data->begin, param_data->len,
2036 						NULL, NULL, FALSE, NULL);
2037 			}
2038 
2039 			if (r) {
2040 				return TRUE;
2041 			}
2042 		}
2043 		else {
2044 			/* Just do strcasecmp */
2045 			srch.begin = arg_pattern->data;
2046 			srch.len = strlen (arg_pattern->data);
2047 
2048 			if (rspamd_ftok_casecmp (param_data, &srch) == 0) {
2049 				return TRUE;
2050 			}
2051 		}
2052 
2053 		/* Get next part */
2054 		if (!recursive) {
2055 			break;
2056 		}
2057 	}
2058 
2059 	return FALSE;
2060 }
2061 
2062 static gboolean
rspamd_content_type_is_type(struct rspamd_task * task,GArray * args,void * unused)2063 rspamd_content_type_is_type (struct rspamd_task * task,
2064 	GArray * args,
2065 	void *unused)
2066 {
2067 	return rspamd_content_type_check (task, args, FALSE);
2068 }
2069 
2070 static gboolean
rspamd_content_type_is_subtype(struct rspamd_task * task,GArray * args,void * unused)2071 rspamd_content_type_is_subtype (struct rspamd_task * task,
2072 	GArray * args,
2073 	void *unused)
2074 {
2075 	return rspamd_content_type_check (task, args, TRUE);
2076 }
2077 
2078 static gboolean
compare_subtype(struct rspamd_task * task,struct rspamd_content_type * ct,struct expression_argument * subtype)2079 compare_subtype (struct rspamd_task *task, struct rspamd_content_type *ct,
2080 	struct expression_argument *subtype)
2081 {
2082 	rspamd_regexp_t *re;
2083 	rspamd_ftok_t srch;
2084 	gint r = 0;
2085 
2086 	if (subtype == NULL || ct == NULL) {
2087 		msg_warn_task ("invalid parameters passed");
2088 		return FALSE;
2089 	}
2090 	if (subtype->type == EXPRESSION_ARGUMENT_REGEXP) {
2091 		re = subtype->data;
2092 
2093 		if (ct->subtype.len > 0) {
2094 			r = rspamd_regexp_search (re, ct->subtype.begin, ct->subtype.len,
2095 					NULL, NULL, FALSE, NULL);
2096 		}
2097 	}
2098 	else {
2099 		srch.begin = subtype->data;
2100 		srch.len = strlen (subtype->data);
2101 
2102 		/* Just do strcasecmp */
2103 		if (rspamd_ftok_casecmp (&ct->subtype, &srch) == 0) {
2104 			return TRUE;
2105 		}
2106 	}
2107 
2108 	return r;
2109 }
2110 
2111 static gboolean
compare_len(struct rspamd_mime_part * part,guint min,guint max)2112 compare_len (struct rspamd_mime_part *part, guint min, guint max)
2113 {
2114 	if (min == 0 && max == 0) {
2115 		return TRUE;
2116 	}
2117 
2118 	if (min == 0) {
2119 		return part->parsed_data.len <= max;
2120 	}
2121 	else if (max == 0) {
2122 		return part->parsed_data.len >= min;
2123 	}
2124 	else {
2125 		return part->parsed_data.len >= min && part->parsed_data.len <= max;
2126 	}
2127 }
2128 
2129 static gboolean
common_has_content_part(struct rspamd_task * task,struct expression_argument * param_type,struct expression_argument * param_subtype,gint min_len,gint max_len)2130 common_has_content_part (struct rspamd_task * task,
2131 	struct expression_argument *param_type,
2132 	struct expression_argument *param_subtype,
2133 	gint min_len,
2134 	gint max_len)
2135 {
2136 	rspamd_regexp_t *re;
2137 	struct rspamd_mime_part *part;
2138 	struct rspamd_content_type *ct;
2139 	rspamd_ftok_t srch;
2140 	gint r = 0;
2141 	guint i;
2142 
2143 	PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
2144 		ct = part->ct;
2145 
2146 		if (ct == NULL) {
2147 			continue;
2148 		}
2149 
2150 		if (param_type->type == EXPRESSION_ARGUMENT_REGEXP) {
2151 			re = param_type->data;
2152 
2153 			if (ct->type.len > 0) {
2154 				r = rspamd_regexp_search (re, ct->type.begin, ct->type.len,
2155 						NULL, NULL, FALSE, NULL);
2156 			}
2157 
2158 			/* Also check subtype and length of the part */
2159 			if (r && param_subtype) {
2160 				r = compare_len (part, min_len, max_len) &&
2161 						compare_subtype (task, ct, param_subtype);
2162 
2163 				return r;
2164 			}
2165 		}
2166 		else {
2167 			/* Just do strcasecmp */
2168 			srch.begin = param_type->data;
2169 			srch.len = strlen (param_type->data);
2170 
2171 			if (rspamd_ftok_casecmp (&ct->type, &srch) == 0) {
2172 				if (param_subtype) {
2173 					if (compare_subtype (task, ct, param_subtype)) {
2174 						if (compare_len (part, min_len, max_len)) {
2175 							return TRUE;
2176 						}
2177 					}
2178 				}
2179 				else {
2180 					if (compare_len (part, min_len, max_len)) {
2181 						return TRUE;
2182 					}
2183 				}
2184 			}
2185 		}
2186 	}
2187 
2188 	return FALSE;
2189 }
2190 
2191 static gboolean
rspamd_has_content_part(struct rspamd_task * task,GArray * args,void * unused)2192 rspamd_has_content_part (struct rspamd_task * task, GArray * args, void *unused)
2193 {
2194 	struct expression_argument *param_type = NULL, *param_subtype = NULL;
2195 
2196 	if (args == NULL) {
2197 		msg_warn_task ("no parameters to function");
2198 		return FALSE;
2199 	}
2200 
2201 	param_type = &g_array_index (args, struct expression_argument, 0);
2202 	if (args->len >= 2) {
2203 		param_subtype = &g_array_index (args, struct expression_argument, 1);
2204 	}
2205 
2206 	return common_has_content_part (task, param_type, param_subtype, 0, 0);
2207 }
2208 
2209 static gboolean
rspamd_has_content_part_len(struct rspamd_task * task,GArray * args,void * unused)2210 rspamd_has_content_part_len (struct rspamd_task * task,
2211 	GArray * args,
2212 	void *unused)
2213 {
2214 	struct expression_argument *param_type = NULL, *param_subtype = NULL;
2215 	gint min = 0, max = 0;
2216 	struct expression_argument *arg;
2217 
2218 	if (args == NULL) {
2219 		msg_warn_task ("no parameters to function");
2220 		return FALSE;
2221 	}
2222 
2223 	param_type = &g_array_index (args, struct expression_argument, 0);
2224 
2225 	if (args->len >= 2) {
2226 		param_subtype = &g_array_index (args, struct expression_argument, 1);
2227 
2228 		if (args->len >= 3) {
2229 			arg = &g_array_index (args, struct expression_argument, 2);
2230 			errno = 0;
2231 			min = strtoul (arg->data, NULL, 10);
2232 			g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
2233 
2234 			if (errno != 0) {
2235 				msg_warn_task ("invalid numeric value '%s': %s",
2236 					(gchar *)arg->data,
2237 					strerror (errno));
2238 				return FALSE;
2239 			}
2240 
2241 			if (args->len >= 4) {
2242 				arg = &g_array_index (args, struct expression_argument, 3);
2243 				g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
2244 				max = strtoul (arg->data, NULL, 10);
2245 
2246 				if (errno != 0) {
2247 					msg_warn_task ("invalid numeric value '%s': %s",
2248 						(gchar *)arg->data,
2249 						strerror (errno));
2250 					return FALSE;
2251 				}
2252 			}
2253 		}
2254 	}
2255 
2256 	return common_has_content_part (task, param_type, param_subtype, min, max);
2257 }
2258 
2259 static gboolean
rspamd_is_empty_body(struct rspamd_task * task,GArray * args,void * unused)2260 rspamd_is_empty_body (struct rspamd_task *task,
2261 		GArray * args,
2262 		void *unused)
2263 {
2264 	struct rspamd_mime_part *part;
2265 	guint i;
2266 
2267 	PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
2268 		if (part->parsed_data.len > 0) {
2269 			return FALSE;
2270 		}
2271 	}
2272 
2273 	return TRUE;
2274 }
2275 
/*
 * Helpers for looking a flag up by name. They expand in a scope that must
 * provide variables named `task`, `found` and `result`.
 */

/* Store in `result` whether any bit of `flag` is set in task->flags */
#define TASK_FLAG_READ(flag) do { \
	result = !!(task->flags & (flag)); \
} while(0)

/*
 * If no earlier name has matched and `flag` equals `strname`, read the
 * `macro` bit(s) from task->flags into `result` and set `found`.
 */
#define TASK_GET_FLAG(flag, strname, macro) do { \
	if (!found && strcmp ((flag), (strname)) == 0) { \
		TASK_FLAG_READ((macro)); \
		found = TRUE; \
	} \
} while(0)

/* Store in `result` whether any bit of `flag` is set in task->protocol_flags */
#define TASK_PROTOCOL_FLAG_READ(flag) do { \
	result = !!(task->protocol_flags & (flag)); \
} while(0)

/*
 * Same as TASK_GET_FLAG, but the bit(s) are read from task->protocol_flags.
 */
#define TASK_GET_PROTOCOL_FLAG(flag, strname, macro) do { \
	if (!found && strcmp ((flag), (strname)) == 0) { \
		TASK_PROTOCOL_FLAG_READ((macro)); \
		found = TRUE; \
	} \
} while(0)
2297 
2298 
2299 static gboolean
rspamd_has_flag_expr(struct rspamd_task * task,GArray * args,void * unused)2300 rspamd_has_flag_expr (struct rspamd_task *task,
2301 					  GArray * args,
2302 					  void *unused)
2303 {
2304 	gboolean found = FALSE, result = FALSE;
2305 	struct expression_argument *flag_arg;
2306 	const gchar *flag_str;
2307 
2308 	if (args == NULL) {
2309 		msg_warn_task ("no parameters to function");
2310 		return FALSE;
2311 	}
2312 
2313 	flag_arg = &g_array_index (args, struct expression_argument, 0);
2314 
2315 	if (flag_arg->type != EXPRESSION_ARGUMENT_NORMAL) {
2316 		msg_warn_task ("invalid parameter to function");
2317 		return FALSE;
2318 	}
2319 
2320 	flag_str = (const gchar *)flag_arg->data;
2321 
2322 	TASK_GET_FLAG (flag_str, "pass_all", RSPAMD_TASK_FLAG_PASS_ALL);
2323 	TASK_GET_FLAG (flag_str, "no_log", RSPAMD_TASK_FLAG_NO_LOG);
2324 	TASK_GET_FLAG (flag_str, "no_stat", RSPAMD_TASK_FLAG_NO_STAT);
2325 	TASK_GET_FLAG (flag_str, "skip", RSPAMD_TASK_FLAG_SKIP);
2326 	TASK_GET_PROTOCOL_FLAG (flag_str, "extended_urls",
2327 			RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS);
2328 	TASK_GET_FLAG (flag_str, "learn_spam", RSPAMD_TASK_FLAG_LEARN_SPAM);
2329 	TASK_GET_FLAG (flag_str, "learn_ham", RSPAMD_TASK_FLAG_LEARN_HAM);
2330 	TASK_GET_FLAG (flag_str, "greylisted", RSPAMD_TASK_FLAG_GREYLISTED);
2331 	TASK_GET_FLAG (flag_str, "broken_headers",
2332 			RSPAMD_TASK_FLAG_BROKEN_HEADERS);
2333 	TASK_GET_FLAG (flag_str, "skip_process",
2334 			RSPAMD_TASK_FLAG_SKIP_PROCESS);
2335 	TASK_GET_PROTOCOL_FLAG (flag_str, "milter",
2336 			RSPAMD_TASK_PROTOCOL_FLAG_MILTER);
2337 	TASK_GET_FLAG (flag_str, "bad_unicode",
2338 			RSPAMD_TASK_FLAG_BAD_UNICODE);
2339 
2340 	if (!found) {
2341 		msg_warn_task ("invalid flag name %s", flag_str);
2342 		return FALSE;
2343 	}
2344 
2345 	return result;
2346 }
2347 
2348 static gboolean
rspamd_has_symbol_expr(struct rspamd_task * task,GArray * args,void * unused)2349 rspamd_has_symbol_expr (struct rspamd_task *task,
2350 					  GArray * args,
2351 					  void *unused)
2352 {
2353 	struct expression_argument *sym_arg;
2354 	const gchar *symbol_str;
2355 
2356 	if (args == NULL) {
2357 		msg_warn_task ("no parameters to function");
2358 		return FALSE;
2359 	}
2360 
2361 	sym_arg = &g_array_index (args, struct expression_argument, 0);
2362 
2363 	if (sym_arg->type != EXPRESSION_ARGUMENT_NORMAL) {
2364 		msg_warn_task ("invalid parameter to function");
2365 		return FALSE;
2366 	}
2367 
2368 	symbol_str = (const gchar *)sym_arg->data;
2369 
2370 	if (rspamd_task_find_symbol_result (task, symbol_str, NULL)) {
2371 		return TRUE;
2372 	}
2373 
2374 	return FALSE;
2375 }
2376