1 /*-
2 * Copyright 2016 Vsevolod Stakhov
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include <contrib/libucl/ucl.h>
17 #include "config.h"
18 #include "util.h"
19 #include "cfg_file.h"
20 #include "rspamd.h"
21 #include "message.h"
22 #include "mime_expressions.h"
23 #include "libserver/html/html.h"
24 #include "lua/lua_common.h"
25 #include "utlist.h"
26
/*
 * Forward declarations of the built-in expression function handlers that
 * are referenced from rspamd_functions_list. Every handler receives the
 * task being processed, a GArray of parsed arguments and an opaque
 * user-data pointer, and returns a gboolean result.
 *
 * Handlers declared without `static` have external linkage; the remaining
 * ones are private to this file.
 */
gboolean rspamd_compare_encoding (struct rspamd_task *task,
	GArray * args,
	void *unused);
gboolean rspamd_header_exists (struct rspamd_task *task,
	GArray * args,
	void *unused);
gboolean rspamd_parts_distance (struct rspamd_task *task,
	GArray * args,
	void *unused);
gboolean rspamd_recipients_distance (struct rspamd_task *task,
	GArray * args,
	void *unused);
gboolean rspamd_has_only_html_part (struct rspamd_task *task,
	GArray * args,
	void *unused);
gboolean rspamd_is_recipients_sorted (struct rspamd_task *task,
	GArray * args,
	void *unused);
gboolean rspamd_compare_transfer_encoding (struct rspamd_task *task,
	GArray * args,
	void *unused);
gboolean rspamd_is_html_balanced (struct rspamd_task *task,
	GArray * args,
	void *unused);
gboolean rspamd_has_html_tag (struct rspamd_task *task,
	GArray * args,
	void *unused);
gboolean rspamd_has_fake_html (struct rspamd_task *task,
	GArray * args,
	void *unused);
static gboolean rspamd_raw_header_exists (struct rspamd_task *task,
	GArray * args,
	void *unused);
static gboolean rspamd_check_smtp_data (struct rspamd_task *task,
	GArray * args,
	void *unused);
static gboolean rspamd_content_type_is_type (struct rspamd_task * task,
	GArray * args,
	void *unused);
static gboolean rspamd_content_type_is_subtype (struct rspamd_task *task,
	GArray * args,
	void *unused);
static gboolean rspamd_content_type_has_param (struct rspamd_task * task,
	GArray * args,
	void *unused);
static gboolean rspamd_content_type_compare_param (struct rspamd_task * task,
	GArray * args,
	void *unused);
static gboolean rspamd_has_content_part (struct rspamd_task *task,
	GArray * args,
	void *unused);
static gboolean rspamd_has_content_part_len (struct rspamd_task *task,
	GArray * args,
	void *unused);
static gboolean rspamd_is_empty_body (struct rspamd_task *task,
	GArray * args,
	void *unused);
static gboolean rspamd_has_flag_expr (struct rspamd_task *task,
	GArray * args,
	void *unused);
static gboolean rspamd_has_symbol_expr (struct rspamd_task *task,
	GArray * args,
	void *unused);

/*
 * Atom callbacks (parse / process / priority / destroy) that are wired
 * into the mime_expr_subr table.
 */
static rspamd_expression_atom_t * rspamd_mime_expr_parse (const gchar *line, gsize len,
	rspamd_mempool_t *pool, gpointer ud, GError **err);
static gdouble rspamd_mime_expr_process (void *ud, rspamd_expression_atom_t *atom);
static gint rspamd_mime_expr_priority (rspamd_expression_atom_t *atom);
static void rspamd_mime_expr_destroy (rspamd_expression_atom_t *atom);
96
/**
 * Regexp structure
 *
 * Parsed form of a single regexp atom; instances are filled in by
 * rspamd_mime_expr_parse_regexp_atom().
 */
struct rspamd_regexp_atom {
	enum rspamd_re_type type; /**< regexp type, selected by the flags after the closing slash */
	gchar *regexp_text; /**< regexp text representation (pool copy of the atom text) */
	rspamd_regexp_t *regexp; /**< compiled regexp structure */
	union {
		const gchar *header; /**< header name for header regexps */
		const gchar *selector; /**< selector name for lua selector regexp */
	} extra; /**< text found before '=' in the atom; which member is valid depends on `type` */
	gboolean is_test; /**< true if this expression must be tested ('T' flag): its result is logged */
	gboolean is_strong; /**< true if headers search must be case sensitive ('S' flag) */
	gboolean is_multiple; /**< true if we need to match all inclusions of atom ('A' flag) */
};
112
/**
 * Rspamd expression function
 *
 * Parsed form of a `name(arg, ...)` atom, produced by
 * rspamd_mime_expr_parse_function_atom().
 */
struct rspamd_function_atom {
	gchar *name; /**< name of function (text before the opening brace) */
	GArray *args; /**< its args: array of struct expression_argument, freed in rspamd_mime_expr_destroy() */
};
120
/*
 * Kind of a parsed mime atom; selects which member of
 * `struct rspamd_mime_atom::d` is valid.
 */
enum rspamd_mime_atom_type {
	MIME_ATOM_REGEXP = 0, /* regexp atom; also the default type assumed by the parser */
	MIME_ATOM_INTERNAL_FUNCTION, /* name(args) atom dispatched through the functions list */
	MIME_ATOM_LUA_FUNCTION, /* bare name resolved as a global Lua function */
	MIME_ATOM_LOCAL_LUA_FUNCTION, /* New style: "lua:name" looked up in the "functions" config table */
};
127
/*
 * A single parsed atom of a mime expression. `type` tells which member
 * of the `d` union carries the payload.
 */
struct rspamd_mime_atom {
	gchar *str; /* pool copy of the atom text as it appeared in the expression */
	union {
		struct rspamd_regexp_atom *re; /* valid for MIME_ATOM_REGEXP */
		struct rspamd_function_atom *func; /* valid for MIME_ATOM_INTERNAL_FUNCTION */
		const gchar *lua_function; /* valid for MIME_ATOM_LUA_FUNCTION: global function name */
		gint lua_cbref; /* valid for MIME_ATOM_LOCAL_LUA_FUNCTION: Lua registry reference */
	} d;
	enum rspamd_mime_atom_type type;
};
138
/*
 * List of internal functions of rspamd
 * Sorted by name to use bsearch
 *
 * Lookups use bsearch() with fl_cmp(), which compares names with
 * strcmp(), so any entry added here must be inserted at its strcmp()
 * position. Functions registered at runtime go through
 * register_expression_function(), which re-sorts a heap copy instead.
 */
static struct _fl {
	const gchar *name; /* function name as written in expressions */
	rspamd_internal_func_t func; /* handler invoked for the atom */
	void *user_data; /* opaque pointer passed to the handler as its last argument */
} rspamd_functions_list[] = {
	{"check_smtp_data", rspamd_check_smtp_data, NULL},
	{"compare_encoding", rspamd_compare_encoding, NULL},
	{"compare_parts_distance", rspamd_parts_distance, NULL},
	{"compare_recipients_distance", rspamd_recipients_distance, NULL},
	{"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL},
	{"content_type_compare_param", rspamd_content_type_compare_param, NULL},
	{"content_type_has_param", rspamd_content_type_has_param, NULL},
	{"content_type_is_subtype", rspamd_content_type_is_subtype, NULL},
	{"content_type_is_type", rspamd_content_type_is_type, NULL},
	{"has_content_part", rspamd_has_content_part, NULL},
	{"has_content_part_len", rspamd_has_content_part_len, NULL},
	{"has_fake_html", rspamd_has_fake_html, NULL},
	{"has_flag", rspamd_has_flag_expr, NULL},
	{"has_html_tag", rspamd_has_html_tag, NULL},
	{"has_only_html_part", rspamd_has_only_html_part, NULL},
	{"has_symbol", rspamd_has_symbol_expr, NULL},
	{"header_exists", rspamd_header_exists, NULL},
	{"is_empty_body", rspamd_is_empty_body, NULL},
	{"is_html_balanced", rspamd_is_html_balanced, NULL},
	{"is_recipients_sorted", rspamd_is_recipients_sorted, NULL},
	{"raw_header_exists", rspamd_raw_header_exists, NULL},
};
170
/*
 * Atom callbacks table for mime expressions: binds the parse, process,
 * priority and destroy hooks implemented in this file.
 */
const struct rspamd_atom_subr mime_expr_subr = {
	.parse = rspamd_mime_expr_parse,
	.process = rspamd_mime_expr_process,
	.priority = rspamd_mime_expr_priority,
	.destroy = rspamd_mime_expr_destroy
};
177
/* Currently active functions table; initially the static rspamd_functions_list */
static struct _fl *list_ptr = &rspamd_functions_list[0];
/* Number of entries reachable through list_ptr */
static guint32 functions_number = sizeof (rspamd_functions_list) /
	sizeof (struct _fl);
/* TRUE once list_ptr refers to a g_new()-allocated copy that has to be g_free()d on replacement */
static gboolean list_allocated = FALSE;
182
183 /* Bsearch routine */
184 static gint
fl_cmp(const void * s1,const void * s2)185 fl_cmp (const void *s1, const void *s2)
186 {
187 struct _fl *fl1 = (struct _fl *)s1;
188 struct _fl *fl2 = (struct _fl *)s2;
189 return strcmp (fl1->name, fl2->name);
190 }
191
192 static GQuark
rspamd_mime_expr_quark(void)193 rspamd_mime_expr_quark (void)
194 {
195 return g_quark_from_static_string ("mime-expressions");
196 }
197
198 #define TYPE_CHECK(str, type, len) (sizeof(type) - 1 == (len) && rspamd_lc_cmp((str), (type), (len)) == 0)
199 static gboolean
rspamd_parse_long_option(const gchar * start,gsize len,struct rspamd_regexp_atom * a)200 rspamd_parse_long_option (const gchar *start, gsize len,
201 struct rspamd_regexp_atom *a)
202 {
203 gboolean ret = FALSE;
204
205 if (TYPE_CHECK (start, "body", len)) {
206 ret = TRUE;
207 a->type = RSPAMD_RE_BODY;
208 }
209 else if (TYPE_CHECK (start, "part", len) ||
210 TYPE_CHECK (start, "mime", len)) {
211 ret = TRUE;
212 a->type = RSPAMD_RE_MIME;
213 }
214 else if (TYPE_CHECK (start, "raw_part", len) ||
215 TYPE_CHECK (start, "raw_mime", len) ||
216 TYPE_CHECK (start, "mime_raw", len)) {
217 ret = TRUE;
218 a->type = RSPAMD_RE_RAWMIME;
219 }
220 else if (TYPE_CHECK (start, "header", len)) {
221 ret = TRUE;
222 a->type = RSPAMD_RE_HEADER;
223 }
224 else if (TYPE_CHECK (start, "mime_header", len) ||
225 TYPE_CHECK (start, "header_mime", len)) {
226 ret = TRUE;
227 a->type = RSPAMD_RE_MIMEHEADER;
228 }
229 else if (TYPE_CHECK (start, "raw_header", len) ||
230 TYPE_CHECK (start, "header_raw", len)) {
231 ret = TRUE;
232 a->type = RSPAMD_RE_RAWHEADER;
233 }
234 else if (TYPE_CHECK (start, "all_header", len) ||
235 TYPE_CHECK (start, "header_all", len) ||
236 TYPE_CHECK (start, "all_headers", len)) {
237 ret = TRUE;
238 a->type = RSPAMD_RE_ALLHEADER;
239 }
240 else if (TYPE_CHECK (start, "url", len)) {
241 ret = TRUE;
242 a->type = RSPAMD_RE_URL;
243 }
244 else if (TYPE_CHECK (start, "email", len)) {
245 ret = TRUE;
246 a->type = RSPAMD_RE_EMAIL;
247 }
248 else if (TYPE_CHECK (start, "sa_body", len)) {
249 ret = TRUE;
250 a->type = RSPAMD_RE_SABODY;
251 }
252 else if (TYPE_CHECK (start, "sa_raw_body", len) ||
253 TYPE_CHECK (start, "sa_body_raw", len)) {
254 ret = TRUE;
255 a->type = RSPAMD_RE_SARAWBODY;
256 }
257 else if (TYPE_CHECK (start, "words", len)) {
258 ret = TRUE;
259 a->type = RSPAMD_RE_WORDS;
260 }
261 else if (TYPE_CHECK (start, "raw_words", len)) {
262 ret = TRUE;
263 a->type = RSPAMD_RE_RAWWORDS;
264 }
265 else if (TYPE_CHECK (start, "stem_words", len)) {
266 ret = TRUE;
267 a->type = RSPAMD_RE_STEMWORDS;
268 }
269 else if (TYPE_CHECK (start, "selector", len)) {
270 ret = TRUE;
271 a->type = RSPAMD_RE_SELECTOR;
272 }
273
274 return ret;
275 }
276
277 /*
278 * Rspamd regexp utility functions
279 */
280 static struct rspamd_regexp_atom *
rspamd_mime_expr_parse_regexp_atom(rspamd_mempool_t * pool,const gchar * line,struct rspamd_config * cfg)281 rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line,
282 struct rspamd_config *cfg)
283 {
284 const gchar *begin, *end, *p, *src, *start, *brace;
285 gchar *dbegin, *dend, *extra = NULL;
286 struct rspamd_regexp_atom *result;
287 GError *err = NULL;
288 GString *re_flags;
289
290 if (line == NULL) {
291 msg_err_pool ("cannot parse NULL line");
292 return NULL;
293 }
294
295 src = line;
296 result = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_regexp_atom));
297 /* Skip whitespaces */
298 while (g_ascii_isspace (*line)) {
299 line++;
300 }
301 if (*line == '\0') {
302 msg_warn_pool ("got empty regexp");
303 return NULL;
304 }
305
306 result->type = RSPAMD_RE_MAX;
307
308 start = line;
309 /* First try to find header name */
310 begin = strchr (line, '/');
311 if (begin != NULL) {
312 p = begin;
313 end = NULL;
314 while (p != line) {
315 if (*p == '=') {
316 end = p;
317 break;
318 }
319 p--;
320 }
321
322 if (end) {
323 extra = rspamd_mempool_alloc (pool, end - line + 1);
324 rspamd_strlcpy (extra, line, end - line + 1);
325 line = end;
326 }
327 }
328 else {
329 extra = rspamd_mempool_strdup (pool, line);
330 result->type = RSPAMD_RE_MAX;
331 line = start;
332 }
333 /* Find begin of regexp */
334 while (*line && *line != '/') {
335 line++;
336 }
337 if (*line != '\0') {
338 begin = line + 1;
339 }
340 else if (extra == NULL) {
341 /* Assume that line without // is just a header name */
342 extra = rspamd_mempool_strdup (pool, line);
343 result->type = RSPAMD_RE_HEADER;
344 return result;
345 }
346 else {
347 /* We got header name earlier but have not found // expression, so it is invalid regexp */
348 msg_warn_pool (
349 "got no header name (eg. header=) but without corresponding regexp, %s",
350 src);
351 return NULL;
352 }
353 /* Find end */
354 end = begin;
355 while (*end && (*end != '/' || *(end - 1) == '\\')) {
356 end++;
357 }
358 if (end == begin || *end != '/') {
359 msg_warn_pool ("no trailing / in regexp %s", src);
360 return NULL;
361 }
362 /* Parse flags */
363 p = end + 1;
364 re_flags = g_string_sized_new (32);
365
366 while (p != NULL) {
367 switch (*p) {
368 case 'i':
369 case 'm':
370 case 's':
371 case 'x':
372 case 'u':
373 case 'O':
374 case 'r':
375 case 'L':
376 /* Handled by rspamd_regexp_t */
377 g_string_append_c (re_flags, *p);
378 p++;
379 break;
380 case 'o':
381 p++;
382 break;
383 /* Type flags */
384 case 'H':
385 result->type = RSPAMD_RE_HEADER;
386 p++;
387 break;
388 case 'R':
389 result->type = RSPAMD_RE_ALLHEADER;
390 p++;
391 break;
392 case 'B':
393 result->type = RSPAMD_RE_MIMEHEADER;
394 p++;
395 break;
396 case 'C':
397 result->type = RSPAMD_RE_SABODY;
398 p++;
399 break;
400 case 'D':
401 result->type = RSPAMD_RE_SARAWBODY;
402 p++;
403 break;
404 case 'M':
405 result->type = RSPAMD_RE_BODY;
406 p++;
407 break;
408 case 'P':
409 result->type = RSPAMD_RE_MIME;
410 p++;
411 break;
412 case 'Q':
413 result->type = RSPAMD_RE_RAWMIME;
414 p++;
415 break;
416 case 'U':
417 result->type = RSPAMD_RE_URL;
418 p++;
419 break;
420 case 'X':
421 result->type = RSPAMD_RE_RAWHEADER;
422 p++;
423 break;
424 case '$':
425 result->type = RSPAMD_RE_SELECTOR;
426 p++;
427 break;
428 case '{':
429 /* Long definition */
430 if ((brace = strchr (p + 1, '}')) != NULL) {
431 if (!rspamd_parse_long_option (p + 1, brace - (p + 1), result)) {
432 msg_warn_pool ("invalid long regexp type: %*s in '%s'",
433 (int)(brace - (p + 1)), p + 1, src);
434 p = NULL;
435 }
436 else {
437 p = brace + 1;
438 }
439 }
440 else {
441 p = NULL;
442 }
443 break;
444 /* Other flags */
445 case 'T':
446 result->is_test = TRUE;
447 p++;
448 break;
449 case 'S':
450 result->is_strong = TRUE;
451 p++;
452 break;
453 case 'A':
454 result->is_multiple = TRUE;
455 p++;
456 break;
457 /* Stop flags parsing */
458 default:
459 p = NULL;
460 break;
461 }
462 }
463
464 if (result->type >= RSPAMD_RE_MAX) {
465 if (extra) {
466 /* Assume header regexp */
467 result->extra.header = extra;
468 result->type = RSPAMD_RE_HEADER;
469 }
470 else {
471 msg_err_pool ("could not read regexp: %s, unknown type", src);
472 return NULL;
473 }
474 }
475
476 if ((result->type == RSPAMD_RE_HEADER ||
477 result->type == RSPAMD_RE_RAWHEADER ||
478 result->type == RSPAMD_RE_MIMEHEADER)) {
479 if (extra == NULL) {
480 msg_err_pool ("header regexp: '%s' has no header part", src);
481 return NULL;
482 }
483 else {
484 result->extra.header = extra;
485 }
486 }
487
488 if (result->type == RSPAMD_RE_SELECTOR) {
489 if (extra == NULL) {
490 msg_err_pool ("selector regexp: '%s' has no selector part", src);
491 return NULL;
492 }
493 else {
494 result->extra.selector = extra;
495 }
496 }
497
498
499 result->regexp_text = rspamd_mempool_strdup (pool, start);
500 dbegin = result->regexp_text + (begin - start);
501 dend = result->regexp_text + (end - start);
502 *dend = '\0';
503
504 result->regexp = rspamd_regexp_new (dbegin, re_flags->str,
505 &err);
506
507 g_string_free (re_flags, TRUE);
508
509 if (result->regexp == NULL || err != NULL) {
510 msg_warn_pool ("could not read regexp: %s while reading regexp %e",
511 src, err);
512
513 if (err) {
514 g_error_free (err);
515 }
516
517 return NULL;
518 }
519
520 if (result->is_multiple) {
521 rspamd_regexp_set_maxhits (result->regexp, 0);
522 }
523 else {
524 rspamd_regexp_set_maxhits (result->regexp, 1);
525 }
526
527 rspamd_regexp_set_ud (result->regexp, result);
528
529 *dend = '/';
530
531 return result;
532 }
533
534 struct rspamd_function_atom *
rspamd_mime_expr_parse_function_atom(rspamd_mempool_t * pool,const gchar * input)535 rspamd_mime_expr_parse_function_atom (rspamd_mempool_t *pool, const gchar *input)
536 {
537 const gchar *obrace, *ebrace, *p, *c;
538 gchar t, *databuf;
539 guint len;
540 struct rspamd_function_atom *res;
541 struct expression_argument arg;
542 GError *err = NULL;
543 enum {
544 start_read_argument = 0,
545 in_string,
546 in_regexp,
547 got_backslash,
548 got_comma
549 } state, prev_state = 0;
550
551 obrace = strchr (input, '(');
552 ebrace = strrchr (input, ')');
553
554 g_assert (obrace != NULL && ebrace != NULL);
555
556 res = rspamd_mempool_alloc0 (pool, sizeof (*res));
557 res->name = rspamd_mempool_alloc (pool, obrace - input + 1);
558 rspamd_strlcpy (res->name, input, obrace - input + 1);
559 res->args = g_array_new (FALSE, FALSE, sizeof (struct expression_argument));
560
561 p = obrace + 1;
562 c = p;
563 state = start_read_argument;
564
565 /* Read arguments */
566 while (p <= ebrace) {
567 t = *p;
568 switch (state) {
569 case start_read_argument:
570 if (t == '/') {
571 state = in_regexp;
572 c = p;
573 }
574 else if (!g_ascii_isspace (t)) {
575 state = in_string;
576
577 if (t == '\'' || t == '\"') {
578 c = p + 1;
579 }
580 else {
581 c = p;
582 }
583 }
584 p ++;
585 break;
586 case in_regexp:
587 if (t == '\\') {
588 state = got_backslash;
589 prev_state = in_regexp;
590 }
591 else if (t == ',' || p == ebrace) {
592 len = p - c + 1;
593 databuf = rspamd_mempool_alloc (pool, len);
594 rspamd_strlcpy (databuf, c, len);
595 arg.type = EXPRESSION_ARGUMENT_REGEXP;
596 arg.data = rspamd_regexp_cache_create (NULL, databuf, NULL, &err);
597
598 if (arg.data == NULL) {
599 /* Fallback to string */
600 msg_warn ("cannot parse slashed argument %s as regexp: %s",
601 databuf, err->message);
602 g_error_free (err);
603 arg.type = EXPRESSION_ARGUMENT_NORMAL;
604 arg.data = databuf;
605 }
606
607 g_array_append_val (res->args, arg);
608 state = got_comma;
609 }
610 p ++;
611 break;
612 case in_string:
613 if (t == '\\') {
614 state = got_backslash;
615 prev_state = in_string;
616 }
617 else if (t == ',' || p == ebrace) {
618 if (*(p - 1) == '\'' || *(p - 1) == '\"') {
619 len = p - c;
620 }
621 else {
622 len = p - c + 1;
623 }
624
625 databuf = rspamd_mempool_alloc (pool, len);
626 rspamd_strlcpy (databuf, c, len);
627 arg.type = EXPRESSION_ARGUMENT_NORMAL;
628 arg.data = databuf;
629 g_array_append_val (res->args, arg);
630 state = got_comma;
631 }
632 p ++;
633 break;
634 case got_backslash:
635 state = prev_state;
636 p ++;
637 break;
638 case got_comma:
639 state = start_read_argument;
640 break;
641 }
642 }
643
644 return res;
645 }
646
647 static rspamd_expression_atom_t *
rspamd_mime_expr_parse(const gchar * line,gsize len,rspamd_mempool_t * pool,gpointer ud,GError ** err)648 rspamd_mime_expr_parse (const gchar *line, gsize len,
649 rspamd_mempool_t *pool, gpointer ud, GError **err)
650 {
651 rspamd_expression_atom_t *a = NULL;
652 struct rspamd_mime_atom *mime_atom = NULL;
653 const gchar *p, *end, *c = NULL;
654 struct rspamd_mime_expr_ud *real_ud = (struct rspamd_mime_expr_ud *)ud;
655 struct rspamd_config *cfg;
656 rspamd_regexp_t *own_re;
657 gchar t;
658 gint type = MIME_ATOM_REGEXP, obraces = 0, ebraces = 0;
659 enum {
660 in_header = 0,
661 got_slash,
662 in_regexp,
663 got_backslash,
664 got_second_slash,
665 in_flags,
666 in_flags_brace,
667 got_obrace,
668 in_function,
669 in_local_function,
670 got_ebrace,
671 end_atom,
672 bad_atom
673 } state = 0, prev_state = 0;
674
675 p = line;
676 end = p + len;
677 cfg = real_ud->cfg;
678
679 while (p < end) {
680 t = *p;
681
682 switch (state) {
683 case in_header:
684 if (t == '/') {
685 /* Regexp */
686 state = got_slash;
687 }
688 else if (t == '(') {
689 /* Function */
690 state = got_obrace;
691 }
692 else if (!g_ascii_isalnum (t) && t != '_' && t != '-' && t != '=') {
693 if (t == ':') {
694 if (p - line == 3 && memcmp (line, "lua", 3) == 0) {
695 type = MIME_ATOM_LOCAL_LUA_FUNCTION;
696 state = in_local_function;
697 c = p + 1;
698 }
699 }
700 else {
701 /* Likely lua function, identified by just a string */
702 type = MIME_ATOM_LUA_FUNCTION;
703 state = end_atom;
704 /* Do not increase p */
705 continue;
706 }
707 }
708 else if (g_ascii_isspace (t)) {
709 state = bad_atom;
710 }
711 p ++;
712 break;
713 case got_slash:
714 state = in_regexp;
715 break;
716 case in_regexp:
717 if (t == '\\') {
718 state = got_backslash;
719 prev_state = in_regexp;
720 }
721 else if (t == '/') {
722 state = got_second_slash;
723 }
724 p ++;
725 break;
726 case got_second_slash:
727 state = in_flags;
728 break;
729 case in_flags:
730 if (t == '{') {
731 state = in_flags_brace;
732 p ++;
733 }
734 else if (!g_ascii_isalpha (t) && t != '$') {
735 state = end_atom;
736 }
737 else {
738 p ++;
739 }
740 break;
741 case in_flags_brace:
742 if (t == '}') {
743 state = in_flags;
744 }
745 p ++;
746 break;
747 case got_backslash:
748 state = prev_state;
749 p ++;
750 break;
751 case got_obrace:
752 state = in_function;
753 type = MIME_ATOM_INTERNAL_FUNCTION;
754 obraces ++;
755 break;
756 case in_function:
757 if (t == '\\') {
758 state = got_backslash;
759 prev_state = in_function;
760 }
761 else if (t == '(') {
762 obraces ++;
763 }
764 else if (t == ')') {
765 ebraces ++;
766 if (ebraces == obraces) {
767 state = got_ebrace;
768 }
769 }
770 p ++;
771 break;
772 case in_local_function:
773 if (!(g_ascii_isalnum (t) || t == '-' || t == '_')) {
774 g_assert (c != NULL);
775 state = end_atom;
776 }
777 else {
778 p++;
779 }
780 break;
781 case got_ebrace:
782 state = end_atom;
783 break;
784 case bad_atom:
785 g_set_error (err, rspamd_mime_expr_quark(), 100, "cannot parse"
786 " mime atom '%s' when reading symbol '%c' at offset %d, "
787 "near %*.s", line, t, (gint)(p - line),
788 (gint)MIN (end - p, 10), p);
789 return NULL;
790 case end_atom:
791 goto set;
792 }
793 }
794 set:
795
796 if (p - line == 0 || (state != got_ebrace && state != got_second_slash &&
797 state != in_flags && state != end_atom)) {
798 g_set_error (err, rspamd_mime_expr_quark(), 200, "incomplete or empty"
799 " mime atom");
800 return NULL;
801 }
802
803 mime_atom = rspamd_mempool_alloc (pool, sizeof (*mime_atom));
804 mime_atom->type = type;
805 mime_atom->str = rspamd_mempool_alloc (pool, p - line + 1);
806 rspamd_strlcpy (mime_atom->str, line, p - line + 1);
807
808 if (type == MIME_ATOM_REGEXP) {
809 mime_atom->d.re = rspamd_mime_expr_parse_regexp_atom (pool,
810 mime_atom->str, cfg);
811 if (mime_atom->d.re == NULL) {
812 g_set_error (err, rspamd_mime_expr_quark(), 200,
813 "cannot parse regexp '%s'",
814 mime_atom->str);
815 goto err;
816 }
817 else {
818 gint lua_cbref = -1;
819
820 /* Check regexp condition */
821 if (real_ud->conf_obj != NULL) {
822 const ucl_object_t *re_conditions = ucl_object_lookup (real_ud->conf_obj,
823 "re_conditions");
824
825 if (re_conditions != NULL) {
826 if (ucl_object_type (re_conditions) != UCL_OBJECT) {
827 g_set_error (err, rspamd_mime_expr_quark (), 320,
828 "re_conditions is not a table for '%s'",
829 mime_atom->str);
830 goto err;
831 }
832
833 const ucl_object_t *function_obj = ucl_object_lookup (re_conditions,
834 mime_atom->str);
835
836 if (function_obj != NULL) {
837 if (ucl_object_type (function_obj) != UCL_USERDATA) {
838 g_set_error (err, rspamd_mime_expr_quark (), 320,
839 "condition for '%s' is invalid, must be function",
840 mime_atom->str);
841 goto err;
842 }
843
844 struct ucl_lua_funcdata *fd = function_obj->value.ud;
845
846 lua_cbref = fd->idx;
847 }
848 }
849 }
850
851 if (lua_cbref != -1) {
852 msg_info_config ("added condition for regexp %s", mime_atom->str);
853 }
854
855 /* Register new item in the cache */
856 if (mime_atom->d.re->type == RSPAMD_RE_HEADER ||
857 mime_atom->d.re->type == RSPAMD_RE_RAWHEADER ||
858 mime_atom->d.re->type == RSPAMD_RE_MIMEHEADER) {
859
860 if (mime_atom->d.re->extra.header != NULL) {
861 own_re = mime_atom->d.re->regexp;
862 mime_atom->d.re->regexp = rspamd_re_cache_add (cfg->re_cache,
863 mime_atom->d.re->regexp,
864 mime_atom->d.re->type,
865 mime_atom->d.re->extra.header,
866 strlen (mime_atom->d.re->extra.header) + 1,
867 lua_cbref);
868 /* Pass ownership to the cache */
869 rspamd_regexp_unref (own_re);
870 }
871 else {
872 /* We have header regexp, but no header name is detected */
873 g_set_error (err,
874 rspamd_mime_expr_quark (),
875 200,
876 "no header name in header regexp: '%s'",
877 mime_atom->str);
878 rspamd_regexp_unref (mime_atom->d.re->regexp);
879 goto err;
880 }
881
882 }
883 else if (mime_atom->d.re->type == RSPAMD_RE_SELECTOR) {
884 if (mime_atom->d.re->extra.selector != NULL) {
885 own_re = mime_atom->d.re->regexp;
886 mime_atom->d.re->regexp = rspamd_re_cache_add (cfg->re_cache,
887 mime_atom->d.re->regexp,
888 mime_atom->d.re->type,
889 mime_atom->d.re->extra.selector,
890 strlen (mime_atom->d.re->extra.selector) + 1,
891 lua_cbref);
892 /* Pass ownership to the cache */
893 rspamd_regexp_unref (own_re);
894 }
895 else {
896 /* We have selector regexp, but no selector name is detected */
897 g_set_error (err,
898 rspamd_mime_expr_quark (),
899 200,
900 "no selector name in selector regexp: '%s'",
901 mime_atom->str);
902 rspamd_regexp_unref (mime_atom->d.re->regexp);
903 goto err;
904 }
905 }
906 else {
907 own_re = mime_atom->d.re->regexp;
908 mime_atom->d.re->regexp = rspamd_re_cache_add (cfg->re_cache,
909 mime_atom->d.re->regexp,
910 mime_atom->d.re->type,
911 NULL,
912 0,
913 lua_cbref);
914 /* Pass ownership to the cache */
915 rspamd_regexp_unref (own_re);
916 }
917 }
918 }
919 else if (type == MIME_ATOM_LUA_FUNCTION) {
920 mime_atom->d.lua_function = mime_atom->str;
921
922 lua_getglobal (cfg->lua_state, mime_atom->str);
923
924 if (lua_type (cfg->lua_state, -1) != LUA_TFUNCTION) {
925 g_set_error (err, rspamd_mime_expr_quark(), 200,
926 "no such lua function '%s'",
927 mime_atom->str);
928 lua_pop (cfg->lua_state, 1);
929
930 goto err;
931 }
932
933 lua_pop (cfg->lua_state, 1);
934 }
935 else if (type == MIME_ATOM_LOCAL_LUA_FUNCTION) {
936 /* p pointer is set to the start of Lua function name */
937
938 if (real_ud->conf_obj == NULL) {
939 g_set_error (err, rspamd_mime_expr_quark(), 300,
940 "no config object for '%s'",
941 mime_atom->str);
942 goto err;
943 }
944
945 const ucl_object_t *functions = ucl_object_lookup (real_ud->conf_obj,
946 "functions");
947
948 if (functions == NULL) {
949 g_set_error (err, rspamd_mime_expr_quark(), 310,
950 "no functions defined for '%s'",
951 mime_atom->str);
952 goto err;
953 }
954
955 if (ucl_object_type (functions) != UCL_OBJECT) {
956 g_set_error (err, rspamd_mime_expr_quark(), 320,
957 "functions is not a table for '%s'",
958 mime_atom->str);
959 goto err;
960 }
961
962 const ucl_object_t *function_obj;
963
964 function_obj = ucl_object_lookup_len (functions, c,
965 p - c);
966
967 if (function_obj == NULL) {
968 g_set_error (err, rspamd_mime_expr_quark(), 320,
969 "function %*.s is not found for '%s'",
970 (int)(p - c), c, mime_atom->str);
971 goto err;
972 }
973
974 if (ucl_object_type (function_obj) != UCL_USERDATA) {
975 g_set_error (err, rspamd_mime_expr_quark(), 320,
976 "function %*.s has invalid type for '%s'",
977 (int)(p - c), c, mime_atom->str);
978 goto err;
979 }
980
981 struct ucl_lua_funcdata *fd = function_obj->value.ud;
982
983 mime_atom->d.lua_cbref = fd->idx;
984 }
985 else {
986 mime_atom->d.func = rspamd_mime_expr_parse_function_atom (pool,
987 mime_atom->str);
988 if (mime_atom->d.func == NULL) {
989 g_set_error (err, rspamd_mime_expr_quark(), 200,
990 "cannot parse function '%s'",
991 mime_atom->str);
992 goto err;
993 }
994 }
995
996 a = rspamd_mempool_alloc0 (pool, sizeof (*a));
997 a->len = p - line;
998 a->priority = 0;
999 a->data = mime_atom;
1000
1001 return a;
1002
1003 err:
1004
1005 return NULL;
1006 }
1007
1008 static gint
rspamd_mime_expr_process_regexp(struct rspamd_regexp_atom * re,struct rspamd_task * task)1009 rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re,
1010 struct rspamd_task *task)
1011 {
1012 gint ret;
1013
1014 if (re == NULL) {
1015 msg_info_task ("invalid regexp passed");
1016 return 0;
1017 }
1018
1019 if (re->type == RSPAMD_RE_HEADER || re->type == RSPAMD_RE_RAWHEADER) {
1020 ret = rspamd_re_cache_process (task,
1021 re->regexp,
1022 re->type,
1023 re->extra.header,
1024 strlen (re->extra.header),
1025 re->is_strong);
1026 }
1027 else if (re->type == RSPAMD_RE_SELECTOR) {
1028 ret = rspamd_re_cache_process (task,
1029 re->regexp,
1030 re->type,
1031 re->extra.selector,
1032 strlen (re->extra.selector),
1033 re->is_strong);
1034 }
1035 else {
1036 ret = rspamd_re_cache_process (task,
1037 re->regexp,
1038 re->type,
1039 NULL,
1040 0,
1041 re->is_strong);
1042 }
1043
1044 if (re->is_test) {
1045 msg_info_task ("test %s regexp '%s' returned %d",
1046 rspamd_re_cache_type_to_string (re->type),
1047 re->regexp_text, ret);
1048 }
1049
1050 return ret;
1051 }
1052
1053
1054 static gint
rspamd_mime_expr_priority(rspamd_expression_atom_t * atom)1055 rspamd_mime_expr_priority (rspamd_expression_atom_t *atom)
1056 {
1057 struct rspamd_mime_atom *mime_atom = atom->data;
1058 gint ret = 0;
1059
1060 switch (mime_atom->type) {
1061 case MIME_ATOM_INTERNAL_FUNCTION:
1062 /* Prioritize internal functions slightly */
1063 ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 8;
1064 break;
1065 case MIME_ATOM_LUA_FUNCTION:
1066 case MIME_ATOM_LOCAL_LUA_FUNCTION:
1067 ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 4;
1068 break;
1069 case MIME_ATOM_REGEXP:
1070 switch (mime_atom->d.re->type) {
1071 case RSPAMD_RE_HEADER:
1072 case RSPAMD_RE_RAWHEADER:
1073 ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 16;
1074 break;
1075 case RSPAMD_RE_URL:
1076 case RSPAMD_RE_EMAIL:
1077 ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 8;
1078 break;
1079 case RSPAMD_RE_SELECTOR:
1080 ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 8;
1081 break;
1082 case RSPAMD_RE_MIME:
1083 case RSPAMD_RE_RAWMIME:
1084 ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 2;
1085 break;
1086 case RSPAMD_RE_WORDS:
1087 case RSPAMD_RE_RAWWORDS:
1088 case RSPAMD_RE_STEMWORDS:
1089 default:
1090 /* For expensive regexps */
1091 ret = 0;
1092 break;
1093 }
1094 }
1095
1096 return ret;
1097 }
1098
1099 static void
rspamd_mime_expr_destroy(rspamd_expression_atom_t * atom)1100 rspamd_mime_expr_destroy (rspamd_expression_atom_t *atom)
1101 {
1102 struct rspamd_mime_atom *mime_atom = atom->data;
1103
1104 if (mime_atom) {
1105 if (mime_atom->type == MIME_ATOM_INTERNAL_FUNCTION) {
1106 /* Need to cleanup arguments */
1107 g_array_free (mime_atom->d.func->args, TRUE);
1108 }
1109 }
1110 }
1111
1112 static gboolean
rspamd_mime_expr_process_function(struct rspamd_function_atom * func,struct rspamd_task * task,lua_State * L)1113 rspamd_mime_expr_process_function (struct rspamd_function_atom * func,
1114 struct rspamd_task * task,
1115 lua_State *L)
1116 {
1117 struct _fl *selected, key;
1118
1119 key.name = func->name;
1120
1121 selected = bsearch (&key,
1122 list_ptr,
1123 functions_number,
1124 sizeof (struct _fl),
1125 fl_cmp);
1126 if (selected == NULL) {
1127 /* Try to check lua function */
1128 return FALSE;
1129 }
1130
1131 return selected->func (task, func->args, selected->user_data);
1132 }
1133
1134 static gdouble
rspamd_mime_expr_process(void * ud,rspamd_expression_atom_t * atom)1135 rspamd_mime_expr_process (void *ud, rspamd_expression_atom_t *atom)
1136 {
1137 struct rspamd_task *task = (struct rspamd_task *)ud;
1138 struct rspamd_mime_atom *mime_atom;
1139 lua_State *L;
1140 gdouble ret = 0;
1141
1142 g_assert (task != NULL);
1143 g_assert (atom != NULL);
1144
1145 mime_atom = atom->data;
1146
1147 if (mime_atom->type == MIME_ATOM_REGEXP) {
1148 ret = rspamd_mime_expr_process_regexp (mime_atom->d.re, task);
1149 }
1150 else if (mime_atom->type == MIME_ATOM_LUA_FUNCTION) {
1151 L = task->cfg->lua_state;
1152 lua_getglobal (L, mime_atom->d.lua_function);
1153 rspamd_lua_task_push (L, task);
1154
1155 if (lua_pcall (L, 1, 1, 0) != 0) {
1156 msg_info_task ("lua call to global function '%s' for atom '%s' failed: %s",
1157 mime_atom->d.lua_function,
1158 mime_atom->str,
1159 lua_tostring (L, -1));
1160 lua_pop (L, 1);
1161 }
1162 else {
1163 if (lua_type (L, -1) == LUA_TBOOLEAN) {
1164 ret = lua_toboolean (L, -1);
1165 }
1166 else if (lua_type (L, -1) == LUA_TNUMBER) {
1167 ret = lua_tonumber (L, 1);
1168 }
1169 else {
1170 msg_err_task ("%s returned wrong return type: %s",
1171 mime_atom->str, lua_typename (L, lua_type (L, -1)));
1172 }
1173 /* Remove result */
1174 lua_pop (L, 1);
1175 }
1176 }
1177 else if (mime_atom->type == MIME_ATOM_LOCAL_LUA_FUNCTION) {
1178 gint err_idx;
1179
1180 L = task->cfg->lua_state;
1181 lua_pushcfunction (L, &rspamd_lua_traceback);
1182 err_idx = lua_gettop (L);
1183
1184 lua_rawgeti (L, LUA_REGISTRYINDEX, mime_atom->d.lua_cbref);
1185 rspamd_lua_task_push (L, task);
1186
1187 if (lua_pcall (L, 1, 1, err_idx) != 0) {
1188 msg_info_task ("lua call to local function for atom '%s' failed: %s",
1189 mime_atom->str,
1190 lua_tostring (L, -1));
1191 }
1192 else {
1193 if (lua_type (L, -1) == LUA_TBOOLEAN) {
1194 ret = lua_toboolean (L, -1);
1195 }
1196 else if (lua_type (L, -1) == LUA_TNUMBER) {
1197 ret = lua_tonumber (L, 1);
1198 }
1199 else {
1200 msg_err_task ("%s returned wrong return type: %s",
1201 mime_atom->str, lua_typename (L, lua_type (L, -1)));
1202 }
1203 }
1204
1205 lua_settop (L, 0);
1206 }
1207 else {
1208 ret = rspamd_mime_expr_process_function (mime_atom->d.func, task,
1209 task->cfg->lua_state);
1210 }
1211
1212 return ret;
1213 }
1214
1215 void
register_expression_function(const gchar * name,rspamd_internal_func_t func,void * user_data)1216 register_expression_function (const gchar *name,
1217 rspamd_internal_func_t func,
1218 void *user_data)
1219 {
1220 static struct _fl *new;
1221
1222 functions_number++;
1223
1224 new = g_new (struct _fl, functions_number);
1225 memcpy (new, list_ptr, (functions_number - 1) * sizeof (struct _fl));
1226 if (list_allocated) {
1227 g_free (list_ptr);
1228 }
1229
1230 list_allocated = TRUE;
1231 new[functions_number - 1].name = name;
1232 new[functions_number - 1].func = func;
1233 new[functions_number - 1].user_data = user_data;
1234 qsort (new, functions_number, sizeof (struct _fl), fl_cmp);
1235 list_ptr = new;
1236 }
1237
1238 gboolean
rspamd_compare_encoding(struct rspamd_task * task,GArray * args,void * unused)1239 rspamd_compare_encoding (struct rspamd_task *task, GArray * args, void *unused)
1240 {
1241 struct expression_argument *arg;
1242
1243 if (args == NULL || task == NULL) {
1244 return FALSE;
1245 }
1246
1247 arg = &g_array_index (args, struct expression_argument, 0);
1248 if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
1249 msg_warn_task ("invalid argument to function is passed");
1250 return FALSE;
1251 }
1252
1253 /* XXX: really write this function */
1254 return TRUE;
1255 }
1256
1257 gboolean
rspamd_header_exists(struct rspamd_task * task,GArray * args,void * unused)1258 rspamd_header_exists (struct rspamd_task * task, GArray * args, void *unused)
1259 {
1260 struct expression_argument *arg;
1261 struct rspamd_mime_header *rh;
1262
1263 if (args == NULL || task == NULL) {
1264 return FALSE;
1265 }
1266
1267 arg = &g_array_index (args, struct expression_argument, 0);
1268 if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
1269 msg_warn_task ("invalid argument to function is passed");
1270 return FALSE;
1271 }
1272
1273 rh = rspamd_message_get_header_array(task,
1274 (gchar *) arg->data, FALSE);
1275
1276 debug_task ("try to get header %s: %d", (gchar *)arg->data,
1277 (rh != NULL));
1278
1279 if (rh) {
1280 return TRUE;
1281 }
1282
1283 return FALSE;
1284 }
1285
1286
1287 /*
1288 * This function is designed to find difference between text/html and text/plain parts
1289 * It takes one argument: difference threshold, if we have two text parts, compare
1290 * its hashes and check for threshold, if value is greater than threshold, return TRUE
1291 * and return FALSE otherwise.
1292 */
1293 gboolean
rspamd_parts_distance(struct rspamd_task * task,GArray * args,void * unused)1294 rspamd_parts_distance (struct rspamd_task * task, GArray * args, void *unused)
1295 {
1296 gint threshold, threshold2 = -1;
1297 struct expression_argument *arg;
1298 gdouble *pdiff, diff;
1299
1300 if (args == NULL || args->len == 0) {
1301 debug_task ("no threshold is specified, assume it 100");
1302 threshold = 100;
1303 }
1304 else {
1305 errno = 0;
1306 arg = &g_array_index (args, struct expression_argument, 0);
1307 if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
1308 msg_warn_task ("invalid argument to function is passed");
1309 return FALSE;
1310 }
1311
1312 threshold = strtoul ((gchar *)arg->data, NULL, 10);
1313 if (errno != 0) {
1314 msg_info_task ("bad numeric value for threshold \"%s\", assume it 100",
1315 (gchar *)arg->data);
1316 threshold = 100;
1317 }
1318 if (args->len >= 2) {
1319 arg = &g_array_index (args, struct expression_argument, 1);
1320 if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
1321 msg_warn_task ("invalid argument to function is passed");
1322 return FALSE;
1323 }
1324
1325 errno = 0;
1326 threshold2 = strtoul ((gchar *)arg->data, NULL, 10);
1327 if (errno != 0) {
1328 msg_info_task ("bad numeric value for threshold \"%s\", ignore it",
1329 (gchar *)arg->data);
1330 threshold2 = -1;
1331 }
1332 }
1333 }
1334
1335 if ((pdiff =
1336 rspamd_mempool_get_variable (task->task_pool,
1337 "parts_distance")) != NULL) {
1338 diff = (1.0 - (*pdiff)) * 100.0;
1339
1340 if (diff != -1) {
1341 if (threshold2 > 0) {
1342 if (diff >= MIN (threshold, threshold2) &&
1343 diff < MAX (threshold, threshold2)) {
1344
1345 return TRUE;
1346 }
1347 }
1348 else {
1349 if (diff <= threshold) {
1350 return TRUE;
1351 }
1352 }
1353 return FALSE;
1354 }
1355 else {
1356 return FALSE;
1357 }
1358 }
1359
1360 return FALSE;
1361 }
1362
1363 struct addr_list {
1364 const gchar *name;
1365 guint namelen;
1366 const gchar *addr;
1367 guint addrlen;
1368 };
1369
1370 static gint
addr_list_cmp_func(const void * a,const void * b)1371 addr_list_cmp_func (const void *a, const void *b)
1372 {
1373 const struct addr_list *addra = (struct addr_list *)a,
1374 *addrb = (struct addr_list *)b;
1375
1376 if (addra->addrlen != addrb->addrlen) {
1377 return addra->addrlen - addrb->addrlen;
1378 }
1379
1380 return memcmp (addra->addr, addrb->addr, addra->addrlen);
1381 }
1382
1383 #define COMPARE_RCPT_LEN 3
1384 #define MIN_RCPT_TO_COMPARE 7
1385
1386 gboolean
rspamd_recipients_distance(struct rspamd_task * task,GArray * args,void * unused)1387 rspamd_recipients_distance (struct rspamd_task *task, GArray * args,
1388 void *unused)
1389 {
1390 struct expression_argument *arg;
1391 struct rspamd_email_address *cur;
1392 double threshold;
1393 struct addr_list *ar;
1394 gint num, i, hits = 0;
1395
1396 if (args == NULL) {
1397 msg_warn_task ("no parameters to function");
1398 return FALSE;
1399 }
1400
1401 arg = &g_array_index (args, struct expression_argument, 0);
1402 if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
1403 msg_warn_task ("invalid argument to function is passed");
1404 return FALSE;
1405 }
1406
1407 errno = 0;
1408 threshold = strtod ((gchar *)arg->data, NULL);
1409
1410 if (errno != 0) {
1411 msg_warn_task ("invalid numeric value '%s': %s",
1412 (gchar *)arg->data,
1413 strerror (errno));
1414 return FALSE;
1415 }
1416
1417 if (!MESSAGE_FIELD (task, rcpt_mime)) {
1418 return FALSE;
1419 }
1420
1421 num = MESSAGE_FIELD (task, rcpt_mime)->len;
1422
1423 if (num < MIN_RCPT_TO_COMPARE) {
1424 return FALSE;
1425 }
1426
1427 ar = rspamd_mempool_alloc0 (task->task_pool, num * sizeof (struct addr_list));
1428
1429 /* Fill array */
1430 num = 0;
1431 PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, rcpt_mime), i, cur) {
1432 if (cur->addr_len > COMPARE_RCPT_LEN) {
1433 ar[num].name = cur->addr;
1434 ar[num].namelen = cur->addr_len;
1435 ar[num].addr = cur->domain;
1436 ar[num].addrlen = cur->domain_len;
1437 num ++;
1438 }
1439 }
1440
1441 qsort (ar, num, sizeof (*ar), addr_list_cmp_func);
1442
1443 /* Cycle all elements in array */
1444 for (i = 0; i < num; i++) {
1445 if (i < num - 1) {
1446 if (ar[i].namelen == ar[i + 1].namelen) {
1447 if (rspamd_lc_cmp (ar[i].name, ar[i + 1].name, COMPARE_RCPT_LEN) == 0) {
1448 hits++;
1449 }
1450 }
1451 }
1452 }
1453
1454 if ((hits * num / 2.) / (double)num >= threshold) {
1455 return TRUE;
1456 }
1457
1458 return FALSE;
1459 }
1460
1461 gboolean
rspamd_has_only_html_part(struct rspamd_task * task,GArray * args,void * unused)1462 rspamd_has_only_html_part (struct rspamd_task * task, GArray * args,
1463 void *unused)
1464 {
1465 struct rspamd_mime_text_part *p;
1466 guint i, cnt_html = 0, cnt_txt = 0;
1467
1468 PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, p) {
1469 p = g_ptr_array_index (MESSAGE_FIELD (task, text_parts), 0);
1470
1471 if (!IS_TEXT_PART_ATTACHMENT (p)) {
1472 if (IS_TEXT_PART_HTML (p)) {
1473 cnt_html++;
1474 }
1475 else {
1476 cnt_txt++;
1477 }
1478 }
1479 }
1480
1481 return (cnt_html > 0 && cnt_txt == 0);
1482 }
1483
1484 static gboolean
is_recipient_list_sorted(GPtrArray * ar)1485 is_recipient_list_sorted (GPtrArray *ar)
1486 {
1487 struct rspamd_email_address *addr;
1488 gboolean res = TRUE;
1489 rspamd_ftok_t cur, prev;
1490 gint i;
1491
1492 /* Do not check to short address lists */
1493 if (ar == NULL || ar->len < MIN_RCPT_TO_COMPARE) {
1494 return FALSE;
1495 }
1496
1497 prev.len = 0;
1498 prev.begin = NULL;
1499
1500 PTR_ARRAY_FOREACH (ar, i, addr) {
1501 cur.begin = addr->addr;
1502 cur.len = addr->addr_len;
1503
1504 if (prev.len != 0) {
1505 if (rspamd_ftok_casecmp (&cur, &prev) <= 0) {
1506 res = FALSE;
1507 break;
1508 }
1509 }
1510
1511 prev = cur;
1512 }
1513
1514 return res;
1515 }
1516
1517 gboolean
rspamd_is_recipients_sorted(struct rspamd_task * task,GArray * args,void * unused)1518 rspamd_is_recipients_sorted (struct rspamd_task * task,
1519 GArray * args,
1520 void *unused)
1521 {
1522 /* Check all types of addresses */
1523
1524 if (MESSAGE_FIELD (task, rcpt_mime)) {
1525 return is_recipient_list_sorted (MESSAGE_FIELD (task, rcpt_mime));
1526 }
1527
1528 return FALSE;
1529 }
1530
1531 gboolean
rspamd_compare_transfer_encoding(struct rspamd_task * task,GArray * args,void * unused)1532 rspamd_compare_transfer_encoding (struct rspamd_task * task,
1533 GArray * args,
1534 void *unused)
1535 {
1536 struct expression_argument *arg;
1537 guint i;
1538 struct rspamd_mime_part *part;
1539 enum rspamd_cte cte;
1540
1541 if (args == NULL) {
1542 msg_warn_task ("no parameters to function");
1543 return FALSE;
1544 }
1545
1546 arg = &g_array_index (args, struct expression_argument, 0);
1547 if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
1548 msg_warn_task ("invalid argument to function is passed");
1549 return FALSE;
1550 }
1551
1552 cte = rspamd_cte_from_string (arg->data);
1553
1554 if (cte == RSPAMD_CTE_UNKNOWN) {
1555 msg_warn_task ("unknown cte: %s", arg->data);
1556 return FALSE;
1557 }
1558
1559 PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
1560 if (IS_PART_TEXT (part)) {
1561 if (part->cte == cte) {
1562 return TRUE;
1563 }
1564 }
1565 }
1566
1567 return FALSE;
1568 }
1569
1570 gboolean
rspamd_is_html_balanced(struct rspamd_task * task,GArray * args,void * unused)1571 rspamd_is_html_balanced (struct rspamd_task * task, GArray * args, void *unused)
1572 {
1573 /* Totally broken but seems to be never used */
1574 return TRUE;
1575 }
1576
1577 gboolean
rspamd_has_html_tag(struct rspamd_task * task,GArray * args,void * unused)1578 rspamd_has_html_tag (struct rspamd_task * task, GArray * args, void *unused)
1579 {
1580 struct rspamd_mime_text_part *p;
1581 struct expression_argument *arg;
1582 guint i;
1583 gboolean res = FALSE;
1584
1585 if (args == NULL) {
1586 msg_warn_task ("no parameters to function");
1587 return FALSE;
1588 }
1589
1590 arg = &g_array_index (args, struct expression_argument, 0);
1591 if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
1592 msg_warn_task ("invalid argument to function is passed");
1593 return FALSE;
1594 }
1595
1596 PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, p) {
1597 if (IS_TEXT_PART_HTML (p) && p->html) {
1598 res = rspamd_html_tag_seen (p->html, arg->data);
1599 }
1600
1601 if (res) {
1602 break;
1603 }
1604 }
1605
1606 return res;
1607
1608 }
1609
1610 gboolean
rspamd_has_fake_html(struct rspamd_task * task,GArray * args,void * unused)1611 rspamd_has_fake_html (struct rspamd_task * task, GArray * args, void *unused)
1612 {
1613 struct rspamd_mime_text_part *p;
1614 guint i;
1615 gboolean res = FALSE;
1616
1617 PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, p) {
1618 if (IS_TEXT_PART_HTML (p) && (rspamd_html_get_tags_count(p->html) < 2)) {
1619 res = TRUE;
1620 }
1621
1622 if (res) {
1623 break;
1624 }
1625 }
1626
1627 return res;
1628
1629 }
1630
1631 static gboolean
rspamd_raw_header_exists(struct rspamd_task * task,GArray * args,void * unused)1632 rspamd_raw_header_exists (struct rspamd_task *task, GArray * args, void *unused)
1633 {
1634 struct expression_argument *arg;
1635
1636 if (args == NULL || task == NULL) {
1637 return FALSE;
1638 }
1639
1640 arg = &g_array_index (args, struct expression_argument, 0);
1641 if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
1642 msg_warn_task ("invalid argument to function is passed");
1643 return FALSE;
1644 }
1645
1646 return rspamd_message_get_header_array(task, arg->data, FALSE) != NULL;
1647 }
1648
1649 static gboolean
match_smtp_data(struct rspamd_task * task,struct expression_argument * arg,const gchar * what,gsize len)1650 match_smtp_data (struct rspamd_task *task,
1651 struct expression_argument *arg,
1652 const gchar *what, gsize len)
1653 {
1654 rspamd_regexp_t *re;
1655 gint r = 0;
1656
1657 if (arg->type == EXPRESSION_ARGUMENT_REGEXP) {
1658 /* This is a regexp */
1659 re = arg->data;
1660 if (re == NULL) {
1661 msg_warn_task ("cannot compile regexp for function");
1662 return FALSE;
1663 }
1664
1665
1666 if (len > 0) {
1667 r = rspamd_regexp_search (re, what, len, NULL, NULL, FALSE, NULL);
1668 }
1669
1670 return r;
1671 }
1672 else if (arg->type == EXPRESSION_ARGUMENT_NORMAL &&
1673 g_ascii_strncasecmp (arg->data, what, len) == 0) {
1674 return TRUE;
1675 }
1676
1677 return FALSE;
1678 }
1679
1680 static gboolean
rspamd_check_smtp_data(struct rspamd_task * task,GArray * args,void * unused)1681 rspamd_check_smtp_data (struct rspamd_task *task, GArray * args, void *unused)
1682 {
1683 struct expression_argument *arg;
1684 struct rspamd_email_address *addr = NULL;
1685 GPtrArray *rcpts = NULL;
1686 const gchar *type, *str = NULL;
1687 guint i;
1688
1689 if (args == NULL) {
1690 msg_warn_task ("no parameters to function");
1691 return FALSE;
1692 }
1693
1694 arg = &g_array_index (args, struct expression_argument, 0);
1695
1696 if (!arg || !arg->data || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
1697 msg_warn_task ("no parameters to function");
1698 return FALSE;
1699 }
1700 else {
1701 type = arg->data;
1702 switch (*type) {
1703 case 'f':
1704 case 'F':
1705 if (g_ascii_strcasecmp (type, "from") == 0) {
1706 addr = rspamd_task_get_sender (task);
1707 }
1708 else {
1709 msg_warn_task ("bad argument to function: %s", type);
1710 return FALSE;
1711 }
1712 break;
1713 case 'h':
1714 case 'H':
1715 if (g_ascii_strcasecmp (type, "helo") == 0) {
1716 str = task->helo;
1717 }
1718 else {
1719 msg_warn_task ("bad argument to function: %s", type);
1720 return FALSE;
1721 }
1722 break;
1723 case 'u':
1724 case 'U':
1725 if (g_ascii_strcasecmp (type, "user") == 0) {
1726 str = task->user;
1727 }
1728 else {
1729 msg_warn_task ("bad argument to function: %s", type);
1730 return FALSE;
1731 }
1732 break;
1733 case 's':
1734 case 'S':
1735 if (g_ascii_strcasecmp (type, "subject") == 0) {
1736 str = MESSAGE_FIELD (task, subject);
1737 }
1738 else {
1739 msg_warn_task ("bad argument to function: %s", type);
1740 return FALSE;
1741 }
1742 break;
1743 case 'r':
1744 case 'R':
1745 if (g_ascii_strcasecmp (type, "rcpt") == 0) {
1746 rcpts = task->rcpt_envelope;
1747 }
1748 else {
1749 msg_warn_task ("bad argument to function: %s", type);
1750 return FALSE;
1751 }
1752 break;
1753 default:
1754 msg_warn_task ("bad argument to function: %s", type);
1755 return FALSE;
1756 }
1757 }
1758
1759 if (str == NULL && addr == NULL && rcpts == NULL) {
1760 /* Not enough data so regexp would NOT be found anyway */
1761 return FALSE;
1762 }
1763
1764 /* We would process only one more argument, others are ignored */
1765 if (args->len >= 2) {
1766 arg = &g_array_index (args, struct expression_argument, 1);
1767
1768 if (arg) {
1769 if (str != NULL) {
1770 return match_smtp_data (task, arg, str, strlen (str));
1771 }
1772 else if (addr != NULL && addr->addr) {
1773 return match_smtp_data (task, arg, addr->addr, addr->addr_len);
1774 }
1775 else {
1776 if (rcpts != NULL) {
1777 for (i = 0; i < rcpts->len; i ++) {
1778 addr = g_ptr_array_index (rcpts, i);
1779
1780 if (addr && addr->addr &&
1781 match_smtp_data (task, arg,
1782 addr->addr, addr->addr_len)) {
1783 return TRUE;
1784 }
1785 }
1786 }
1787 }
1788 }
1789 }
1790
1791 return FALSE;
1792 }
1793
1794 static inline gboolean
rspamd_check_ct_attr(const gchar * begin,gsize len,struct expression_argument * arg_pattern)1795 rspamd_check_ct_attr (const gchar *begin, gsize len,
1796 struct expression_argument *arg_pattern)
1797 {
1798 rspamd_regexp_t *re;
1799 gboolean r = FALSE;
1800
1801 if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) {
1802 re = arg_pattern->data;
1803
1804 if (len > 0) {
1805 r = rspamd_regexp_search (re,
1806 begin, len,
1807 NULL, NULL, FALSE, NULL);
1808 }
1809
1810 if (r) {
1811 return TRUE;
1812 }
1813 }
1814 else {
1815 /* Just do strcasecmp */
1816 gsize plen = strlen (arg_pattern->data);
1817
1818 if (plen == len &&
1819 g_ascii_strncasecmp (arg_pattern->data, begin, len) == 0) {
1820 return TRUE;
1821 }
1822 }
1823
1824 return FALSE;
1825 }
1826
1827 static gboolean
rspamd_content_type_compare_param(struct rspamd_task * task,GArray * args,void * unused)1828 rspamd_content_type_compare_param (struct rspamd_task * task,
1829 GArray * args,
1830 void *unused)
1831 {
1832
1833 struct expression_argument *arg, *arg1, *arg_pattern;
1834 gboolean recursive = FALSE;
1835 struct rspamd_mime_part *cur_part;
1836 guint i;
1837 rspamd_ftok_t srch;
1838 struct rspamd_content_type_param *found = NULL, *cur;
1839 const gchar *param_name;
1840
1841 if (args == NULL || args->len < 2) {
1842 msg_warn_task ("no parameters to function");
1843 return FALSE;
1844 }
1845
1846 arg = &g_array_index (args, struct expression_argument, 0);
1847 g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
1848 param_name = arg->data;
1849 arg_pattern = &g_array_index (args, struct expression_argument, 1);
1850
1851 PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, cur_part) {
1852 if (args->len >= 3) {
1853 arg1 = &g_array_index (args, struct expression_argument, 2);
1854 if (g_ascii_strncasecmp (arg1->data, "true",
1855 sizeof ("true") - 1) == 0) {
1856 recursive = TRUE;
1857 }
1858 }
1859 else {
1860 /*
1861 * If user did not specify argument, let's assume that he wants
1862 * recursive search if mime part is multipart/mixed
1863 */
1864 if (IS_PART_MULTIPART (cur_part)) {
1865 recursive = TRUE;
1866 }
1867 }
1868
1869 rspamd_ftok_t lit;
1870 RSPAMD_FTOK_FROM_STR (&srch, param_name);
1871 RSPAMD_FTOK_FROM_STR (&lit, "charset");
1872
1873 if (rspamd_ftok_equal (&srch, &lit)) {
1874 if (rspamd_check_ct_attr (cur_part->ct->charset.begin,
1875 cur_part->ct->charset.len, arg_pattern)) {
1876 return TRUE;
1877 }
1878 }
1879
1880 RSPAMD_FTOK_FROM_STR (&lit, "boundary");
1881 if (rspamd_ftok_equal (&srch, &lit)) {
1882 if (rspamd_check_ct_attr (cur_part->ct->orig_boundary.begin,
1883 cur_part->ct->orig_boundary.len, arg_pattern)) {
1884 return TRUE;
1885 }
1886 }
1887
1888 if (cur_part->ct->attrs) {
1889 found = g_hash_table_lookup (cur_part->ct->attrs, &srch);
1890
1891 if (found) {
1892 DL_FOREACH (found, cur) {
1893 if (rspamd_check_ct_attr (cur->value.begin,
1894 cur->value.len, arg_pattern)) {
1895 return TRUE;
1896 }
1897 }
1898 }
1899 }
1900
1901 if (!recursive) {
1902 break;
1903 }
1904 }
1905
1906 return FALSE;
1907 }
1908
1909 static gboolean
rspamd_content_type_has_param(struct rspamd_task * task,GArray * args,void * unused)1910 rspamd_content_type_has_param (struct rspamd_task * task,
1911 GArray * args,
1912 void *unused)
1913 {
1914 struct expression_argument *arg, *arg1;
1915 gboolean recursive = FALSE;
1916 struct rspamd_mime_part *cur_part;
1917 guint i;
1918 rspamd_ftok_t srch;
1919 struct rspamd_content_type_param *found = NULL;
1920 const gchar *param_name;
1921
1922 if (args == NULL || args->len < 1) {
1923 msg_warn_task ("no parameters to function");
1924 return FALSE;
1925 }
1926
1927 arg = &g_array_index (args, struct expression_argument, 0);
1928 g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
1929 param_name = arg->data;
1930
1931 PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, cur_part) {
1932 if (args->len >= 2) {
1933 arg1 = &g_array_index (args, struct expression_argument, 1);
1934 if (g_ascii_strncasecmp (arg1->data, "true",
1935 sizeof ("true") - 1) == 0) {
1936 recursive = TRUE;
1937 }
1938 }
1939 else {
1940 /*
1941 * If user did not specify argument, let's assume that he wants
1942 * recursive search if mime part is multipart/mixed
1943 */
1944 if (IS_PART_MULTIPART (cur_part)) {
1945 recursive = TRUE;
1946 }
1947 }
1948
1949
1950 rspamd_ftok_t lit;
1951 RSPAMD_FTOK_FROM_STR (&srch, param_name);
1952 RSPAMD_FTOK_FROM_STR (&lit, "charset");
1953
1954 if (rspamd_ftok_equal (&srch, &lit)) {
1955 if (cur_part->ct->charset.len > 0) {
1956 return TRUE;
1957 }
1958 }
1959
1960 RSPAMD_FTOK_FROM_STR (&lit, "boundary");
1961 if (rspamd_ftok_equal (&srch, &lit)) {
1962 if (cur_part->ct->boundary.len > 0) {
1963 return TRUE;
1964 }
1965 }
1966
1967 if (cur_part->ct->attrs) {
1968 found = g_hash_table_lookup (cur_part->ct->attrs, &srch);
1969
1970 if (found) {
1971 return TRUE;
1972 }
1973 }
1974
1975 if (!recursive) {
1976 break;
1977 }
1978 }
1979
1980 return FALSE;
1981 }
1982
1983 static gboolean
rspamd_content_type_check(struct rspamd_task * task,GArray * args,gboolean check_subtype)1984 rspamd_content_type_check (struct rspamd_task *task,
1985 GArray * args,
1986 gboolean check_subtype)
1987 {
1988 rspamd_ftok_t *param_data, srch;
1989 rspamd_regexp_t *re;
1990 struct expression_argument *arg1, *arg_pattern;
1991 struct rspamd_content_type *ct;
1992 gint r = 0;
1993 guint i;
1994 gboolean recursive = FALSE;
1995 struct rspamd_mime_part *cur_part;
1996
1997 if (args == NULL || args->len < 1) {
1998 msg_warn_task ("no parameters to function");
1999 return FALSE;
2000 }
2001
2002 arg_pattern = &g_array_index (args, struct expression_argument, 0);
2003
2004 PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, cur_part) {
2005 ct = cur_part->ct;
2006
2007 if (args->len >= 2) {
2008 arg1 = &g_array_index (args, struct expression_argument, 1);
2009 if (g_ascii_strncasecmp (arg1->data, "true",
2010 sizeof ("true") - 1) == 0) {
2011 recursive = TRUE;
2012 }
2013 }
2014 else {
2015 /*
2016 * If user did not specify argument, let's assume that he wants
2017 * recursive search if mime part is multipart/mixed
2018 */
2019 if (IS_PART_MULTIPART (cur_part)) {
2020 recursive = TRUE;
2021 }
2022 }
2023
2024 if (check_subtype) {
2025 param_data = &ct->subtype;
2026 }
2027 else {
2028 param_data = &ct->type;
2029 }
2030
2031 if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) {
2032 re = arg_pattern->data;
2033
2034 if (param_data->len > 0) {
2035 r = rspamd_regexp_search (re, param_data->begin, param_data->len,
2036 NULL, NULL, FALSE, NULL);
2037 }
2038
2039 if (r) {
2040 return TRUE;
2041 }
2042 }
2043 else {
2044 /* Just do strcasecmp */
2045 srch.begin = arg_pattern->data;
2046 srch.len = strlen (arg_pattern->data);
2047
2048 if (rspamd_ftok_casecmp (param_data, &srch) == 0) {
2049 return TRUE;
2050 }
2051 }
2052
2053 /* Get next part */
2054 if (!recursive) {
2055 break;
2056 }
2057 }
2058
2059 return FALSE;
2060 }
2061
2062 static gboolean
rspamd_content_type_is_type(struct rspamd_task * task,GArray * args,void * unused)2063 rspamd_content_type_is_type (struct rspamd_task * task,
2064 GArray * args,
2065 void *unused)
2066 {
2067 return rspamd_content_type_check (task, args, FALSE);
2068 }
2069
2070 static gboolean
rspamd_content_type_is_subtype(struct rspamd_task * task,GArray * args,void * unused)2071 rspamd_content_type_is_subtype (struct rspamd_task * task,
2072 GArray * args,
2073 void *unused)
2074 {
2075 return rspamd_content_type_check (task, args, TRUE);
2076 }
2077
2078 static gboolean
compare_subtype(struct rspamd_task * task,struct rspamd_content_type * ct,struct expression_argument * subtype)2079 compare_subtype (struct rspamd_task *task, struct rspamd_content_type *ct,
2080 struct expression_argument *subtype)
2081 {
2082 rspamd_regexp_t *re;
2083 rspamd_ftok_t srch;
2084 gint r = 0;
2085
2086 if (subtype == NULL || ct == NULL) {
2087 msg_warn_task ("invalid parameters passed");
2088 return FALSE;
2089 }
2090 if (subtype->type == EXPRESSION_ARGUMENT_REGEXP) {
2091 re = subtype->data;
2092
2093 if (ct->subtype.len > 0) {
2094 r = rspamd_regexp_search (re, ct->subtype.begin, ct->subtype.len,
2095 NULL, NULL, FALSE, NULL);
2096 }
2097 }
2098 else {
2099 srch.begin = subtype->data;
2100 srch.len = strlen (subtype->data);
2101
2102 /* Just do strcasecmp */
2103 if (rspamd_ftok_casecmp (&ct->subtype, &srch) == 0) {
2104 return TRUE;
2105 }
2106 }
2107
2108 return r;
2109 }
2110
2111 static gboolean
compare_len(struct rspamd_mime_part * part,guint min,guint max)2112 compare_len (struct rspamd_mime_part *part, guint min, guint max)
2113 {
2114 if (min == 0 && max == 0) {
2115 return TRUE;
2116 }
2117
2118 if (min == 0) {
2119 return part->parsed_data.len <= max;
2120 }
2121 else if (max == 0) {
2122 return part->parsed_data.len >= min;
2123 }
2124 else {
2125 return part->parsed_data.len >= min && part->parsed_data.len <= max;
2126 }
2127 }
2128
2129 static gboolean
common_has_content_part(struct rspamd_task * task,struct expression_argument * param_type,struct expression_argument * param_subtype,gint min_len,gint max_len)2130 common_has_content_part (struct rspamd_task * task,
2131 struct expression_argument *param_type,
2132 struct expression_argument *param_subtype,
2133 gint min_len,
2134 gint max_len)
2135 {
2136 rspamd_regexp_t *re;
2137 struct rspamd_mime_part *part;
2138 struct rspamd_content_type *ct;
2139 rspamd_ftok_t srch;
2140 gint r = 0;
2141 guint i;
2142
2143 PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
2144 ct = part->ct;
2145
2146 if (ct == NULL) {
2147 continue;
2148 }
2149
2150 if (param_type->type == EXPRESSION_ARGUMENT_REGEXP) {
2151 re = param_type->data;
2152
2153 if (ct->type.len > 0) {
2154 r = rspamd_regexp_search (re, ct->type.begin, ct->type.len,
2155 NULL, NULL, FALSE, NULL);
2156 }
2157
2158 /* Also check subtype and length of the part */
2159 if (r && param_subtype) {
2160 r = compare_len (part, min_len, max_len) &&
2161 compare_subtype (task, ct, param_subtype);
2162
2163 return r;
2164 }
2165 }
2166 else {
2167 /* Just do strcasecmp */
2168 srch.begin = param_type->data;
2169 srch.len = strlen (param_type->data);
2170
2171 if (rspamd_ftok_casecmp (&ct->type, &srch) == 0) {
2172 if (param_subtype) {
2173 if (compare_subtype (task, ct, param_subtype)) {
2174 if (compare_len (part, min_len, max_len)) {
2175 return TRUE;
2176 }
2177 }
2178 }
2179 else {
2180 if (compare_len (part, min_len, max_len)) {
2181 return TRUE;
2182 }
2183 }
2184 }
2185 }
2186 }
2187
2188 return FALSE;
2189 }
2190
2191 static gboolean
rspamd_has_content_part(struct rspamd_task * task,GArray * args,void * unused)2192 rspamd_has_content_part (struct rspamd_task * task, GArray * args, void *unused)
2193 {
2194 struct expression_argument *param_type = NULL, *param_subtype = NULL;
2195
2196 if (args == NULL) {
2197 msg_warn_task ("no parameters to function");
2198 return FALSE;
2199 }
2200
2201 param_type = &g_array_index (args, struct expression_argument, 0);
2202 if (args->len >= 2) {
2203 param_subtype = &g_array_index (args, struct expression_argument, 1);
2204 }
2205
2206 return common_has_content_part (task, param_type, param_subtype, 0, 0);
2207 }
2208
2209 static gboolean
rspamd_has_content_part_len(struct rspamd_task * task,GArray * args,void * unused)2210 rspamd_has_content_part_len (struct rspamd_task * task,
2211 GArray * args,
2212 void *unused)
2213 {
2214 struct expression_argument *param_type = NULL, *param_subtype = NULL;
2215 gint min = 0, max = 0;
2216 struct expression_argument *arg;
2217
2218 if (args == NULL) {
2219 msg_warn_task ("no parameters to function");
2220 return FALSE;
2221 }
2222
2223 param_type = &g_array_index (args, struct expression_argument, 0);
2224
2225 if (args->len >= 2) {
2226 param_subtype = &g_array_index (args, struct expression_argument, 1);
2227
2228 if (args->len >= 3) {
2229 arg = &g_array_index (args, struct expression_argument, 2);
2230 errno = 0;
2231 min = strtoul (arg->data, NULL, 10);
2232 g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
2233
2234 if (errno != 0) {
2235 msg_warn_task ("invalid numeric value '%s': %s",
2236 (gchar *)arg->data,
2237 strerror (errno));
2238 return FALSE;
2239 }
2240
2241 if (args->len >= 4) {
2242 arg = &g_array_index (args, struct expression_argument, 3);
2243 g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
2244 max = strtoul (arg->data, NULL, 10);
2245
2246 if (errno != 0) {
2247 msg_warn_task ("invalid numeric value '%s': %s",
2248 (gchar *)arg->data,
2249 strerror (errno));
2250 return FALSE;
2251 }
2252 }
2253 }
2254 }
2255
2256 return common_has_content_part (task, param_type, param_subtype, min, max);
2257 }
2258
2259 static gboolean
rspamd_is_empty_body(struct rspamd_task * task,GArray * args,void * unused)2260 rspamd_is_empty_body (struct rspamd_task *task,
2261 GArray * args,
2262 void *unused)
2263 {
2264 struct rspamd_mime_part *part;
2265 guint i;
2266
2267 PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
2268 if (part->parsed_data.len > 0) {
2269 return FALSE;
2270 }
2271 }
2272
2273 return TRUE;
2274 }
2275
2276 #define TASK_FLAG_READ(flag) do { \
2277 result = !!(task->flags & (flag)); \
2278 } while(0)
2279
2280 #define TASK_GET_FLAG(flag, strname, macro) do { \
2281 if (!found && strcmp ((flag), strname) == 0) { \
2282 TASK_FLAG_READ((macro)); \
2283 found = TRUE; \
2284 } \
2285 } while(0)
2286
2287 #define TASK_PROTOCOL_FLAG_READ(flag) do { \
2288 result = !!(task->protocol_flags & (flag)); \
2289 } while(0)
2290
2291 #define TASK_GET_PROTOCOL_FLAG(flag, strname, macro) do { \
2292 if (!found && strcmp ((flag), strname) == 0) { \
2293 TASK_PROTOCOL_FLAG_READ((macro)); \
2294 found = TRUE; \
2295 } \
2296 } while(0)
2297
2298
2299 static gboolean
rspamd_has_flag_expr(struct rspamd_task * task,GArray * args,void * unused)2300 rspamd_has_flag_expr (struct rspamd_task *task,
2301 GArray * args,
2302 void *unused)
2303 {
2304 gboolean found = FALSE, result = FALSE;
2305 struct expression_argument *flag_arg;
2306 const gchar *flag_str;
2307
2308 if (args == NULL) {
2309 msg_warn_task ("no parameters to function");
2310 return FALSE;
2311 }
2312
2313 flag_arg = &g_array_index (args, struct expression_argument, 0);
2314
2315 if (flag_arg->type != EXPRESSION_ARGUMENT_NORMAL) {
2316 msg_warn_task ("invalid parameter to function");
2317 return FALSE;
2318 }
2319
2320 flag_str = (const gchar *)flag_arg->data;
2321
2322 TASK_GET_FLAG (flag_str, "pass_all", RSPAMD_TASK_FLAG_PASS_ALL);
2323 TASK_GET_FLAG (flag_str, "no_log", RSPAMD_TASK_FLAG_NO_LOG);
2324 TASK_GET_FLAG (flag_str, "no_stat", RSPAMD_TASK_FLAG_NO_STAT);
2325 TASK_GET_FLAG (flag_str, "skip", RSPAMD_TASK_FLAG_SKIP);
2326 TASK_GET_PROTOCOL_FLAG (flag_str, "extended_urls",
2327 RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS);
2328 TASK_GET_FLAG (flag_str, "learn_spam", RSPAMD_TASK_FLAG_LEARN_SPAM);
2329 TASK_GET_FLAG (flag_str, "learn_ham", RSPAMD_TASK_FLAG_LEARN_HAM);
2330 TASK_GET_FLAG (flag_str, "greylisted", RSPAMD_TASK_FLAG_GREYLISTED);
2331 TASK_GET_FLAG (flag_str, "broken_headers",
2332 RSPAMD_TASK_FLAG_BROKEN_HEADERS);
2333 TASK_GET_FLAG (flag_str, "skip_process",
2334 RSPAMD_TASK_FLAG_SKIP_PROCESS);
2335 TASK_GET_PROTOCOL_FLAG (flag_str, "milter",
2336 RSPAMD_TASK_PROTOCOL_FLAG_MILTER);
2337 TASK_GET_FLAG (flag_str, "bad_unicode",
2338 RSPAMD_TASK_FLAG_BAD_UNICODE);
2339
2340 if (!found) {
2341 msg_warn_task ("invalid flag name %s", flag_str);
2342 return FALSE;
2343 }
2344
2345 return result;
2346 }
2347
2348 static gboolean
rspamd_has_symbol_expr(struct rspamd_task * task,GArray * args,void * unused)2349 rspamd_has_symbol_expr (struct rspamd_task *task,
2350 GArray * args,
2351 void *unused)
2352 {
2353 struct expression_argument *sym_arg;
2354 const gchar *symbol_str;
2355
2356 if (args == NULL) {
2357 msg_warn_task ("no parameters to function");
2358 return FALSE;
2359 }
2360
2361 sym_arg = &g_array_index (args, struct expression_argument, 0);
2362
2363 if (sym_arg->type != EXPRESSION_ARGUMENT_NORMAL) {
2364 msg_warn_task ("invalid parameter to function");
2365 return FALSE;
2366 }
2367
2368 symbol_str = (const gchar *)sym_arg->data;
2369
2370 if (rspamd_task_find_symbol_result (task, symbol_str, NULL)) {
2371 return TRUE;
2372 }
2373
2374 return FALSE;
2375 }
2376