1--[[
2Copyright (c) 2015, Vsevolod Stakhov <vsevolod@highsecure.ru>
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8    http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15]]--
16
-- Stop loading this plugin as soon as the `confighelp` global is set, before
-- any module state is created.
-- NOTE(review): presumably set when only configuration help is being
-- collected -- confirm against the loader.
if confighelp then
  return
end
20
21-- This plugin is intended to read and parse spamassassin rules with regexp
22-- rules. SA plugins or statistics are not supported
23
-- Shared empty table used as a nil-safe fallback when chaining lookups,
-- e.g. `((from or E)[1] or E).addr`
local E = {}
-- Module name passed to lua_util.debugm for this plugin's debug messages
local N = 'spamassassin'
26
27local rspamd_logger = require "rspamd_logger"
28local rspamd_regexp = require "rspamd_regexp"
29local rspamd_expression = require "rspamd_expression"
30local rspamd_trie = require "rspamd_trie"
31local util = require "rspamd_util"
32local lua_util = require "lua_util"
33local fun = require "fun"
34
-- Known plugins
-- SpamAssassin plugin names the config parser treats as available: an
-- `ifplugin <name>` section is read only when <name> is listed here, and an
-- `if !plugin(<name>)` section is skipped when it is.
local known_plugins = {
  'Mail::SpamAssassin::Plugin::FreeMail',
  'Mail::SpamAssassin::Plugin::HeaderEval',
  'Mail::SpamAssassin::Plugin::ReplaceTags',
  'Mail::SpamAssassin::Plugin::RelayEval',
  'Mail::SpamAssassin::Plugin::MIMEEval',
  'Mail::SpamAssassin::Plugin::BodyEval',
  'Mail::SpamAssassin::Plugin::MIMEHeader',
  'Mail::SpamAssassin::Plugin::WLBLEval',
  'Mail::SpamAssassin::Plugin::HTMLEval',
}
47
-- Table that replaces SA symbol with rspamd equivalent
-- Used for dependency resolution
-- Keys are SpamAssassin symbol names, values are the rspamd symbols that are
-- looked up instead (see replace_symbol).
local symbols_replacements = {
  -- SPF replacements
  USER_IN_SPF_WHITELIST = 'WHITELIST_SPF',
  USER_IN_DEF_SPF_WL = 'WHITELIST_SPF',
  SPF_PASS = 'R_SPF_ALLOW',
  SPF_FAIL = 'R_SPF_FAIL',
  SPF_SOFTFAIL = 'R_SPF_SOFTFAIL',
  SPF_HELO_PASS = 'R_SPF_ALLOW',
  SPF_HELO_FAIL = 'R_SPF_FAIL',
  SPF_HELO_SOFTFAIL = 'R_SPF_SOFTFAIL',
  -- Historical misspellings of the two HELO symbols above; retained only so
  -- that any configuration that relied on them keeps resolving
  SPF_HELLO_FAIL = 'R_SPF_FAIL',
  SPF_HELLO_SOFTFAIL = 'R_SPF_SOFTFAIL',
  -- DKIM replacements
  USER_IN_DKIM_WHITELIST = 'WHITELIST_DKIM',
  USER_IN_DEF_DKIM_WL = 'WHITELIST_DKIM',
  DKIM_VALID = 'R_DKIM_ALLOW',
  -- SURBL replacements
  URIBL_SBL_A = 'URIBL_SBL',
  URIBL_DBL_SPAM = 'DBL_SPAM',
  URIBL_DBL_PHISH = 'DBL_PHISH',
  URIBL_DBL_MALWARE = 'DBL_MALWARE',
  URIBL_DBL_BOTNETCC = 'DBL_BOTNET',
  URIBL_DBL_ABUSE_SPAM = 'DBL_ABUSE',
  URIBL_DBL_ABUSE_REDIR = 'DBL_ABUSE_REDIR',
  URIBL_DBL_ABUSE_MALW = 'DBL_ABUSE_MALWARE',
  URIBL_DBL_ABUSE_BOTCC = 'DBL_ABUSE_BOTNET',
  URIBL_WS_SURBL = 'WS_SURBL_MULTI',
  URIBL_PH_SURBL = 'PH_SURBL_MULTI',
  URIBL_MW_SURBL = 'MW_SURBL_MULTI',
  URIBL_CR_SURBL = 'CRACKED_SURBL',
  URIBL_ABUSE_SURBL = 'ABUSE_SURBL',
  -- Misc rules
  BODY_URI_ONLY = 'R_EMPTY_IMAGE',
  HTML_IMAGE_ONLY_04 = 'HTML_SHORT_LINK_IMG_1',
  HTML_IMAGE_ONLY_08 = 'HTML_SHORT_LINK_IMG_1',
  HTML_IMAGE_ONLY_12 = 'HTML_SHORT_LINK_IMG_1',
  HTML_IMAGE_ONLY_16 = 'HTML_SHORT_LINK_IMG_2',
  HTML_IMAGE_ONLY_20 = 'HTML_SHORT_LINK_IMG_2',
  HTML_IMAGE_ONLY_24 = 'HTML_SHORT_LINK_IMG_3',
  HTML_IMAGE_ONLY_28 = 'HTML_SHORT_LINK_IMG_3',
  HTML_IMAGE_ONLY_32 = 'HTML_SHORT_LINK_IMG_3',
}
90
-- Internal variables
-- Parsed rule definitions keyed by symbol name
local rules = {}
-- NOTE(review): atoms, scores_added and external_deps are not touched in this
-- part of the file; presumably filled while rules are post-processed -- confirm
local atoms = {}
-- Scores taken from `score` directives, keyed by symbol name
local scores = {}
local scores_added = {}
local external_deps = {}
-- Patterns of the form '@' .. domain collected from `freemail_domains` lines
local freemail_domains = {}
-- Symbol names whose regexps are registered with pcre_only = true
local pcre_only_regexps = {}
-- Trie matched by freemail_search; it is declared here without a value and is
-- not assigned in this part of the file
local freemail_trie
-- Data gathered from the replace_tag / replace_pre / replace_inter /
-- replace_post / replace_rules directives
local replace = {
  tags = {},
  pre = {},
  inter = {},
  post = {},
  rules = {},
}
local internal_regexp = {
  -- Matches the argument list of check_for_shifted_date: two single-quoted
  -- values in parentheses, each either an optionally negative integer or the
  -- word undef
  date_shift = rspamd_regexp.create("^\\(\\s*'((?:-?\\d+)|(?:undef))'\\s*,\\s*'((?:-?\\d+)|(?:undef))'\\s*\\)$")
}
110
-- Mail::SpamAssassin::Plugin::WLBLEval plugin
-- Each list is a set (key -> 1) filled from the blacklist_from /
-- whitelist_from / whitelist_to / blacklist_to directives and looked up with
-- lower-cased addresses by the check_*_in_* eval rules; `elts` counts how
-- many entries were added in total.
-- NOTE(review): nothing in this part of the file fills from_def_whitelist.
local sa_lists = {
  from_blacklist = {},
  from_whitelist = {},
  from_def_whitelist = {},
  to_blacklist = {},
  to_whitelist = {},
  elts = 0,
}
120
-- Cache of eval handlers produced by gen_eval_rule, keyed by the eval
-- argument string
local func_cache = {}
-- Options of the 'spamassassin' configuration section
local section = rspamd_config:get_all_opt("spamassassin")
if not (section and type(section) == 'table') then
  -- Only logged here: loading of the file continues past this point
  rspamd_logger.infox(rspamd_config, 'Module is unconfigured')
end
126
-- Minimum score to treat symbols as meta
local meta_score_alpha = 0.5

-- Maximum size of regexp checked; handed to re:set_limit() for every
-- registered regexp
-- NOTE(review): 0 presumably means "no limit" -- confirm against set_limit
local match_limit = 0

-- Default priority of the scores registered in the metric
-- Historically this is set to 2 allowing SA scores to override Rspamd scores
local scores_priority = 2
136
-- Collects every match of a Lua pattern found in `str` into an array.
-- Note that `delim` describes what a token looks like, not what separates
-- tokens; when omitted, tokens are runs of non-whitespace characters.
local function split(str, delim)
  local pattern = delim or '[^%s]+'
  local tokens = {}

  for piece in string.gmatch(str, pattern) do
    tokens[#tokens + 1] = piece
  end

  return tokens
end
150
-- Maps a SpamAssassin symbol to its rspamd counterpart.
-- Returns the replacement and true when symbols_replacements has an entry
-- for `s`, otherwise `s` itself and false.
local function replace_symbol(s)
  local mapped = symbols_replacements[s]
  if mapped then
    return mapped, true
  end

  return s, false
end
158
-- Optional FFI declarations for the regexp cache C functions. They are only
-- declared when the `jit` table exists; otherwise `ffi` stays nil.
-- NOTE(review): the only user visible in this part of the file is the
-- commented-out code path inside process_regexp_opt.
local ffi
if type(jit) == 'table' then
  ffi = require("ffi")
  ffi.cdef[[
    int rspamd_re_cache_type_from_string (const char *str);
    int rspamd_re_cache_process_ffi (void *ptask,
        void *pre,
        int type,
        const char *type_data,
        int is_strong);
]]
end
171
-- Runs a regexp against a task and returns whatever task:process_regexp
-- returns.
-- re: regexp object; task: task to scan; re_type: regexp type name such as
-- 'allheader'; header: optional header name; strong: optional flag passed
-- through unchanged.
-- The FFI fast path below is kept for reference but is disabled.
local function process_regexp_opt(re, task, re_type, header, strong)
  --[[
  -- This is now broken with lua regexp conditions!
  if type(jit) == 'table' then
    -- Use ffi call
    local itype = ffi.C.rspamd_re_cache_type_from_string(re_type)

    if not strong then
      strong = 0
    else
      strong = 1
    end
    local iret = ffi.C.rspamd_re_cache_process_ffi (task, re, itype, header, strong)

    return tonumber(iret)
  else
    return task:process_regexp(re, re_type, header, strong)
  end
  --]]
  return task:process_regexp(re, re_type, header, strong)
end
193
-- Tells whether the regexp of rule `name` is listed in pcre_only_regexps.
-- Logs an info message for every positive answer.
local function is_pcre_only(name)
  if not pcre_only_regexps[name] then
    return false
  end

  rspamd_logger.infox(rspamd_config, 'mark re %s as PCRE only', name)
  return true
end
201
-- Parses the header part of an SA `header`/`mimeheader` rule and stores the
-- result in `cur_rule`.
--
-- hline: header specification: one or more `|`-separated header names, each
--        optionally followed by `:modifier` parts (addr, name, raw, case)
-- cur_rule: rule table being built; cur_rule['re'] is read, and
--        cur_rule['ordinary'], cur_rule['header'] and (for ALL / ALL:raw)
--        cur_rule['type'] and cur_rule['function'] are written
--
-- A rule stays "ordinary" only when no header carries a modifier and no
-- special header name (ALL, MESSAGEID, ToCc) is used.
local function handle_header_def(hline, cur_rule)
  --Now check for modifiers inside header's name
  local hdrs = split(hline, '[^|]+')
  local hdr_params = {}
  -- Check if an re is an ordinary re
  local ordinary = true

  -- Appends to hdr_params one copy of `param` per name in `headers`, each
  -- with its 'header' field set to that name
  local function split_hdr_param(param, headers)
    for _,hh in ipairs(headers) do
      local nparam = {}
      for k,v in pairs(param) do
        if k ~= 'header' then
          nparam[k] = v
        end
      end

      nparam['header'] = hh
      table.insert(hdr_params, nparam)
    end
  end

  -- Builds the value transformer for the `addr` and `name` modifiers: it
  -- parses a header value as a mail address list and returns the array of
  -- the requested field of every parsed element that has it
  local function address_field_extractor(field)
    return function(str)
      local addr_parsed = util.parse_mail_address(str)
      local ret = {}
      if addr_parsed then
        for _,elt in ipairs(addr_parsed) do
          if elt[field] then
            table.insert(ret, elt[field])
          end
        end
      end

      return ret
    end
  end

  for _,h in ipairs(hdrs) do
    if h == 'ALL' or h == 'ALL:raw' then
      ordinary = false
      cur_rule['type'] = 'function'
      -- Pack closure
      local re = cur_rule['re']
      -- Rule to match all headers
      rspamd_config:register_regexp({
        re = re,
        type = 'allheader',
        pcre_only = is_pcre_only(cur_rule['symbol']),
      })
      cur_rule['function'] = function(task)
        if not re then
          -- Report the rule symbol: the header spec is always ALL here
          rspamd_logger.errx(task, 're is missing for rule %1',
            cur_rule['symbol'])
          return 0
        end

        return process_regexp_opt(re, task, 'allheader')
      end
    else
      local args = split(h, '[^:]+')
      -- A fresh table per header: sharing a single table between iterations
      -- made every hdr_params entry alias the last header parsed and let a
      -- modifier function set for one header leak into the following ones
      local cur_param = {
        strong = false,
        raw = false,
        header = args[1],
      }

      if args[2] then
        -- We have some ops that are required for the header, so it's not ordinary
        ordinary = false
      end

      -- Everything after the header name is a modifier
      for i = 2, #args do
        local func = args[i]
        if func == 'addr' then
          cur_param['function'] = address_field_extractor('addr')
        elseif func == 'name' then
          cur_param['function'] = address_field_extractor('name')
        elseif func == 'raw' then
          cur_param['raw'] = true
        elseif func == 'case' then
          cur_param['strong'] = true
        else
          rspamd_logger.warnx(rspamd_config, 'Function %1 is not supported in %2',
            func, cur_rule['symbol'])
        end
      end

      -- Some header rules require splitting to check of multiple headers
      if cur_param['header'] == 'MESSAGEID' then
        -- Special case for spamassassin
        ordinary = false
        split_hdr_param(cur_param, {
          'Message-ID',
          'X-Message-ID',
          'Resent-Message-ID'})
      elseif cur_param['header'] == 'ToCc' then
        ordinary = false
        split_hdr_param(cur_param, { 'To', 'Cc', 'Bcc' })
      else
        table.insert(hdr_params, cur_param)
      end
    end
  end

  -- Leave cur_rule untouched when the specification held no header at all,
  -- so that such a rule is still seen as having no header
  if #hdrs > 0 then
    cur_rule['ordinary'] = ordinary
    cur_rule['header'] = hdr_params
  end
end
313
314
-- Counts how many times the freemail trie reports a match in `input`.
-- Returns 0 when `input` is nil/false or when no freemail trie has been
-- built (freemail_trie is declared without a value, so indexing it
-- unconditionally would raise an error).
local function freemail_search(input)
  local res = 0
  -- Invoked by the trie for every match; `number` indexes freemail_domains
  local function trie_callback(number, pos)
    lua_util.debugm(N, rspamd_config, 'Matched pattern %1 at pos %2', freemail_domains[number], pos)
    res = res + 1
  end

  if input and freemail_trie then
    freemail_trie:match(input, trie_callback, true)
  end

  return res
end
328
-- Builds the handler for an SA `eval:` rule.
--
-- arg: text following `eval:`, i.e. a function name immediately followed by
--      its parenthesised argument list, e.g. "check_for_mime('mime_attachment')"
-- Returns function(task) -> number for a supported eval function (0 means
-- "no match"), or nil when `arg` is not a string or names no supported
-- function.
--
-- Function names are matched as prefixes of `arg`, in the order they are
-- listed in eval_funcs; the rest of `arg` is passed to the handler as
-- `remain`.
local function gen_eval_rule(arg)
  if type(arg) ~= 'string' then
    return nil
  end

  -- ('value'): a single quoted argument. Quotes are excluded from the value
  -- so that ('a','b') written without a space is not taken as one argument
  local single_arg_pat = "^%(%s*['\"]([^%s'\"]+)['\"]%s*%)$"
  -- ('value', 'regexp'): the second argument may itself contain quotes
  local two_args_pat = "^%(%s*['\"]([^%s'\"]+)['\"]%s*,%s*['\"]([^%s]+)['\"]%s*%)$"

  -- Handler returning 1 when the first mime From address is a key of
  -- sa_lists[list_name]
  local function from_in_list(list_name)
    return function(task)
      local from = task:get_from('mime')
      local addr = ((from or E)[1] or E).addr
      if addr and sa_lists[list_name][string.lower(addr)] then
        return 1
      end

      return 0
    end
  end

  -- Handler returning 1 when any mime recipient address is a key of
  -- sa_lists[list_name]; recipients without an address are skipped
  local function rcpt_in_list(list_name)
    return function(task)
      local rcpt = task:get_recipients('mime')
      if rcpt then
        for _,r in ipairs(rcpt) do
          local addr = r['addr']
          if addr and sa_lists[list_name][string.lower(addr)] then
            return 1
          end
        end
      end

      return 0
    end
  end

  local eval_funcs = {
    {'check_freemail_from', function(task)
        local from = task:get_from('mime')
        local addr = ((from or E)[1] or E).addr
        if addr then
          return freemail_search(string.lower(addr))
        end
        return 0
      end},
    {'check_freemail_replyto',
      function(task)
        local reply_to = task:get_header('Reply-To')
        if reply_to then
          -- Lower-cased like the other freemail checks
          return freemail_search(string.lower(reply_to))
        end
        return 0
      end
    },
    {'check_freemail_header',
      function(task, remain)
        -- Remain here contains one or two args: header and regexp to match
        local larg = string.match(remain, single_arg_pat)
        local re = nil
        if not larg then
          larg, re = string.match(remain, two_args_pat)
        end

        if larg then
          local h
          if larg == 'EnvelopeFrom' then
            local from = task:get_from('smtp')
            h = ((from or E)[1] or E).addr
          else
            h = task:get_header(larg)
          end
          if h then
            local hdr_freemail = freemail_search(string.lower(h))

            if hdr_freemail > 0 and re then
              local r = rspamd_regexp.create_cached(re)
              if r then
                if r:match(h) then
                  return 1
                end
                return 0
              else
                rspamd_logger.infox(rspamd_config, 'cannot create regexp %1', re)
                return 0
              end
            end

            return hdr_freemail
          end
        end

        return 0
      end
    },
    {
      'check_for_missing_to_header',
      function (task)
        local th = task:get_recipients('mime')
        if not th or #th == 0 then
          return 1
        end

        return 0
      end
    },
    {
      'check_relays_unparseable',
      function(task)
        -- Number of Received headers that could not be parsed
        local rh_mime = task:get_header_full('Received')
        local rh_parsed = task:get_received_headers()

        local rh_cnt = 0
        if rh_mime then rh_cnt = #rh_mime end
        local parsed_cnt = 0
        if rh_parsed then parsed_cnt = #rh_parsed end

        return rh_cnt - parsed_cnt
      end
    },
    {
      'check_for_shifted_date',
      function (task, remain)
        -- Remain here contains two args: start and end hours shift
        local matches = internal_regexp['date_shift']:search(remain, true, true)
        if matches and matches[1] then
          -- 'undef' means "no bound" and is kept as nil, so that a literal
          -- '0' stays a real bound
          local function bound_seconds(hours)
            if hours == 'undef' then
              return nil
            end
            local n = tonumber(hours)
            return n and n * 3600
          end
          local min_diff = bound_seconds(matches[1][2])
          local max_diff = bound_seconds(matches[1][3])

          -- Now get the difference between Date and message received date
          local dm = task:get_date { format = 'message', gmt = true}
          local dt = task:get_date { format = 'connect', gmt = true}

          if dm and dt then
            local diff = dm - dt

            if (not min_diff or diff >= min_diff) and
                (not max_diff or diff <= max_diff) then
              return 1
            end
          end
        end

        return 0
      end
    },
    {
      'check_for_mime',
      function(task, remain)
        local larg = string.match(remain, single_arg_pat)

        if larg then
          if larg == 'mime_attachment' then
            -- Any part that carries a file name counts as an attachment
            local parts = task:get_parts()
            if parts then
              for _,p in ipairs(parts) do
                if p:get_filename() then
                  return 1
                end
              end
            end
          else
            rspamd_logger.infox(task, 'unimplemented mime check %1', arg)
          end
        end

        return 0
      end
    },
    {'check_from_in_blacklist', from_in_list('from_blacklist')},
    {'check_from_in_whitelist', from_in_list('from_whitelist')},
    {'check_from_in_default_whitelist', from_in_list('from_def_whitelist')},
    {'check_to_in_blacklist', rcpt_in_list('to_blacklist')},
    {'check_to_in_whitelist', rcpt_in_list('to_whitelist')},
    {
      'html_tag_exists',
      function(task, remain)
        -- `remain` is the raw argument list, e.g. ('iframe'): extract the
        -- tag name; fall back to the raw text when it is not quoted
        local tag = string.match(remain, single_arg_pat) or remain
        local tp = task:get_text_parts()

        if tp then
          for _,p in ipairs(tp) do
            if p:is_html() then
              local hc = p:get_html()

              if hc and hc:has_tag(tag) then
                return 1
              end
            end
          end
        end

        return 0
      end
    }
  }

  for _,f in ipairs(eval_funcs) do
    local pat = string.format('^%s', f[1])
    local first,last = string.find(arg, pat)

    if first then
      -- Everything after the function name is handed over as its arguments
      local func_arg = string.sub(arg, last + 1)
      return function(task)
        return f[2](task, func_arg)
      end
    end
  end
end
567
-- Returns parser function or nil
--
-- line: rule body such as "exists:Header" or "eval:func('arg')"
-- The returned function takes a task and returns a number (0 = no match).
-- nil is returned when the line is not an `exists:` / `eval:` expression or
-- has nothing after the colon.
local function maybe_parse_sa_function(line)
  -- The argument is everything after the first colon: cutting at every
  -- colon would truncate eval arguments that contain one themselves
  local name, arg = string.match(line, '^([^:]+):(.+)$')

  lua_util.debugm(N, rspamd_config, 'trying to parse SA function %1 with args %2',
    name or line, arg)

  if not arg then
    -- Without an argument the handlers below have nothing to work with
    return nil
  end

  local substitutions = {
    {'^exists:',
      function(task) -- filter
        local hdrs_check
        if arg == 'MESSAGEID' then
          hdrs_check = {
            'Message-ID',
            'X-Message-ID',
            'Resent-Message-ID'
          }
        elseif arg == 'ToCc' then
          hdrs_check = { 'To', 'Cc', 'Bcc' }
        else
          hdrs_check = {arg}
        end

        for _,h in ipairs(hdrs_check) do
          if task:has_header(h) then
            return 1
          end
        end
        return 0
      end,
    },
    {'^eval:',
      function(task)
        -- Handlers are built lazily on first use and cached per argument
        local func = func_cache[arg]
        if not func then
          func = gen_eval_rule(arg)
          func_cache[arg] = func
        end

        if not func then
          rspamd_logger.errx(task, 'cannot find appropriate eval rule for function %1',
            arg)
        else
          return func(task)
        end

        return 0
      end
    },
  }

  for _,s in ipairs(substitutions) do
    if string.find(line, s[1]) then
      return s[2]
    end
  end

  return nil
end
628
-- Joins with single spaces the elements of `words` that remain after the
-- first `start` ones are dropped.
local function words_to_re(words, start)
  local remaining = fun.drop_n(start, words):totable()
  return table.concat(remaining, " ")
end
632
-- Applies the flags of a `tflags` line to a rule.
-- rule: rule table; receives 'publish', 'multiple', 'maxhits' and 'nice'
-- flags: words of the directive; the first one is skipped
-- When the rule has a regexp, its hit limit becomes maxhits if set, 0 for
-- `multiple`, and 1 otherwise.
local function process_tflags(rule, flags)
  local function apply_flag(flag)
    local maxhits = string.match(flag, '^maxhits=(%d+)$')

    if flag == 'publish' then
      rule['publish'] = true
    elseif flag == 'multiple' then
      rule['multiple'] = true
    elseif maxhits then
      rule['maxhits'] = tonumber(maxhits)
    elseif flag == 'nice' then
      rule['nice'] = true
    end
  end

  fun.each(apply_flag, fun.drop_n(1, flags))

  local re = rule['re']
  if not re then
    return
  end

  local hits = 1
  if rule['maxhits'] then
    hits = rule['maxhits']
  elseif rule['multiple'] then
    hits = 0
  end
  re:set_max_hits(hits)
end
656
-- Stores a replace_tag / replace_pre / replace_inter / replace_post
-- definition.
-- words: words of the directive: keyword, tag name, then the replacement
-- tbl: table receiving tag name -> replacement text (the remaining words
--      joined with single spaces)
-- A directive without a tag name is reported and ignored: using the missing
-- name as a table key would raise an error and abort config parsing.
local function process_replace(words, tbl)
  local tag = words[2]
  if not tag then
    rspamd_logger.warnx(rspamd_config, 'ignore %1 directive without a tag name',
      words[1])
    return
  end

  tbl[tag] = words_to_re(words, 2)
end
661
662local function process_sa_conf(f)
663  local cur_rule = {}
664  local valid_rule = false
665
666  local function insert_cur_rule()
667   if cur_rule['type'] ~= 'meta' and cur_rule['publish'] then
668     -- Create meta rule from this rule
669     local nsym = '__fake' .. cur_rule['symbol']
670     local nrule = {
671       type = 'meta',
672       symbol = cur_rule['symbol'],
673       score = cur_rule['score'],
674       meta = nsym,
675       description = cur_rule['description'],
676     }
677     rules[nrule['symbol']] = nrule
678     cur_rule['symbol'] = nsym
679   end
680   -- We have previous rule valid
681   if not cur_rule['symbol'] then
682     rspamd_logger.errx(rspamd_config, 'bad rule definition: %1', cur_rule)
683   end
684   rules[cur_rule['symbol']] = cur_rule
685   cur_rule = {}
686   valid_rule = false
687  end
688
689  local function parse_score(words)
690    if #words == 3 then
691      -- score rule <x>
692      lua_util.debugm(N, rspamd_config, 'found score for %1: %2', words[2], words[3])
693      return tonumber(words[3])
694    elseif #words == 6 then
695      -- score rule <x1> <x2> <x3> <x4>
696      -- we assume here that bayes and network are enabled and select <x4>
697      lua_util.debugm(N, rspamd_config, 'found score for %1: %2', words[2], words[6])
698      return tonumber(words[6])
699    else
700      rspamd_logger.errx(rspamd_config, 'invalid score for %1', words[2])
701    end
702
703    return 0
704  end
705
706  local skip_to_endif = false
707  local if_nested = 0
708  for l in f:lines() do
709    (function ()
710    l = lua_util.rspamd_str_trim(l)
711    -- Replace bla=~/re/ with bla =~ /re/ (#2372)
712    l = l:gsub('([^%s])%s*([=!]~)%s*([^%s])', '%1 %2 %3')
713
714    if string.len(l) == 0 or string.sub(l, 1, 1) == '#' then
715      return
716    end
717
718    -- Unbalanced if/endif
719    if if_nested < 0 then if_nested = 0 end
720    if skip_to_endif then
721      if string.match(l, '^endif') then
722        if_nested = if_nested - 1
723
724        if if_nested == 0 then
725          skip_to_endif = false
726        end
727      elseif string.match(l, '^if') then
728        if_nested = if_nested + 1
729      elseif string.match(l, '^else') then
730        -- Else counterpart for if
731        skip_to_endif = false
732      end
733      return
734    else
735      if string.match(l, '^ifplugin') then
736        local ls = split(l)
737
738        if not fun.any(function(pl)
739            if pl == ls[2] then return true end
740            return false
741            end, known_plugins) then
742          skip_to_endif = true
743        end
744        if_nested = if_nested + 1
745      elseif string.match(l, '^if !plugin%(') then
746         local pname = string.match(l, '^if !plugin%(([A-Za-z:]+)%)')
747         if fun.any(function(pl)
748           if pl == pname then return true end
749           return false
750         end, known_plugins) then
751           skip_to_endif = true
752         end
753         if_nested = if_nested + 1
754      elseif string.match(l, '^if') then
755        -- Unknown if
756        skip_to_endif = true
757        if_nested = if_nested + 1
758      elseif string.match(l, '^else') then
759        -- Else counterpart for if
760        skip_to_endif = true
761      elseif string.match(l, '^endif') then
762        if_nested = if_nested - 1
763      end
764    end
765
766    -- Skip comments
767    local words = fun.totable(fun.take_while(
768      function(w) return string.sub(w, 1, 1) ~= '#' end,
769      fun.filter(function(w)
770          return w ~= "" end,
771      fun.iter(split(l)))))
772
773    if words[1] == "header" or words[1] == 'mimeheader' then
774      -- header SYMBOL Header ~= /regexp/
775      if valid_rule then
776        insert_cur_rule()
777      end
778      if words[4] and (words[4] == '=~' or words[4] == '!~') then
779        cur_rule['type'] = 'header'
780        cur_rule['symbol'] = words[2]
781
782        if words[4] == '!~' then
783          cur_rule['not'] = true
784        end
785
786        cur_rule['re_expr'] = words_to_re(words, 4)
787        local unset_comp = string.find(cur_rule['re_expr'], '%s+%[if%-unset:')
788        if unset_comp then
789          -- We have optional part that needs to be processed
790          local unset = string.match(string.sub(cur_rule['re_expr'], unset_comp),
791            '%[if%-unset:%s*([^%]%s]+)]')
792          cur_rule['unset'] = unset
793          -- Cut it down
794           cur_rule['re_expr'] = string.sub(cur_rule['re_expr'], 1, unset_comp - 1)
795        end
796
797        cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
798
799        if not cur_rule['re'] then
800          rspamd_logger.warnx(rspamd_config, "Cannot parse regexp '%1' for %2",
801            cur_rule['re_expr'], cur_rule['symbol'])
802        else
803          cur_rule['re']:set_max_hits(1)
804          handle_header_def(words[3], cur_rule)
805        end
806
807        if cur_rule['unset'] then
808          cur_rule['ordinary'] = false
809        end
810
811        if words[1] == 'mimeheader' then
812          cur_rule['mime'] = true
813        else
814          cur_rule['mime'] = false
815        end
816
817        if cur_rule['re'] and cur_rule['symbol'] and
818          (cur_rule['header'] or cur_rule['function']) then
819          valid_rule = true
820          cur_rule['re']:set_max_hits(1)
821          if cur_rule['header'] and cur_rule['ordinary'] then
822            for _,h in ipairs(cur_rule['header']) do
823              if type(h) == 'string' then
824                if cur_rule['mime'] then
825                  rspamd_config:register_regexp({
826                    re = cur_rule['re'],
827                    type = 'mimeheader',
828                    header = h,
829                    pcre_only = is_pcre_only(cur_rule['symbol']),
830                  })
831                else
832                  rspamd_config:register_regexp({
833                    re = cur_rule['re'],
834                    type = 'header',
835                    header = h,
836                    pcre_only = is_pcre_only(cur_rule['symbol']),
837                  })
838                end
839              else
840                h['mime'] = cur_rule['mime']
841                if cur_rule['mime'] then
842                  rspamd_config:register_regexp({
843                    re = cur_rule['re'],
844                    type = 'mimeheader',
845                    header = h['header'],
846                    pcre_only = is_pcre_only(cur_rule['symbol']),
847                  })
848                else
849                  if h['raw'] then
850                    rspamd_config:register_regexp({
851                      re = cur_rule['re'],
852                      type = 'rawheader',
853                      header = h['header'],
854                      pcre_only = is_pcre_only(cur_rule['symbol']),
855                    })
856                  else
857                    rspamd_config:register_regexp({
858                      re = cur_rule['re'],
859                      type = 'header',
860                      header = h['header'],
861                      pcre_only = is_pcre_only(cur_rule['symbol']),
862                    })
863                  end
864                end
865              end
866            end
867            cur_rule['re']:set_limit(match_limit)
868            cur_rule['re']:set_max_hits(1)
869          end
870        end
871      else
872        -- Maybe we know the function and can convert it
873        local args =  words_to_re(words, 2)
874        local func = maybe_parse_sa_function(args)
875
876        if func then
877          cur_rule['type'] = 'function'
878          cur_rule['symbol'] = words[2]
879          cur_rule['function'] = func
880          valid_rule = true
881        else
882          rspamd_logger.infox(rspamd_config, 'unknown function %1', args)
883        end
884      end
885    elseif words[1] == "body" then
886      -- body SYMBOL /regexp/
887      if valid_rule then
888        insert_cur_rule()
889      end
890
891      cur_rule['symbol'] = words[2]
892      if words[3] and (string.sub(words[3], 1, 1) == '/'
893          or string.sub(words[3], 1, 1) == 'm') then
894        cur_rule['type'] = 'sabody'
895        cur_rule['re_expr'] = words_to_re(words, 2)
896        cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
897        if cur_rule['re'] then
898
899          rspamd_config:register_regexp({
900            re = cur_rule['re'],
901            type = 'sabody',
902            pcre_only = is_pcre_only(cur_rule['symbol']),
903          })
904          valid_rule = true
905          cur_rule['re']:set_limit(match_limit)
906          cur_rule['re']:set_max_hits(1)
907        end
908      else
909        -- might be function
910        local args = words_to_re(words, 2)
911        local func = maybe_parse_sa_function(args)
912
913        if func then
914          cur_rule['type'] = 'function'
915          cur_rule['symbol'] = words[2]
916          cur_rule['function'] = func
917          valid_rule = true
918        else
919          rspamd_logger.infox(rspamd_config, 'unknown function %1', args)
920        end
921      end
922    elseif words[1] == "rawbody" then
923      -- body SYMBOL /regexp/
924      if valid_rule then
925        insert_cur_rule()
926      end
927
928      cur_rule['symbol'] = words[2]
929      if words[3] and (string.sub(words[3], 1, 1) == '/'
930          or string.sub(words[3], 1, 1) == 'm') then
931        cur_rule['type'] = 'sarawbody'
932        cur_rule['re_expr'] = words_to_re(words, 2)
933        cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
934        if cur_rule['re'] then
935
936          rspamd_config:register_regexp({
937            re = cur_rule['re'],
938            type = 'sarawbody',
939            pcre_only = is_pcre_only(cur_rule['symbol']),
940          })
941          valid_rule = true
942          cur_rule['re']:set_limit(match_limit)
943          cur_rule['re']:set_max_hits(1)
944        end
945      else
946        -- might be function
947        local args = words_to_re(words, 2)
948        local func = maybe_parse_sa_function(args)
949
950        if func then
951          cur_rule['type'] = 'function'
952          cur_rule['symbol'] = words[2]
953          cur_rule['function'] = func
954          valid_rule = true
955        else
956          rspamd_logger.infox(rspamd_config, 'unknown function %1', args)
957        end
958      end
959    elseif words[1] == "full" then
960      -- body SYMBOL /regexp/
961      if valid_rule then
962        insert_cur_rule()
963      end
964
965      cur_rule['symbol'] = words[2]
966
967      if words[3] and (string.sub(words[3], 1, 1) == '/'
968          or string.sub(words[3], 1, 1) == 'm') then
969        cur_rule['type'] = 'message'
970        cur_rule['re_expr'] = words_to_re(words, 2)
971        cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
972        cur_rule['raw'] = true
973        if cur_rule['re'] then
974          valid_rule = true
975          rspamd_config:register_regexp({
976            re = cur_rule['re'],
977            type = 'body',
978            pcre_only = is_pcre_only(cur_rule['symbol']),
979          })
980          cur_rule['re']:set_limit(match_limit)
981          cur_rule['re']:set_max_hits(1)
982        end
983      else
984        -- might be function
985        local args = words_to_re(words, 2)
986        local func = maybe_parse_sa_function(args)
987
988        if func then
989          cur_rule['type'] = 'function'
990          cur_rule['symbol'] = words[2]
991          cur_rule['function'] = func
992          valid_rule = true
993        else
994          rspamd_logger.infox(rspamd_config, 'unknown function %1', args)
995        end
996      end
997    elseif words[1] == "uri" then
998      -- uri SYMBOL /regexp/
999      if valid_rule then
1000        insert_cur_rule()
1001      end
1002      cur_rule['type'] = 'uri'
1003      cur_rule['symbol'] = words[2]
1004      cur_rule['re_expr'] = words_to_re(words, 2)
1005      cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
1006      if cur_rule['re'] and cur_rule['symbol'] then
1007        valid_rule = true
1008        rspamd_config:register_regexp({
1009          re = cur_rule['re'],
1010          type = 'url',
1011          pcre_only = is_pcre_only(cur_rule['symbol']),
1012        })
1013        cur_rule['re']:set_limit(match_limit)
1014        cur_rule['re']:set_max_hits(1)
1015      end
1016    elseif words[1] == "meta" then
1017      -- meta SYMBOL expression
1018      if valid_rule then
1019        insert_cur_rule()
1020      end
1021      cur_rule['type'] = 'meta'
1022      cur_rule['symbol'] = words[2]
1023      cur_rule['meta'] = words_to_re(words, 2)
1024      if cur_rule['meta'] and cur_rule['symbol']
1025        and cur_rule['meta'] ~= '0' then
1026          valid_rule = true
1027      end
1028    elseif words[1] == "describe" and valid_rule then
1029      cur_rule['description'] = words_to_re(words, 2)
1030    elseif words[1] == "score" then
1031      scores[words[2]] = parse_score(words)
1032    elseif words[1] == 'freemail_domains' then
1033      fun.each(function(dom)
1034          table.insert(freemail_domains, '@' .. dom)
1035        end, fun.drop_n(1, words))
1036    elseif words[1] == 'blacklist_from' then
1037      sa_lists['from_blacklist'][words[2]] = 1
1038      sa_lists['elts'] = sa_lists['elts'] + 1
1039    elseif words[1] == 'whitelist_from' then
1040      sa_lists['from_whitelist'][words[2]] = 1
1041      sa_lists['elts'] = sa_lists['elts'] + 1
1042    elseif words[1] == 'whitelist_to' then
1043      sa_lists['to_whitelist'][words[2]] = 1
1044      sa_lists['elts'] = sa_lists['elts'] + 1
1045    elseif words[1] == 'blacklist_to' then
1046      sa_lists['to_blacklist'][words[2]] = 1
1047      sa_lists['elts'] = sa_lists['elts'] + 1
1048    elseif words[1] == 'tflags' then
1049      process_tflags(cur_rule, words)
1050    elseif words[1] == 'replace_tag' then
1051      process_replace(words, replace['tags'])
1052    elseif words[1] == 'replace_pre' then
1053      process_replace(words, replace['pre'])
1054    elseif words[1] == 'replace_inter' then
1055      process_replace(words, replace['inter'])
1056    elseif words[1] == 'replace_post' then
1057      process_replace(words, replace['post'])
1058    elseif words[1] == 'replace_rules' then
1059      fun.each(function(r) table.insert(replace['rules'], r) end,
1060        fun.drop_n(1, words))
1061    end
1062    end)()
1063  end
1064  if valid_rule then
1065    insert_cur_rule()
1066  end
1067end
1068
1069-- Now check all valid rules and add the according rspamd rules
1070
-- Returns the weight multiplier used for a rule:
--   0.0  when the leading (at most two) characters of `sym` are all '_'
--  -1.0  when the rule is flagged `nice` or carries a negative score
--   1.0  in every other case
local function calculate_score(sym, rule)
  -- Only the first two characters matter; a shorter name is judged on the
  -- characters it has
  local prefix = string.sub(sym, 1, 2)
  if not string.find(prefix, '[^_]') then
    return 0.0
  end

  local score = rule['score']
  local is_negative = rule['nice'] or (score and score < 0.0)

  return is_negative and -1.0 or 1.0
end
1082
-- Stores under `sym` a meta rule whose expression is the symbol of `rule`,
-- carrying over the score and the description of that rule. Any rule
-- previously stored under `sym` is overwritten.
local function add_sole_meta(sym, rule)
  local meta_rule = {}

  meta_rule.type = 'meta'
  meta_rule.meta = rule.symbol
  meta_rule.score = rule.score
  meta_rule.description = rule.description

  rules[sym] = meta_rule
end
1092
-- Matches `data` against `re` and returns the number of hits.
--   data: value passed to the regexp
--   re:   regexp object (0 is returned when it is missing)
--   raw:  flag forwarded to the regexp match methods
--   rule: rule table; `multiple` selects hit counting and `maxhits` bounds it
-- A rule without `multiple` yields 1 on a match and 0 otherwise.
local function sa_regexp_match(data, re, raw, rule)
  if not re then
    return 0
  end

  if not rule['multiple'] then
    return re:match(data, raw) and 1 or 0
  end

  -- -1 is passed when the rule sets no `maxhits`
  local limit = rule['maxhits'] or -1
  local hits = re:matchn(data, limit, raw)

  return 0 + hits
end
1110
-- Expands replace-tag markup inside a regexp source string.
--
-- `<pre NAME>`, `<inter NAME>` and `<post NAME>` markers are removed from
-- `str` and select the fragments stored in replace['pre'], replace['inter']
-- and replace['post']. Every `<NAME>` listed in replace['tags'] is then
-- substituted by pre .. tag value .. post, and the selected inter fragment is
-- inserted between adjacent `><` pairs.
--
-- Returns two values: true and the expanded string when at least one
-- `<NAME>` tag has been substituted; false and the string (with markers
-- already stripped) otherwise.
local function apply_replacements(str)
  local pre = ""
  local post = ""
  local inter = ""

  -- string.gsub gives '%' a special meaning in replacement strings, so it is
  -- doubled to have the fragment inserted literally
  local function escape_replacement(s)
    return (string.gsub(s, '%%', '%%%%'))
  end

  -- Strips `<prefix NAME>` markers from `s` for every NAME in `tbl`; returns
  -- the stripped string and the value of the matching entry (nil if none)
  local function check_specific_tag(prefix, s, tbl)
    local replacement = nil
    local ret = s
    fun.each(function(n, t)
      local ns,matches = string.gsub(s, string.format("<%s%s>", prefix, n), "")
      if matches > 0 then
        replacement = t
        ret = ns
      end
    end, tbl)

    return ret,replacement
  end

  local repl
  str,repl = check_specific_tag("pre ", str, replace['pre'])
  if repl then
    pre = repl
  end
  str,repl = check_specific_tag("inter ", str, replace['inter'])
  if repl then
    inter = repl
  end
  str,repl = check_specific_tag("post ", str, replace['post'])
  if repl then
    post = repl
  end

  -- XXX: ugly hack
  -- An empty fragment would replace `><` with itself, so it is skipped
  if inter ~= "" then
    str = string.gsub(str, "><",
      escape_replacement(string.format(">%s<", inter)))
  end

  local function replace_all_tags(s)
    local sstr = s
    fun.each(function(n, t)
      local rep = escape_replacement(string.format("%s%s%s", pre, t, post))
      sstr = string.gsub(sstr, string.format("<%s>", n), rep)
    end, replace['tags'])

    return sstr
  end

  local s = replace_all_tags(str)

  if str ~= s then
    return true,s
  end

  return false,str
end
1170
-- Extracts the atom found at the very beginning of `str`: the longest
-- prefix free of the delimiters comma, space, tab, newline and
-- ( ) > < + ! | &
-- Returns an empty string when `str` starts with a delimiter.
--
-- The delimiters sit inside a character class, where they are taken
-- literally. Testing each character with string.find in pattern mode (as was
-- done before) made '(' , ')' , '%' and '[' raise pattern errors and made
-- '.', '^' and '$' match spuriously.
local function parse_atom(str)
  return (string.match(str, '^[^, \t()><+!|&\n]*'))
end
1181
-- Builds the callback that evaluates a single atom for `task` within the
-- named result `result_name`.
--
-- The returned function takes an atom name. Atoms known to this module are
-- evaluated through their registered callback and its result is returned
-- unchanged (possibly nil). Any other atom is looked up as a symbol of the
-- task, after SA -> rspamd name replacement, and yields 1 when the symbol
-- is present and 0 when it is not.
local function gen_process_atom_cb(result_name, task)
  return function(atom)
    local handler = atoms[atom]

    if not handler then
      -- This is likely external atom
      local real_sym = symbols_replacements[atom] or atom
      local found = task:has_symbol(real_sym, result_name)

      if found then
        lua_util.debugm(N, task, 'external atom: %s, result: 1, named_result: %s', real_sym, result_name)
        return 1
      end

      lua_util.debugm(N, task, 'external atom: %s, result: 0, , named_result: %s', real_sym, result_name)
      return 0
    end

    local res = handler(task, result_name)

    if not res then
      lua_util.debugm(N, task, 'metric: %s, atom: %s, NULL result', result_name, atom)
    elseif res > 0 then
      lua_util.debugm(N, task, 'metric: %s, atom: %s, result: %s', result_name, atom, res)
    end

    return res
  end
end
1210
-- Turns the data collected while parsing SA configuration into live rspamd
-- objects. In order:
--   1. expands replace tags that reference other tags;
--   2. recompiles the regexps of rules listed in replace['rules'];
--   3. copies parsed scores into the matching rules;
--   4. creates an atom callback for every header, function, part, body and
--      uri rule (promoting significantly scored ones to meta rules);
--   5. compiles meta expressions and registers their symbols;
--   6. registers direct and indirect dependencies on foreign symbols;
--   7. sets metric symbols for scores that were not attached to a meta rule;
--   8. builds the freemail trie and logs loading statistics.
local function post_process()
  -- string.gsub gives '%' a special meaning in replacement strings, so it is
  -- doubled to have a value inserted literally
  local function escape_replacement(s)
    return (string.gsub(s, '%%', '%%%%'))
  end

  -- A rule whose effective score exceeds meta_score_alpha is additionally
  -- stored as a single-atom meta rule (see the meta rules stage below, which
  -- registers a symbol for every meta rule)
  local function add_meta_if_scored(k, r)
    if r['score'] then
      local real_score = r['score'] * calculate_score(k, r)
      if math.abs(real_score) > meta_score_alpha then
        add_sole_meta(k, r)
      end
    end
  end

  -- Replace rule tags
  local ntags = {}
  -- Expands references to other tags inside the value of `tag`; results are
  -- memoized in ntags
  local function rec_replace_tags(tag, tagv)
    if ntags[tag] then return ntags[tag] end
    fun.each(function(n, t)
      if n ~= tag then
        local s, matches = string.gsub(tagv, string.format("<%s>", n),
          escape_replacement(t))
        if matches > 0 then
          ntags[tag] = rec_replace_tags(tag, s)
        end
      end
    end, replace['tags'])

    if not ntags[tag] then ntags[tag] = tagv end
    return ntags[tag]
  end

  fun.each(function(n, t)
    rec_replace_tags(n, t)
  end, replace['tags'])
  fun.each(function(n, t)
    replace['tags'][n] = t
  end, ntags)

  -- Apply replacements to the rules named by `replace_rules`
  fun.each(function(r)
    local rule = rules[r]

    if not rule then
      -- The name comes straight from the configuration and need not match a
      -- stored rule; indexing nil here would abort the whole post-processing
      lua_util.debugm(N, rspamd_config,
        'replace_rules refers to unknown rule %1, skip it', r)
    elseif rule['re_expr'] and rule['re'] then
      local res, nexpr = apply_replacements(rule['re_expr'])
      if res then
        local nre = rspamd_regexp.create(nexpr)
        if not nre then
          rspamd_logger.errx(rspamd_config, 'cannot apply replacement for rule %1', r)
          --rule['re'] = nil
        else
          -- Carry the hits limit of the old regexp over to the new one
          local old_max_hits = rule['re']:get_max_hits()
          lua_util.debugm(N, rspamd_config, 'replace %1 -> %2', r, nexpr)
          rspamd_config:replace_regexp({
            old_re = rule['re'],
            new_re = nre,
            pcre_only = is_pcre_only(rule['symbol']),
          })
          rule['re'] = nre
          rule['re_expr'] = nexpr
          nre:set_limit(match_limit)
          nre:set_max_hits(old_max_hits)
        end
      end
    end
  end, replace['rules'])

  -- Attach parsed scores to the rules they belong to
  fun.each(function(key, score)
    if rules[key] then
      rules[key]['score'] = score
    end
  end, scores)

  -- Header rules
  fun.each(function(k, r)
    local f = function(task)

      local raw = false
      local check = {}
      -- Cached path for ordinary expressions
      if r['ordinary'] then
        local h = r['header'][1]
        local t = 'header'

        if h['raw'] then
          t = 'rawheader'
        end

        if not r['re'] then
          rspamd_logger.errx(task, 're is missing for rule %1 (%2 header)', k,
            h['header'])
          return 0
        end

        local ret = process_regexp_opt(r.re, task, t, h.header, h.strong)

        -- Negated rule: invert the match result
        if r['not'] then
          if ret ~= 0 then
            ret = 0
          else
            ret = 1
          end
        end

        return ret
      end

      -- Slow path
      fun.each(function(h)
        local hname = h['header']

        local hdr
        if h['mime'] then
          -- Collect the header from every part of the message
          local parts = task:get_parts()
          for _, p in ipairs(parts) do
            local m_hdr = p:get_header_full(hname, h['strong'])

            if m_hdr then
              if not hdr then
                hdr = {}
              end
              for _, mh in ipairs(m_hdr) do
                table.insert(hdr, mh)
              end
            end
          end
        else
          hdr = task:get_header_full(hname, h['strong'])
        end

        if hdr then
          for _, rh in ipairs(hdr) do
            -- Subject for optimization
            local str
            if h['raw'] then
              str = rh['value']
              raw = true
            else
              str = rh['decoded']
            end
            -- NOTE: this return only leaves the per-header callback, the
            -- remaining headers of the rule are still processed
            if not str then return 0 end

            if h['function'] then
              str = h['function'](str)
            end

            -- The header function may yield one string or a list of them
            if type(str) == 'string' then
              table.insert(check, str)
            else
              for _, c in ipairs(str) do
                table.insert(check, c)
              end
            end
          end
        elseif r['unset'] then
          -- Header is absent: match against the configured fallback value
          table.insert(check, r['unset'])
        end
      end, r['header'])

      if #check == 0 then
        if r['not'] then return 1 end
        return 0
      end

      local ret = 0
      for _, c in ipairs(check) do
        local match = sa_regexp_match(c, r['re'], raw, r)
        if (match > 0 and not r['not']) or (match == 0 and r['not']) then
          ret = 1
        end
      end

      return ret
    end
    add_meta_if_scored(k, r)
    atoms[k] = f
  end,
  fun.filter(function(_, r)
      return r['type'] == 'header' and r['header']
  end,
  rules))

  -- Custom function rules
  fun.each(function(k, r)
    local f = function(task)
      local res = r['function'](task)
      if res and res > 0 then
        return res
      end
      return 0
    end
    add_meta_if_scored(k, r)
    atoms[k] = f
  end,
    fun.filter(function(_, r)
      return r['type'] == 'function' and r['function']
    end,
      rules))

  -- Parts rules
  fun.each(function(k, r)
    local f = function(task)
      if not r['re'] then
        rspamd_logger.errx(task, 're is missing for rule %1', k)
        return 0
      end

      local t = 'mime'
      if r['raw'] then t = 'rawmime' end

      return process_regexp_opt(r.re, task, t)
    end
    add_meta_if_scored(k, r)
    atoms[k] = f
  end,
  fun.filter(function(_, r)
      return r['type'] == 'part'
  end, rules))

  -- SA body rules
  fun.each(function(k, r)
    local f = function(task)
      if not r['re'] then
        rspamd_logger.errx(task, 're is missing for rule %1', k)
        return 0
      end

      -- The rule type is passed on as the regexp type
      local t = r['type']

      local ret = process_regexp_opt(r.re, task, t)
      return ret
    end
    add_meta_if_scored(k, r)
    atoms[k] = f
  end,
  fun.filter(function(_, r)
      return r['type'] == 'sabody' or r['type'] == 'message' or r['type'] == 'sarawbody'
  end, rules))

  -- URL rules
  fun.each(function(k, r)
    local f = function(task)
      if not r['re'] then
        rspamd_logger.errx(task, 're is missing for rule %1', k)
        return 0
      end

      return process_regexp_opt(r.re, task, 'url')
    end
    add_meta_if_scored(k, r)
    atoms[k] = f
  end,
    fun.filter(function(_, r)
      return r['type'] == 'uri'
    end,
      rules))
  -- Meta rules
  fun.each(function(k, r)
      local expression = nil
      -- Meta function callback
      -- Here are dragons!
      -- This function can be called from 2 DIFFERENT type of invocations:
      -- 1) Invocation from Rspamd itself where `res_name` will be nil
      -- 2) Invocation from other meta during expression:process_traced call
      -- So we need to distinguish that and return different stuff to be able to deal with atoms
      local meta_cb = function(task, res_name)
        lua_util.debugm(N, task, 'meta callback for %s; result name: %s', k, res_name)
        local cached = task:cache_get('sa_metas_processed')

        -- We avoid many task methods invocations here (likely)
        if not cached then
          cached = {}
          task:cache_set('sa_metas_processed', cached)
        end

        local already_processed = cached[k]

        -- Exclude elements that are named in the same way as the symbol itself
        local function exclude_sym_filter(sopt)
          return sopt ~= k
        end

        if not (already_processed and already_processed[res_name or 'default']) then
          -- Execute symbol
          local function exec_symbol(cur_res)
            local res,trace = expression:process_traced(gen_process_atom_cb(cur_res, task))
            lua_util.debugm(N, task, 'meta result for %s: %s; result name: %s', k, res, cur_res)
            if res > 0 then
              -- Symbol should be one shot to make it working properly
              task:insert_result_named(cur_res, k, res, fun.totable(fun.filter(exclude_sym_filter, trace)))
            end

            if not cached[k] then
              cached[k] = {}
            end

            cached[k][cur_res] = res
          end

          if not res_name then
            -- Invoke for all named results
            local named_results = task:get_all_named_results()
            for _,cur_res in ipairs(named_results) do
              exec_symbol(cur_res)
            end
          else
            -- Invoked from another meta
            exec_symbol(res_name)
            return cached[k][res_name] or 0
          end
        else
          -- We have cached the result
          local res = already_processed[res_name or 'default'] or 0
          lua_util.debugm(N, task, 'cached meta result for %s: %s; result name: %s',
              k, res, res_name)

          if res_name then
            return res
          end
        end

        -- No return if invoked directly from Rspamd as we use task:insert_result_named directly
      end

      expression = rspamd_expression.create(r['meta'], parse_atom, rspamd_config:get_mempool())
      if not expression then
        rspamd_logger.errx(rspamd_config, 'Cannot parse expression ' .. r['meta'])
      else

        if r['score'] then
          rspamd_config:set_metric_symbol{
            name = k, score = r['score'],
            description = r['description'],
            priority = scores_priority,
            one_shot = true
          }
          scores_added[k] = 1
          rspamd_config:register_symbol{
            name = k,
            weight = calculate_score(k, r),
            callback = meta_cb
          }
        else
          -- Add 0 score to avoid issues
          rspamd_config:register_symbol{
            name = k,
            weight = calculate_score(k, r),
            callback = meta_cb,
            score = 0,
          }
        end

        r['expression'] = expression

        -- Keep an atom callback created by an earlier stage, if any
        if not atoms[k] then
          atoms[k] = meta_cb
        end
      end
    end,
    fun.filter(function(_, r)
        return r['type'] == 'meta'
      end,
      rules))

  -- Check meta rules for foreign symbols and register dependencies
  -- First direct dependencies:
  fun.each(function(k, r)
      if r['expression'] then
        local expr_atoms = r['expression']:atoms()

        for _,a in ipairs(expr_atoms) do
          if not atoms[a] then
            local rspamd_symbol = replace_symbol(a)
            if not external_deps[k] then
              external_deps[k] = {}
            end

            if not external_deps[k][rspamd_symbol] then
              rspamd_config:register_dependency(k, rspamd_symbol)
              external_deps[k][rspamd_symbol] = true
              lua_util.debugm(N, rspamd_config,
                'atom %1 is a direct foreign dependency, ' ..
                'register dependency for %2 on %3',
                a, k, rspamd_symbol)
            end
          end
        end
      end
    end,
    fun.filter(function(_, r)
      return r['type'] == 'meta'
    end,
    rules))

  -- ... And then indirect ones ...
  -- Dependencies are propagated until a full pass registers nothing new
  local nchanges
  repeat
  nchanges = 0
    fun.each(function(k, r)
      if r['expression'] then
        local expr_atoms = r['expression']:atoms()
        for _,a in ipairs(expr_atoms) do
          if type(external_deps[a]) == 'table' then
            for dep in pairs(external_deps[a]) do
              if not external_deps[k] then
                external_deps[k] = {}
              end
              if not external_deps[k][dep] then
                rspamd_config:register_dependency(k, dep)
                external_deps[k][dep] = true
                lua_util.debugm(N, rspamd_config,
                  'atom %1 is an indirect foreign dependency, ' ..
                  'register dependency for %2 on %3',
                  a, k, dep)
                  nchanges = nchanges + 1
              end
            end
          else
            local rspamd_symbol, replaced_symbol = replace_symbol(a)
            if replaced_symbol then
              external_deps[a] = {[rspamd_symbol] = true}
            else
              external_deps[a] = {}
            end
          end
        end
      end
    end,
    fun.filter(function(_, r)
      return r['type'] == 'meta'
    end,
    rules))
  until nchanges == 0

  -- Set missing symbols
  fun.each(function(key, score)
    if not scores_added[key] then
      rspamd_config:set_metric_symbol({
            name = key, score = score,
            priority = 2, flags = 'ignore'})
    end
  end, scores)

  -- Logging output
  if freemail_domains then
    freemail_trie = rspamd_trie.create(freemail_domains)
    rspamd_logger.infox(rspamd_config, 'loaded %1 freemail domains definitions',
      #freemail_domains)
  end
  rspamd_logger.infox(rspamd_config, 'loaded %1 blacklist/whitelist elements',
      sa_lists['elts'])
end
1673
-- Set once at least one SA rules file has been opened and parsed
local has_rules = false

-- Parses every file matching the glob `pattern` as SA configuration.
-- A pattern without matches and a file that cannot be opened are logged and
-- skipped.
local function load_sa_rule_files(pattern)
  local files = util.glob(pattern)

  if not files or #files == 0 then
    rspamd_logger.errx(rspamd_config, "cannot find any files matching pattern %s", pattern)
    return
  end

  for _,matched in ipairs(files) do
    local f = io.open(matched, "r")
    if f then
      rspamd_logger.infox(rspamd_config, 'loading SA rules from %s', matched)
      process_sa_conf(f)
      -- Release the descriptor right away instead of waiting for the
      -- garbage collector; the check keeps this safe if the handle has
      -- already been closed
      if io.type(f) == 'file' then
        f:close()
      end
      has_rules = true
    else
      rspamd_logger.errx(rspamd_config, "cannot open %1", matched)
    end
  end
end

if type(section) == "table" then
  -- Recognised options: expected value type and the setter to apply
  local keywords = {
    pcre_only = {'table', function(v) pcre_only_regexps = lua_util.list_to_hash(v) end},
    alpha = {'number', function(v) meta_score_alpha = tonumber(v) end},
    match_limit = {'number', function(v) match_limit = tonumber(v) end},
    scores_priority = {'number', function(v) scores_priority = tonumber(v) end},
  }

  for k, fn in pairs(section) do
    local kw = keywords[k]
    if kw and type(fn) == kw[1] then
      kw[2](fn)
    elseif type(fn) == 'table' then
      -- SA rule file
      for _, elt in ipairs(fn) do
        load_sa_rule_files(elt)
      end
    else
      -- assume string
      load_sa_rule_files(fn)
    end
  end
end

if has_rules then
  post_process()
else
  lua_util.disable_module(N, "config")
end
1737