1--[[
2Copyright (c) 2017, Vsevolod Stakhov <vsevolod@highsecure.ru>
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8    http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15]]--
16
17local logger = require "rspamd_logger"
18local lua_util = require "lua_util"
19local rspamd_util = require "rspamd_util"
20local fun = require "fun"
21
-- Tells whether `t` is marked as a UCL implicit array.
-- The decision is made solely from the `class` field of the metatable of `t`.
-- Returns true when the class equals 'ucl.type.impl_array', false when a
-- different class is set, and a falsy value (nil) when there is no metatable
-- or no class field.
local function is_implicit(t)
  local meta = getmetatable(t)
  if not meta then
    return meta
  end

  local class = meta.class
  if not class then
    return class
  end

  return class == 'ucl.type.impl_array'
end
27
-- Builds a stateful iterator over a legacy "named sections" table.
--
-- Two layouts are recognised for every section object:
--   * the object carries a `name` field: it yields (name, object) and the
--     `name` field is removed from the object;
--   * otherwise every non-numeric key of the object is treated as a name;
--     when the value under that key is an implicit array, each of its
--     elements is yielded under the same name.
-- When `t` itself is an implicit array, each of its elements is handled as
-- a separate section object.
--
-- All pairs are collected eagerly; the returned function yields them one by
-- one and returns nothing once they are exhausted.
local function metric_pairs(t)
  local entries = {}

  -- Appends one (name, value) pair to the collected list
  local function push(name, value)
    entries[#entries + 1] = { name, value }
  end

  -- Expands a single section object into zero or more entries
  local function collect(section)
    local explicit_name = section.name

    if explicit_name then
      -- section { name = "foo" ... } form: the name field is consumed here
      push(explicit_name, section)
      section.name = nil
      return
    end

    -- section "foo" { ... } form: hard to tell apart from the one above,
    -- so every non-numeric key is taken as a section name
    for key, value in pairs(section) do
      if type(key) ~= 'number' then
        if is_implicit(value) then
          -- Several definitions were given under the same name
          for _, elt in ipairs(value) do
            push(key, elt)
          end
        else
          push(key, value)
        end
      end
    end
  end

  if is_implicit(t) then
    for _, section in ipairs(t) do
      collect(section)
    end
  else
    collect(t)
  end

  -- The iterator simply walks over the collected list
  local pos = 0
  return function()
    pos = pos + 1
    local entry = entries[pos]
    if entry then
      return entry[1], entry[2]
    end
  end
end
92
-- Converts one legacy metric group `v` (known under the key `k`) into an
-- entry of `cfg.group`.
-- A truthy `v.name` takes precedence over `k` as the group name. Only truthy
-- `enabled`, `disabled` and `max_score` fields are carried over. Symbols are
-- read from `v.symbol` via `metric_pairs`. When the group already exists in
-- `cfg.group`, the result of `lua_util.override_defaults(existing, new)` is
-- stored, otherwise the new group is stored as is.
local function group_transform(cfg, k, v)
  local group_name = v.name or k
  local new_group = { symbols = {} }

  -- Optional scalar settings are copied only when they are truthy
  for _, field in ipairs({ 'enabled', 'disabled', 'max_score' }) do
    local value = v[field]
    if value then
      new_group[field] = value
    end
  end

  local legacy_symbols = v.symbol
  if legacy_symbols then
    for sym_name, sym_def in metric_pairs(legacy_symbols) do
      local inner_name = sym_def.name
      if inner_name then
        sym_name = inner_name
        sym_def.name = nil -- Remove field
      end

      new_group.symbols[sym_name] = sym_def
    end
  end

  local groups = cfg.group
  if not groups then
    groups = {}
    cfg.group = groups
  end

  local existing = groups[group_name]
  if existing then
    groups[group_name] = lua_util.override_defaults(existing, new_group)
  else
    groups[group_name] = new_group
  end

  logger.infox("overriding group %s from the legacy metric settings", group_name)
end
125
-- Places a standalone symbol definition `v` named `k` into `cfg.group`.
--
-- Order of decisions:
--   1. If some group in `cfg.group` already defines symbol `k`, that
--      definition is replaced with
--      `lua_util.override_defaults(group_definition, v)` and we are done.
--   2. Otherwise `rspamd_config:get_metric_symbol(k)` is consulted; when it
--      returns nothing, or a symbol without a `group` field, the symbol is
--      stored in the 'ungrouped' group.
--
-- `cfg.group` may be absent: it is treated as empty for the lookup and is
-- created only when the symbol really has to be stored in 'ungrouped'.
local function symbol_transform(cfg, k, v)
  local groups = cfg.group

  -- first try to find any group where there is a definition of this symbol
  if groups then
    for gr_n, gr in pairs(groups) do
      if type(gr) == 'table' and gr.symbols and gr.symbols[k] then
        -- We override group symbol with ungrouped symbol
        logger.infox("overriding group symbol %s in the group %s", k, gr_n)
        gr.symbols[k] = lua_util.override_defaults(gr.symbols[k], v)
        return
      end
    end
  end

  -- Now check what Rspamd knows about this symbol
  local sym = rspamd_config:get_metric_symbol(k)

  if not sym or not sym.group then
    -- Otherwise we just use group 'ungrouped'
    if not groups then
      groups = {}
      cfg.group = groups
    end

    local ungrouped = groups.ungrouped
    if not ungrouped then
      ungrouped = {
        symbols = {}
      }
      groups.ungrouped = ungrouped
    elseif not ungrouped.symbols then
      -- A pre-existing 'ungrouped' group may lack the symbols table
      ungrouped.symbols = {}
    end

    ungrouped.symbols[k] = v
    logger.debugx("adding symbol %s to the group 'ungrouped'", k)
  end
end
151
-- Reports groups that carry no `symbols` field.
-- A group table without any field at all is reported at debug level, a
-- group that has other fields but no symbols is reported at info level.
-- Nothing is modified and nothing is returned.
local function test_groups(groups)
  for gr_name, gr in pairs(groups) do
    if not gr.symbols then
      -- next() yields nil only for a table without any key
      if next(gr) == nil then
        logger.debugx('group %s is empty', gr_name)
      else
        logger.infox('group %s has no symbols', gr_name)
      end
    end
  end
end
166
-- Folds one legacy `metric` section into the modern layout of `cfg`.
--
--   * `metric.actions` is merged into `cfg.actions` through
--     `lua_util.override_defaults`;
--   * `metric.unknown_weight` and `metric.subject` are stored inside
--     `cfg.actions` (the table is created when it does not exist yet);
--   * every entry of `metric.group` goes through `group_transform`;
--   * every entry of `metric.symbol` goes through `symbol_transform`.
--
-- `cfg.group` is guaranteed to exist before symbols are processed: when no
-- group has been defined so far, it is initialised with an empty 'ungrouped'
-- group.
--
-- Returns `cfg`, which is modified in place.
local function convert_metric(cfg, metric)
  if metric.actions then
    cfg.actions = lua_util.override_defaults(cfg.actions, metric.actions)
    logger.infox("overriding actions from the legacy metric settings")
  end

  if metric.unknown_weight then
    -- The metric may define no actions at all, so cfg.actions can be absent
    if not cfg.actions then cfg.actions = {} end
    cfg.actions.unknown_weight = metric.unknown_weight
  end

  if metric.subject then
    logger.infox("overriding subject from the legacy metric settings")
    if not cfg.actions then cfg.actions = {} end
    cfg.actions.subject = metric.subject
  end

  if metric.group then
    for k, v in metric_pairs(metric.group) do
      group_transform(cfg, k, v)
    end
  end

  -- Covers both a missing `metric.group` and a `metric.group` that yielded
  -- no entries: symbols below need a group table to live in
  if not cfg.group then
    cfg.group = {
      ungrouped = {
        symbols = {}
      }
    }
  end

  if metric.symbol then
    for k, v in metric_pairs(metric.symbol) do
      symbol_transform(cfg, k, v)
    end
  end

  return cfg
end
203
-- Flattens a groups table that contains numerically indexed elements
-- (an implicit array) into a single table of group definitions.
-- Entries stored under non-numeric keys are kept as they are; for entries
-- stored under numeric keys, each of their fields is copied to the top
-- level of the result. A new table is returned, the input is not modified.
local function merge_groups(groups)
  local merged = {}

  for idx, group in pairs(groups) do
    if type(idx) ~= 'number' then
      merged[idx] = group
    else
      for name, section in pairs(group) do
        merged[name] = section
      end
    end
  end

  return merged
end
220
-- Warns when both `local.d/statistic.conf` and
-- `local.d/classifier-bayes.conf` exist under the local configuration
-- directory taken from `rspamd_paths['LOCAL_CONFDIR']`.
-- The second file is only probed when the first one exists.
local function check_statistics_sanity()
  local confdir = rspamd_paths['LOCAL_CONFDIR']

  -- Full path of a file inside the local.d directory
  local function local_d(fname)
    return string.format('%s/local.d/%s', confdir, fname)
  end

  local stat_path = local_d('statistic.conf')
  local bayes_path = local_d('classifier-bayes.conf')

  if not rspamd_util.file_exists(stat_path) then
    return
  end

  if not rspamd_util.file_exists(bayes_path) then
    return
  end

  logger.warnx(rspamd_config, 'conflicting files %s and %s are found: ' ..
      'Rspamd classifier configuration might be broken!', stat_path, bayes_path)
end
235
-- Converts surbl module config to rbl module rules.
--
-- Every rule of `section.rules` becomes an entry of `cfg.rbl.rbls` under the
-- same name (`cfg.rbl.rbls` must already exist). The converted rule starts
-- as `{ urls = true, ignore_defaults = true }`, inherits the section-wide
-- `whitelist` when there is one, and then receives deep copies of the rule's
-- own settings with the legacy key names translated. The final rule is the
-- result of `lua_util.override_defaults(existing_rbl_rule, converted)`; a
-- warning is logged when an rbl rule with the same name already exists.
local function surbl_section_convert(cfg, section)
  local rbl_section = cfg.rbl.rbls
  local wl = section.whitelist
  for name,value in pairs(section.rules or {}) do
    if rbl_section[name] then
      logger.warnx(rspamd_config, 'conflicting names in surbl and rbl rules: %s, prefer surbl rule!',
          name)
    end
    local converted = {
      urls = true,
      ignore_defaults = true,
    }

    if wl then
      converted.whitelist = wl
    end

    for k,v in pairs(value) do
      local skip = false
      -- Rename
      if k == 'suffix' then k = 'rbl' end
      if k == 'ips' then k = 'returncodes' end
      if k == 'bits' then k = 'returnbits' end
      if k == 'noip' then k = 'no_ip' end
      -- Crappy legacy
      if k == 'options' then
        if v == 'noip' or v == 'no_ip' then
          converted.no_ip = true
          skip = true
        end
      end

      -- Strip the `check_` prefix. The pattern is anchored so that only a
      -- real prefix is removed (a key that merely contains `check_` in the
      -- middle is left intact), and non-string keys are not touched at all.
      if type(k) == 'string' then
        local stripped = k:match('^check_(.+)$')
        if stripped then
          k = stripped
        end
      end

      if k == 'dkim' and v then
        converted.dkim_domainonly = false
        converted.dkim_match_from = true
      end

      if k == 'emails' and v then
        -- To match surbl behaviour
        converted.emails_domainonly = true
      end

      if not skip then
        converted[k] = lua_util.deepcopy(v)
      end
    end
    rbl_section[name] = lua_util.override_defaults(rbl_section[name], converted)
  end
end
290
-- Converts emails module config to rbl module rules.
--
-- Every rule of `section.rules` becomes an entry of `cfg.rbl.rbls` under the
-- same name (`cfg.rbl.rbls` must already exist). The converted rule starts
-- as `{ emails = true, ignore_defaults = true }`, inherits the section-wide
-- `whitelist` when there is one, and then receives deep copies of the rule's
-- own settings with the legacy key names translated. The final rule is the
-- result of `lua_util.override_defaults(existing_rbl_rule, converted)`; a
-- warning is logged when an rbl rule with the same name already exists.
--
-- Special keys:
--   * `skip_body`: when truthy, switches the rule to `emails = false`,
--     `replyto = true`; the key itself is not copied;
--   * `expect_ip`: stored as `returncodes[symbol] = { value }`, where the
--     symbol is the rule's `symbol` setting or the rule name; the key itself
--     is not copied.
local function emails_section_convert(cfg, section)
  local rbl_section = cfg.rbl.rbls
  local wl = section.whitelist
  for name,value in pairs(section.rules or {}) do
    if rbl_section[name] then
      logger.warnx(rspamd_config, 'conflicting names in emails and rbl rules: %s, prefer emails rule!',
          name)
    end
    local converted = {
      emails = true,
      ignore_defaults = true,
    }

    if wl then
      converted.whitelist = wl
    end

    -- Remembered during the loop and applied afterwards, so that the result
    -- does not depend on the (unspecified) order in which pairs() visits
    -- `expect_ip` and `returncodes`
    local expect_ip

    for k,v in pairs(value) do
      local skip = false
      -- Rename
      if k == 'dnsbl' then k = 'rbl' end
      if k == 'check_replyto' then k = 'replyto' end
      if k == 'hashlen' then k = 'hash_len' end
      if k == 'encoding' then k = 'hash_format' end
      if k == 'domain_only' then k = 'emails_domainonly' end
      if k == 'delimiter' then k = 'emails_delimiter' end
      if k == 'skip_body' then
        skip = true
        if v then
          -- Hack
          converted.emails = false
          converted.replyto = true
        else
          converted.emails = true
        end
      end
      if k == 'expect_ip' then
        -- Another stupid hack, see below
        expect_ip = v
        skip = true
      end

      if not skip then
        converted[k] = lua_util.deepcopy(v)
      end
    end

    if expect_ip ~= nil then
      -- Extend return codes copied from the rule instead of replacing them
      if type(converted.returncodes) ~= 'table' then
        converted.returncodes = {}
      end
      local symbol = value.symbol or name
      converted.returncodes[symbol] = { expect_ip }
    end

    rbl_section[name] = lua_util.override_defaults(rbl_section[name], converted)
  end
end
345
-- Module entry point: upgrades legacy constructs found in the parsed
-- configuration table `cfg` and performs a number of sanity checks.
--
-- Steps, in order:
--   1. legacy `metric` sections are converted (actions, groups, symbols);
--   2. top-level `symbols` are distributed over groups;
--   3. statistics configuration files are checked for conflicts;
--   4. the `actions` section is checked and, if needed, `no_action` is
--      derived from the first action with a negative score;
--   5. numerically indexed `group` entries are merged;
--   6. obsolete DKIM/ARC settings are moved to their current names;
--   7. `options.check_all_filters` is enforced when `neural` is configured;
--   8. `ip_score` is mapped to a `reputation` rule;
--   9. `surbl` and `emails` rules are converted to `rbl` rules.
--
-- Returns two values: a boolean flag and `cfg`. The flag is set to true
-- when legacy `metric` sections were converted or groups were merged, and is
-- reset to false when the action thresholds are found to be out of order
-- (a later group merge can set it to true again).
return function(cfg)
  local ret = false

  if cfg['metric'] then
    for _, v in metric_pairs(cfg.metric) do
      cfg = convert_metric(cfg, v)
    end
    ret = true
  end

  if cfg.symbols then
    for k, v in metric_pairs(cfg.symbols) do
      symbol_transform(cfg, k, v)
    end
  end

  check_statistics_sanity()

  if not cfg.actions then
    logger.errx('no actions defined')
  else
    -- Perform sanity check for actions
    local actions_defs = {'no action', 'no_action', -- In case if that's added
                          'greylist', 'add header', 'add_header',
                          'rewrite subject', 'rewrite_subject', 'quarantine',
                          'reject', 'discard'}

    if not cfg.actions['no action'] and not cfg.actions['no_action'] and
            not cfg.actions['accept'] then
      for _,d in ipairs(actions_defs) do
        if cfg.actions[d] then

          -- An action is either a plain number or a table with a `score`
          local action_score = nil
          if type(cfg.actions[d]) == 'number' then
            action_score = cfg.actions[d]
          elseif type(cfg.actions[d]) == 'table' and cfg.actions[d]['score'] then
            action_score = cfg.actions[d]['score']
          end

          if type(cfg.actions[d]) ~= 'table' and not action_score then
            -- Neither a number nor a table: drop the value
            cfg.actions[d] = nil
          elseif type(action_score) == 'number' and action_score < 0 then
            -- Use the extracted score here: the action itself may be a table,
            -- which cannot take part in arithmetic
            cfg.actions['no_action'] = action_score - 0.001
            logger.infox(rspamd_config, 'set no_action score to: %s, as action %s has negative score',
                    cfg.actions['no_action'], d)
            break
          end
        end
      end
    end

    local actions_set = lua_util.list_to_hash(actions_defs)

    -- Now check actions section for garbage
    actions_set['unknown_weight'] = true
    actions_set['grow_factor'] = true
    actions_set['subject'] = true

    for k,_ in pairs(cfg.actions) do
      if not actions_set[k] then
        logger.warnx(rspamd_config, 'unknown element in actions section: %s', k)
      end
    end

    -- Performs thresholds sanity
    -- We exclude greylist here as it can be set to whatever threshold in practice
    local actions_order = {
      'no_action',
      'add_header',
      'rewrite_subject',
      'quarantine',
      'reject',
      'discard'
    }
    for i=1,(#actions_order - 1) do
      local act = actions_order[i]

      -- Only actions given as plain numbers are compared
      if cfg.actions[act] and type(cfg.actions[act]) == 'number' then
        local score = cfg.actions[act]

        for j=i+1,#actions_order do
          local next_act = actions_order[j]
          if cfg.actions[next_act] and type(cfg.actions[next_act]) == 'number' then
            local next_score = cfg.actions[next_act]
            if next_score <= score then
              logger.errx(rspamd_config, 'invalid actions thresholds order: action %s (%s) must have lower '..
                  'score than action %s (%s)', act, score, next_act, next_score)
              ret = false
            end
          end
        end
      end
    end
  end

  if not cfg.group then
    logger.errx('no symbol groups defined')
  else
    if cfg.group[1] then
      -- We need to merge groups
      cfg.group = merge_groups(cfg.group)
      ret = true
    end
    test_groups(cfg.group)
  end

  -- Deal with dkim settings
  if not cfg.dkim then
    cfg.dkim = {}
  else
    if cfg.dkim.sign_condition then
      -- We have an obsoleted sign condition, so we need to either add dkim_signing and move it
      -- there or just move sign condition there...
      if not cfg.dkim_signing then
        logger.warnx('obsoleted DKIM signing method used, converting it to "dkim_signing" module')
        cfg.dkim_signing = {
          sign_condition = cfg.dkim.sign_condition
        }
      else
        if not cfg.dkim_signing.sign_condition then
          logger.warnx('obsoleted DKIM signing method used, move it to "dkim_signing" module')
          cfg.dkim_signing.sign_condition = cfg.dkim.sign_condition
        else
          logger.warnx('obsoleted DKIM signing method used, ignore it as "dkim_signing" also defines condition!')
        end
      end
    end
  end

  -- Again: legacy stuff :(
  if not cfg.dkim.sign_headers then
    local sec = cfg.dkim_signing
    -- dkim_signing may be a list of sections, take the first one then
    if sec and sec[1] then sec = cfg.dkim_signing[1] end

    if sec and sec.sign_headers then
      cfg.dkim.sign_headers = sec.sign_headers
    end
  end

  -- DKIM signing/ARC legacy
  for _, mod in ipairs({'dkim_signing', 'arc'}) do
    if cfg[mod] then
      if cfg[mod].auth_only ~= nil then
        if cfg[mod].sign_authenticated ~= nil then
          logger.warnx(rspamd_config,
              'both auth_only (%s) and sign_authenticated (%s) for %s are specified, prefer auth_only',
              cfg[mod].auth_only, cfg[mod].sign_authenticated, mod)
        end
        cfg[mod].sign_authenticated = cfg[mod].auth_only
      end
    end
  end

  if cfg.dkim and cfg.dkim.sign_headers and type(cfg.dkim.sign_headers) == 'table' then
    -- Flatten
    cfg.dkim.sign_headers = table.concat(cfg.dkim.sign_headers, ':')
  end

  -- Try to find some obvious issues with configuration
  for k,v in pairs(cfg) do
    if type(v) == 'table' and v[k] and type (v[k]) == 'table' then
      logger.errx('nested section: %s { %s { ... } }, it is likely a configuration error',
              k, k)
    end
  end

  -- If neural network is enabled we MUST have `check_all_filters` flag
  if cfg.neural then
    if not cfg.options then
      cfg.options = {}
    end

    if not cfg.options.check_all_filters then
      logger.infox(rspamd_config, 'enable `options.check_all_filters` for neural network')
      cfg.options.check_all_filters = true
    end
  end

  -- Deal with IP_SCORE
  -- The `redis` section is optional, so it is checked before being indexed
  if cfg.ip_score and (cfg.ip_score.servers or (cfg.redis and cfg.redis.servers)) then
    logger.warnx(rspamd_config, 'ip_score module is deprecated in honor of reputation module!')

    if not cfg.reputation then
      cfg.reputation = {
        rules = {}
      }
    end

    if not cfg.reputation.rules then cfg.reputation.rules = {} end

    if not fun.any(function(_, v) return v.selector and v.selector.ip end,
        cfg.reputation.rules) then
      logger.infox(rspamd_config, 'attach ip reputation element to use it')

      cfg.reputation.rules.ip_score = {
        selector = {
          ip = {},
        },
        backend = {
          redis = {},
        }
      }

      if cfg.ip_score.servers then
        cfg.reputation.rules.ip_score.backend.redis.servers = cfg.ip_score.servers
      end

      if cfg.symbols and cfg.symbols['IP_SCORE'] then
        local t = cfg.symbols['IP_SCORE']

        if not cfg.symbols['SENDER_REP_SPAM'] then
          cfg.symbols['SENDER_REP_SPAM'] = t
          -- The ham symbol needs its own table: negating the weight on a
          -- shared one would also flip the spam (and IP_SCORE) weight
          local ham = lua_util.deepcopy(t)
          ham.weight = -(t.weight or 0)
          cfg.symbols['SENDER_REP_HAM'] = ham
        end
      end
    else
      logger.infox(rspamd_config, 'ip reputation already exists, do not do any IP_SCORE transforms')
    end
  end

  if cfg.surbl then
    if not cfg.rbl then
      cfg.rbl = {
        rbls = {}
      }
    end
    if not cfg.rbl.rbls then
      cfg.rbl.rbls = {}
    end
    surbl_section_convert(cfg, cfg.surbl)
    logger.infox(rspamd_config, 'converted surbl rules to rbl rules')
    -- The legacy section is emptied once its rules have been converted
    cfg.surbl = {}
  end

  if cfg.emails then
    if not cfg.rbl then
      cfg.rbl = {
        rbls = {}
      }
    end
    if not cfg.rbl.rbls then
      cfg.rbl.rbls = {}
    end
    emails_section_convert(cfg, cfg.emails)
    logger.infox(rspamd_config, 'converted emails rules to rbl rules')
    -- The legacy section is emptied once its rules have been converted
    cfg.emails = {}
  end

  return ret, cfg
end
597