1# Copyright (C) 2016-2020 by the Free Software Foundation, Inc.
2#
3# This file is part of GNU Mailman.
4#
5# GNU Mailman is free software: you can redistribute it and/or modify it under
6# the terms of the GNU General Public License as published by the Free
7# Software Foundation, either version 3 of the License, or (at your option)
8# any later version.
9#
10# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
11# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13# more details.
14#
15# You should have received a copy of the GNU General Public License along with
16# GNU Mailman.  If not, see <https://www.gnu.org/licenses/>.
17
18"""DMARC mitigation rule."""
19
20import os
21import re
22import logging
23import dns.resolver
24
25from dns.exception import DNSException
26from email.utils import parseaddr
27from importlib_resources import read_binary
28from lazr.config import as_timedelta
29from mailman.config import config
30from mailman.core.i18n import _
31from mailman.interfaces.mailinglist import DMARCMitigateAction
32from mailman.interfaces.rules import IRule
33from mailman.utilities.datetime import now
34from mailman.utilities.protocols import get
35from mailman.utilities.string import wrap
36from public import public
37from requests.exceptions import HTTPError
38from urllib.error import URLError
39from zope.interface import implementer
40
41
# Errors (failed downloads, DNS trouble) are reported on the error log;
# cache status and positive DMARC findings go to the vette log.
elog = logging.getLogger('mailman.error')
vlog = logging.getLogger('mailman.vette')

# Small string helpers used when joining domain labels and TXT fragments.
DOT = '.'
EMPTYSTRING = ''
# Unique sentinel: a lookup returned neither a yes nor a no, so the caller
# should try the next candidate domain.
KEEP_LOOKING = object()
# File name of the public suffix list, both in the local cache directory and
# in the packaged data shipped with the code.
LOCAL_FILE_NAME = 'public_suffix_list.dat'

# Per-process cache of public suffix rules.  Each key is a rule with its
# labels in reversed order (top-level label first); each value is True when
# the rule is an exception rule and False otherwise.
suffix_cache = {}
53
54
def ensure_current_suffix_list():
    """Make sure a cached public suffix list exists and return its path.

    The cached file lives in ``config.VAR_DIR`` under ``LOCAL_FILE_NAME``.
    Its mtime does not record when it was written; it is deliberately set to
    the moment the cache expires, so the copy is stale once its mtime is in
    the past.

    A missing or stale cache triggers a download from
    ``config.dmarc.org_domain_data_url``.  When the download fails, a stale
    cache is kept untouched; when there is no cache at all, the copy packaged
    in ``mailman.rules.data`` is written to the cache instead.

    :return: The path to the cached copy of the suffix list.
    """
    cache_path = os.path.join(config.VAR_DIR, LOCAL_FILE_NAME)
    lifetime = as_timedelta(config.dmarc.cache_lifetime)
    try:
        expires = os.stat(cache_path).st_mtime
    except FileNotFoundError:
        vlog.info('No cached copy of the public suffix list found')
        have_cache = False
        need_download = True
    else:
        have_cache = True
        # The mtime is the expiry time, so a value in the past means stale.
        need_download = expires < now().timestamp()
        if need_download:
            vlog.info('Cached copy of public suffix list is out of date')
    if not need_download:
        return cache_path
    try:
        content = get(config.dmarc.org_domain_data_url)
    except (URLError, HTTPError) as error:
        elog.error('Unable to retrieve public suffix list from %s: %s',
                   config.dmarc.org_domain_data_url,
                   getattr(error, 'reason', str(error)))
        if have_cache:
            # Better an old list than none; leave the file as it is.
            vlog.info('Using out of date public suffix list')
            return cache_path
        # Nothing cached and nothing downloadable: fall back to the copy
        # that ships with the code.
        content = read_binary('mailman.rules.data', LOCAL_FILE_NAME)
    if content is None:
        return cache_path
    # The content may arrive as text or as UTF-8 encoded bytes.
    text = content.decode('utf-8') if isinstance(content, bytes) else content
    # Write to a side file first so readers never see a partial cache.
    staging_path = cache_path + '.new'
    with open(staging_path, 'w', encoding='utf-8') as fp:
        fp.write(text)
    # Stamp the file with its future expiry time.
    expires = (now() + lifetime).timestamp()
    os.utime(staging_path, (expires, expires))
    # Move the finished file into place; renaming keeps the mtime.
    os.rename(staging_path, cache_path)
    return cache_path
104
105
def parse_suffix_list(filename=None):
    """Load public suffix rules from a file into ``suffix_cache``.

    Blank lines and lines starting with ``//`` are skipped.  For every other
    line, only the text before the first whitespace character is used.  A
    leading ``!`` marks an exception rule and is removed from the stored key.
    Keys are lower-cased with their labels in reversed order; the value is
    True for exception rules and False otherwise.

    Nothing is returned; the module-level ``suffix_cache`` is updated.

    :param filename: Path of the suffix list to read.  When None, the path
        returned by ``ensure_current_suffix_list()`` is used.
    """
    if filename is None:
        filename = ensure_current_suffix_list()
    with open(filename, 'r', encoding='utf-8') as fp:
        for raw in fp:
            if raw.startswith('//') or not raw.strip():
                continue
            # Keep just the rule itself; drop the line ending and anything
            # following the first whitespace.
            rule = re.sub(r'\s.*', '', raw)
            if not rule:
                continue
            labels = rule.lower().split('.')
            is_exception = labels[0].startswith('!')
            if is_exception:
                labels[0] = labels[0][1:]
            suffix_cache[DOT.join(reversed(labels))] = is_exception
128
129
def get_domain(parts, label):
    """Build a domain name from the leading ``label + 1`` entries of parts.

    :param parts: Domain labels in reversed order (top-level label first).
    :param label: Index of the last label to include.  If it reaches past
        the end of ``parts``, every label is used.
    :return: The selected labels joined with dots, in normal (un-reversed)
        order.
    """
    # Slicing already stops at the end of the list.
    return DOT.join(reversed(parts[:label + 1]))
136
137
def get_organizational_domain(domain):
    """Return the Organizational Domain for a domain name.

    The result may be the same as the input.  The suffix rules are loaded
    into ``suffix_cache`` on first use.

    :param domain: The domain name to examine; it is lower-cased first.
    :return: The organizational domain as a dotted string.
    """
    if not suffix_cache:
        parse_suffix_list()
    labels = domain.lower().split('.')
    labels.reverse()
    # Collect every rule that matches the domain label by label, starting
    # from the top-level label.  A '*' in a rule matches any single label.
    matches = []
    for rule in suffix_cache:
        rule_labels = rule.split('.')
        if len(rule_labels) > len(labels):
            continue
        if all(want in ('*', have)
               for want, have in zip(rule_labels, labels)):
            matches.append(rule)
    if not matches:
        # No rule applies: use the two right-most labels.
        return get_domain(labels, 1)
    longest = 0
    for rule in matches:
        depth = len(rule.split('.'))
        if suffix_cache[rule]:
            # An exception rule wins immediately; the result has exactly as
            # many labels as the rule.
            return get_domain(labels, depth - 1)
        longest = max(longest, depth)
    # Otherwise the longest matching rule plus one more label.
    return get_domain(labels, longest)
167
168
def is_reject_or_quarantine(mlist, email, dmarc_domain, org=False):
    """Look up a DMARC record and report whether its policy is strict.

    :param mlist: The mailing list; only its ``list_name`` is used, for
        logging.
    :param email: The address from the From: header; used for logging only.
    :param dmarc_domain: The ``_dmarc`` host name to query for TXT records.
    :param org: True when ``dmarc_domain`` belongs to the organizational
        domain.  In that case an ``sp=`` tag, if present, is used in
        preference to ``p=``.
    :return: One of three values:

        * True if the DMARC policy is reject or quarantine, or if the DNS
          lookup failed in a way that may be hiding such a policy;
        * False if it is not;
        * ``KEEP_LOOKING`` if no DMARC record was found and the caller
          should continue with another domain.
    """
    resolver = dns.resolver.Resolver()
    resolver.timeout = as_timedelta(
        config.dmarc.resolver_timeout).total_seconds()
    resolver.lifetime = as_timedelta(
        config.dmarc.resolver_lifetime).total_seconds()
    try:
        txt_recs = resolver.query(dmarc_domain, dns.rdatatype.TXT)
    except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer):
        return KEEP_LOOKING
    except (dns.resolver.NoNameservers):
        elog.error(
            'DNSException: No Nameservers available for %s (%s).',
            email, dmarc_domain)
        # Typically this means a dnssec validation error.  Clients that don't
        # perform validation *may* successfully see a _dmarc RR whereas a
        # validating mailman server won't see the _dmarc RR.  We should
        # mitigate this email to be safe.
        return True
    except DNSException as error:
        elog.error(
            'DNSException: Unable to query DMARC policy for %s (%s). %s',
            email, dmarc_domain, error.__doc__)
        # While we can't be sure what caused the error, there is potentially
        # a DMARC policy record that we missed and that a receiver of the mail
        # might see.  Thus, we should err on the side of caution and mitigate.
        return True
    # Be as robust as possible in parsing the result.
    results_by_name = {}
    cnames = {}
    want_names = set([dmarc_domain + '.'])
    # Check all the TXT records returned by DNS.  Keep track of the CNAMEs for
    # checking later on.  Ignore any other non-TXT records.
    for txt_rec in txt_recs.response.answer:
        if txt_rec.rdtype == dns.rdatatype.CNAME:
            cnames[txt_rec.name.to_text()] = (
                txt_rec.items[0].target.to_text())
        if txt_rec.rdtype != dns.rdatatype.TXT:
            continue
        # The TXT payload is whatever bytes the remote domain published.
        # Decode leniently so that invalid UTF-8 cannot raise
        # UnicodeDecodeError out of this rule; a mangled record simply
        # fails the 'v=DMARC1;' check below.
        result = EMPTYSTRING.join(
            str(record, encoding='utf-8', errors='replace')
            for record in txt_rec.items[0].strings)
        name = txt_rec.name.to_text()
        results_by_name.setdefault(name, []).append(result)
    # Follow the CNAME chain from the queried name to its final target so
    # that only TXT records owned by that target are examined.
    expands = list(want_names)
    seen = set(expands)
    while expands:
        item = expands.pop(0)
        if item in cnames:
            if cnames[item] in seen:
                # CNAME loop.
                continue
            expands.append(cnames[item])
            seen.add(cnames[item])
            want_names.add(cnames[item])
            want_names.discard(item)
    assert len(want_names) == 1, (
        'Error in CNAME processing for {}; want_names != 1.'.format(
            dmarc_domain))
    for name in want_names:
        if name not in results_by_name:
            continue
        dmarcs = [
            record for record in results_by_name[name]
            if record.startswith('v=DMARC1;')
            ]
        if len(dmarcs) == 0:
            return KEEP_LOOKING
        if len(dmarcs) > 1:
            elog.error(
                'RRset of TXT records for %s has %d v=DMARC1 entries; '
                'testing them all',
                dmarc_domain, len(dmarcs))
        for entry in dmarcs:
            # For an organizational domain the subdomain policy (sp=) takes
            # precedence when present; otherwise use the plain policy (p=).
            mo = re.search(r'\bsp=(\w*)\b', entry, re.IGNORECASE)
            if org and mo:
                policy = mo.group(1).lower()
            else:
                mo = re.search(r'\bp=(\w*)\b', entry, re.IGNORECASE)
                if mo:
                    policy = mo.group(1).lower()
                else:
                    # This continue does actually get covered by
                    # TestDMARCRules.test_domain_with_subdomain_policy() and
                    # TestDMARCRules.test_no_policy() but because of
                    # Coverage BitBucket issue #198 and
                    # https://bugs.python.org/issue2506 coverage cannot report
                    # it as such, so just pragma it away.
                    continue                        # pragma: missed
            if policy in ('reject', 'quarantine'):
                vlog.info(
                    '%s: DMARC lookup for %s (%s) found p=%s in %s = %s',
                    mlist.list_name,
                    email,
                    dmarc_domain,
                    policy,
                    name,
                    entry)
                return True
    return False
276
277
def maybe_mitigate(mlist, email):
    """Decide whether DMARC mitigation applies to an email address.

    The domain of the address is checked first.  Only when that lookup is
    inconclusive, and the organizational domain differs from the address's
    domain, is the organizational domain checked as well.

    :param mlist: The mailing list, passed through to the lookups.
    :param email: The address to check; it is lower-cased before use.
    :return: The result of the lookup that settled the question (True when
        it found a reject or quarantine policy).  False if the address has
        no '@' or if no lookup was conclusive.
    """
    email = email.lower()
    # Split at the last '@' in case a quoted local part contains one.
    _local, sep, from_domain = email.rpartition('@')
    if sep != '@':
        return False
    verdict = is_reject_or_quarantine(
        mlist, email, '_dmarc.{}'.format(from_domain))
    if verdict is KEEP_LOOKING:
        org_dom = get_organizational_domain(from_domain)
        if org_dom == from_domain:
            # There is no separate organizational domain left to ask.
            return False
        verdict = is_reject_or_quarantine(
            mlist, email, '_dmarc.{}'.format(org_dom), org=True)
        if verdict is KEEP_LOOKING:
            return False
    return verdict
297
298
@public
@implementer(IRule)
class DMARCMitigation:
    """Rule that checks the DMARC policy of a message's From: domain."""

    name = 'dmarc-mitigation'
    description = _('Find DMARC policy of From: domain.')
    record = True

    def check(self, mlist, msg, msgdata):
        """See `IRule`.

        The rule hits only when the From: domain calls for mitigation and
        the list's action is discard or reject.  For any other action
        (apart from no_mitigation, which skips the check entirely) the rule
        misses but still sets ``msgdata['dmarc']``.
        """
        action = mlist.dmarc_mitigate_action
        if action is DMARCMitigateAction.no_mitigation:
            # Nothing would be done with the answer, so skip the lookup.
            return False
        realname, address = parseaddr(str(msg.get('from', '')))
        if not maybe_mitigate(mlist, address):
            return False
        # Set the flag in every mitigating case, including those where the
        # rule itself misses.
        msgdata['dmarc'] = True
        if action is DMARCMitigateAction.discard:
            msgdata['dmarc_action'] = 'discard'
            with _.defer_translation():
                # This will be translated at the point of use.
                msgdata.setdefault('moderation_reasons', []).append(
                    _('DMARC moderation'))
        elif action is DMARCMitigateAction.reject:
            # Looks unused, but the message template below refers to
            # ${listowner}, so this local must keep this exact name.
            listowner = mlist.owner_address       # noqa F841
            with _.defer_translation():
                # This will be translated at the point of use.
                reason = (mlist.dmarc_moderation_notice or _(
                    'You are not allowed to post to this mailing '
                    'list From: a domain which publishes a DMARC '
                    'policy of reject or quarantine, and your message'
                    ' has been automatically rejected.  If you think '
                    'that your messages are being rejected in error, '
                    'contact the mailing list owner at ${listowner}.'))
            msgdata.setdefault('moderation_reasons', []).append(
                wrap(reason))
            msgdata['dmarc_action'] = 'reject'
        else:
            # Any other action is not handled by this rule, so it misses.
            return False
        msgdata['moderation_sender'] = address
        return True
345