# Copyright (C) 2016-2020 by the Free Software Foundation, Inc.
#
# This file is part of GNU Mailman.
#
# GNU Mailman is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# GNU Mailman. If not, see <https://www.gnu.org/licenses/>.

"""DMARC mitigation rule."""

import os
import re
import logging
import dns.resolver
import dns.rdatatype

from dns.exception import DNSException
from email.utils import parseaddr
from importlib_resources import read_binary
from lazr.config import as_timedelta
from mailman.config import config
from mailman.core.i18n import _
from mailman.interfaces.mailinglist import DMARCMitigateAction
from mailman.interfaces.rules import IRule
from mailman.utilities.datetime import now
from mailman.utilities.protocols import get
from mailman.utilities.string import wrap
from public import public
from requests.exceptions import HTTPError
from urllib.error import URLError
from zope.interface import implementer


elog = logging.getLogger('mailman.error')
vlog = logging.getLogger('mailman.vette')

DOT = '.'
EMPTYSTRING = ''
# Sentinel returned by is_reject_or_quarantine() when no DMARC record was
# found for the queried name and the caller should try another name.
KEEP_LOOKING = object()
LOCAL_FILE_NAME = 'public_suffix_list.dat'

# Map organizational domain suffix rules to a boolean indicating whether the
# rule is an exception or not. Keys are stored with their labels reversed
# (top-level label first); see parse_suffix_list().
suffix_cache = dict()


def ensure_current_suffix_list():
    # Make sure a copy of the organizational domain suffix list exists in
    # config.VAR_DIR and return the path to it. The file is (re)written
    # when it is missing or expired; this function does not parse it.
    cached_copy_path = os.path.join(config.VAR_DIR, LOCAL_FILE_NAME)
    lifetime = as_timedelta(config.dmarc.cache_lifetime)
    download = False
    try:
        mtime = os.stat(cached_copy_path).st_mtime
    except FileNotFoundError:
        vlog.info('No cached copy of the public suffix list found')
        download = True
        cache_found = False
    else:
        cache_found = True
        # Is the cached copy out-of-date? Note that when we write a new cache
        # version we explicitly set its mtime to the time in the future when
        # the cache will expire.
        if mtime < now().timestamp():
            download = True
            vlog.info('Cached copy of public suffix list is out of date')
    if download:
        try:
            content = get(config.dmarc.org_domain_data_url)
        except (URLError, HTTPError) as error:
            elog.error('Unable to retrieve public suffix list from %s: %s',
                       config.dmarc.org_domain_data_url,
                       getattr(error, 'reason', str(error)))
            if cache_found:
                # Keep the stale file untouched; None means "write nothing".
                vlog.info('Using out of date public suffix list')
                content = None
            else:
                # We couldn't access the URL and didn't even have an out of
                # date suffix list cached. Use the shipped version.
                content = read_binary('mailman.rules.data', LOCAL_FILE_NAME)
        if content is not None:
            # Content is either a string or UTF-8 encoded bytes.
            if isinstance(content, bytes):
                content = content.decode('utf-8')
            # Write the cache atomically.
            new_path = cached_copy_path + '.new'
            with open(new_path, 'w', encoding='utf-8') as fp:
                fp.write(content)
            # Set the expiry time to the future.
            mtime = (now() + lifetime).timestamp()
            os.utime(new_path, (mtime, mtime))
            # Flip the new file into the cached location. This does not
            # modify the mtime.
            os.rename(new_path, cached_copy_path)
    return cached_copy_path


def parse_suffix_list(filename=None):
    # Parse the suffix list into the per process suffix_cache. When no
    # filename is given, the cached copy from ensure_current_suffix_list()
    # is used. Nothing is returned; suffix_cache is updated in place.
    if filename is None:
        filename = ensure_current_suffix_list()
    # At this point the cached copy must exist and is as valid as possible.
    with open(filename, 'r', encoding='utf-8') as fp:
        for line in fp:
            # Skip blank lines and // comments.
            if not line.strip() or line.startswith('//'):
                continue
            # Only the text up to the first whitespace is the rule.
            line = re.sub(r'\s.*', '', line)
            if not line:
                continue
            parts = line.lower().split('.')
            if parts[0].startswith('!'):
                # A leading '!' marks an exception rule; strip the marker.
                exception = True
                parts = [parts[0][1:]] + parts[1:]
            else:
                exception = False
            # Store the rule with the top-level label first so that it can
            # be compared label by label against a reversed domain name.
            parts.reverse()
            key = DOT.join(parts)
            suffix_cache[key] = exception


def get_domain(parts, label):
    # A helper to get a domain name consisting of the first label+1 labels in
    # parts. `parts` holds the labels in reversed order (top-level label
    # first); the result is returned in normal order, joined with dots. If
    # parts has fewer than label+1 labels, all of them are used.
    domain = parts[:min(label+1, len(parts))]
    domain.reverse()
    return DOT.join(domain)


def get_organizational_domain(domain):
    # Given a domain name, this returns the corresponding Organizational
    # Domain which may be the same as the input. The suffix list is parsed
    # on first use.
    if not suffix_cache:
        parse_suffix_list()
    hits = []
    parts = domain.lower().split('.')
    parts.reverse()
    # Collect every suffix rule that matches the leading (reversed) labels
    # of the domain. A '*' label in a rule matches any single label.
    for key in suffix_cache:
        key_parts = key.split('.')
        if len(parts) >= len(key_parts):
            for i in range(len(key_parts) - 1):
                if parts[i] != key_parts[i] and key_parts[i] != '*':
                    break
            else:
                # All but the last rule label matched; check the last one.
                if (parts[len(key_parts) - 1] == key_parts[-1] or
                        key_parts[-1] == '*'):
                    hits.append(key)
    if not hits:
        # No rule matched; fall back to the last two labels of the domain.
        return get_domain(parts, 1)
    label = 0
    for key in hits:
        key_parts = key.split('.')
        if suffix_cache[key]:
            # It's an exception. The rule itself names the organizational
            # domain, so return exactly as many labels as the rule has.
            return get_domain(parts, len(key_parts) - 1)
        if len(key_parts) > label:
            label = len(key_parts)
    # The longest matching rule wins; the organizational domain is that
    # suffix plus one more label.
    return get_domain(parts, label)


def is_reject_or_quarantine(mlist, email, dmarc_domain, org=False):
    # This takes a mailing list, an email address as in the From: header, the
    # _dmarc host name for the domain in question, and a flag stating whether
    # we should check the organizational domains. It returns one of three
    # values:
    # * True if the DMARC policy is reject or quarantine;
    # * False if is not;
    # * A special sentinel if we should continue looking
    #
    # True is also returned when the DNS lookup fails for any reason other
    # than NXDOMAIN/NoAnswer, so that such mail is mitigated to be safe.
    resolver = dns.resolver.Resolver()
    resolver.timeout = as_timedelta(
        config.dmarc.resolver_timeout).total_seconds()
    resolver.lifetime = as_timedelta(
        config.dmarc.resolver_lifetime).total_seconds()
    try:
        txt_recs = resolver.query(dmarc_domain, dns.rdatatype.TXT)
    except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer):
        return KEEP_LOOKING
    except dns.resolver.NoNameservers:
        elog.error(
            'DNSException: No Nameservers available for %s (%s).',
            email, dmarc_domain)
        # Typically this means a dnssec validation error. Clients that don't
        # perform validation *may* successfully see a _dmarc RR whereas a
        # validating mailman server won't see the _dmarc RR. We should
        # mitigate this email to be safe.
        return True
    except DNSException as error:
        elog.error(
            'DNSException: Unable to query DMARC policy for %s (%s). %s',
            email, dmarc_domain, error.__doc__)
        # While we can't be sure what caused the error, there is potentially
        # a DMARC policy record that we missed and that a receiver of the mail
        # might see. Thus, we should err on the side of caution and mitigate.
        return True
    # Be as robust as possible in parsing the result.
    results_by_name = {}
    cnames = {}
    want_names = {dmarc_domain + '.'}
    # Check all the TXT records returned by DNS. Keep track of the CNAMEs for
    # checking later on. Ignore any other non-TXT records.
    for txt_rec in txt_recs.response.answer:
        if txt_rec.rdtype == dns.rdatatype.CNAME:
            cnames[txt_rec.name.to_text()] = (
                txt_rec.items[0].target.to_text())
        if txt_rec.rdtype != dns.rdatatype.TXT:
            continue
        # A TXT record may be split into several strings; glue them together.
        result = EMPTYSTRING.join(
            str(record, encoding='utf-8')
            for record in txt_rec.items[0].strings)
        name = txt_rec.name.to_text()
        results_by_name.setdefault(name, []).append(result)
    # Follow the CNAME chain starting at the queried name, so that
    # want_names ends up holding the final target of the chain.
    expands = list(want_names)
    seen = set(expands)
    while expands:
        item = expands.pop(0)
        if item in cnames:
            if cnames[item] in seen:
                # CNAME loop.
                continue
            expands.append(cnames[item])
            seen.add(cnames[item])
            want_names.add(cnames[item])
            want_names.discard(item)
    assert len(want_names) == 1, (
        'Error in CNAME processing for {}; want_names != 1.'.format(
            dmarc_domain))
    for name in want_names:
        if name not in results_by_name:
            continue
        dmarcs = [
            record for record in results_by_name[name]
            if record.startswith('v=DMARC1;')
            ]
        if not dmarcs:
            return KEEP_LOOKING
        if len(dmarcs) > 1:
            elog.error(
                'RRset of TXT records for %s has %d v=DMARC1 entries; '
                'testing them all',
                dmarc_domain, len(dmarcs))
        for entry in dmarcs:
            # When checking the organizational domain, the subdomain policy
            # (sp=) takes precedence if present; otherwise fall back to p=.
            mo = re.search(r'\bsp=(\w*)\b', entry, re.IGNORECASE)
            if org and mo:
                policy = mo.group(1).lower()
            else:
                mo = re.search(r'\bp=(\w*)\b', entry, re.IGNORECASE)
                if mo:
                    policy = mo.group(1).lower()
                else:
                    # This continue does actually get covered by
                    # TestDMARCRules.test_domain_with_subdomain_policy() and
                    # TestDMARCRules.test_no_policy() but because of
                    # Coverage BitBucket issue #198 and
                    # https://bugs.python.org/issue2506 coverage cannot report
                    # it as such, so just pragma it away.
                    continue                    # pragma: missed
            if policy in ('reject', 'quarantine'):
                vlog.info(
                    '%s: DMARC lookup for %s (%s) found p=%s in %s = %s',
                    mlist.list_name,
                    email,
                    dmarc_domain,
                    policy,
                    name,
                    entry)
                return True
    return False


def maybe_mitigate(mlist, email):
    # This takes an email address, and returns True if DMARC policy is
    # p=reject or p=quarantine. The From: domain itself is checked first;
    # only if that lookup is inconclusive is the organizational domain
    # checked (when it differs from the From: domain).
    email = email.lower()
    # Scan from the right in case quoted local part has an '@'.
    local, at, from_domain = email.rpartition('@')
    if at != '@':
        # Not an address with a domain part; nothing to look up.
        return False
    answer = is_reject_or_quarantine(
        mlist, email, '_dmarc.{}'.format(from_domain))
    if answer is not KEEP_LOOKING:
        return answer
    org_dom = get_organizational_domain(from_domain)
    if org_dom != from_domain:
        answer = is_reject_or_quarantine(
            mlist, email, '_dmarc.{}'.format(org_dom), org=True)
        if answer is not KEEP_LOOKING:
            return answer
    return False


@public
@implementer(IRule)
class DMARCMitigation:
    """The DMARC mitigation rule."""

    name = 'dmarc-mitigation'
    description = _('Find DMARC policy of From: domain.')
    record = True

    def check(self, mlist, msg, msgdata):
        """See `IRule`."""
        if mlist.dmarc_mitigate_action is DMARCMitigateAction.no_mitigation:
            # Don't bother to check if we're not going to do anything.
            return False
        display_name, address = parseaddr(str(msg.get('from', '')))
        if maybe_mitigate(mlist, address):
            # If dmarc_mitigate_action is discard or reject, this rule fires
            # and jumps to the 'moderation' chain to do the actual discard.
            # Otherwise, the rule misses but sets a flag for the dmarc handler
            # to do the appropriate action.
            msgdata['dmarc'] = True
            if mlist.dmarc_mitigate_action is DMARCMitigateAction.discard:
                msgdata['dmarc_action'] = 'discard'
                with _.defer_translation():
                    # This will be translated at the point of use.
                    msgdata.setdefault('moderation_reasons', []).append(
                        _('DMARC moderation'))
            elif mlist.dmarc_mitigate_action is DMARCMitigateAction.reject:
                # NOTE(review): not referenced directly; presumably picked
                # up by _() when it substitutes ${listowner} below, hence
                # the noqa. Do not remove without confirming.
                listowner = mlist.owner_address       # noqa F841
                with _.defer_translation():
                    # This will be translated at the point of use.
                    reason = (mlist.dmarc_moderation_notice or _(
                        'You are not allowed to post to this mailing '
                        'list From: a domain which publishes a DMARC '
                        'policy of reject or quarantine, and your message'
                        ' has been automatically rejected. If you think '
                        'that your messages are being rejected in error, '
                        'contact the mailing list owner at ${listowner}.'))
                msgdata.setdefault('moderation_reasons', []).append(
                    wrap(reason))
                msgdata['dmarc_action'] = 'reject'
            else:
                return False
            msgdata['moderation_sender'] = address
            return True
        return False