1# Copyright (C) 2001-2018 by the Free Software Foundation, Inc. 2# 3# This program is free software; you can redistribute it and/or 4# modify it under the terms of the GNU General Public License 5# as published by the Free Software Foundation; either version 2 6# of the License, or (at your option) any later version. 7# 8# This program is distributed in the hope that it will be useful, 9# but WITHOUT ANY WARRANTY; without even the implied warranty of 10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11# GNU General Public License for more details. 12# 13# You should have received a copy of the GNU General Public License 14# along with this program; if not, write to the Free Software 15# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, 16# USA. 17 18"""Bounce queue runner.""" 19 20import os 21import re 22import time 23import cPickle 24 25from email.MIMEText import MIMEText 26from email.MIMEMessage import MIMEMessage 27from email.Utils import parseaddr 28 29from Mailman import mm_cfg 30from Mailman import Utils 31from Mailman import LockFile 32from Mailman.Errors import NotAMemberError 33from Mailman.Message import UserNotification 34from Mailman.Bouncer import _BounceInfo 35from Mailman.Bouncers import BouncerAPI 36from Mailman.Queue.Runner import Runner 37from Mailman.Queue.sbcache import get_switchboard 38from Mailman.Logging.Syslog import syslog 39from Mailman.i18n import _ 40 41COMMASPACE = ', ' 42 43try: 44 True, False 45except NameError: 46 True = 1 47 False = 0 48 49 50 51class BounceMixin: 52 def __init__(self): 53 # Registering a bounce means acquiring the list lock, and it would be 54 # too expensive to do this for each message. Instead, each bounce 55 # runner maintains an event log which is essentially a file with 56 # multiple pickles. Each bounce we receive gets appended to this file 57 # as a 4-tuple record: (listname, addr, today, msg) 58 # 59 # today is itself a 3-tuple of (year, month, day) 60 # 61 # Every once in a while (see _doperiodic()), the bounce runner cracks 62 # open the file, reads all the records and registers all the bounces. 63 # Then it truncates the file and continues on. We don't need to lock 64 # the bounce event file because bounce qrunners are single threaded 65 # and each creates a uniquely named file to contain the events. 66 # 67 # XXX When Python 2.3 is minimal require, we can use the new 68 # tempfile.TemporaryFile() function. 69 # 70 # XXX We used to classify bounces to the site list as bounce events 71 # for every list, but this caused severe problems. Here's the 72 # scenario: aperson@example.com is a member of 4 lists, and a list 73 # owner of the foo list. example.com has an aggressive spam filter 74 # which rejects any message that is spam or contains spam as an 75 # attachment. Now, a spambot sends a piece of spam to the foo list, 76 # but since that spambot is not a member, the list holds the message 77 # for approval, and sends a notification to aperson@example.com as 78 # list owner. That notification contains a copy of the spam. Now 79 # example.com rejects the message, causing a bounce to be sent to the 80 # site list's bounce address. The bounce runner would then dutifully 81 # register a bounce for all 4 lists that aperson@example.com was a 82 # member of, and eventually that person would get disabled on all 83 # their lists. So now we ignore site list bounces. Ce La Vie for 84 # password reminder bounces. 85 self._bounce_events_file = os.path.join( 86 mm_cfg.DATA_DIR, 'bounce-events-%05d.pck' % os.getpid()) 87 self._bounce_events_fp = None 88 self._bouncecnt = 0 89 self._nextaction = time.time() + mm_cfg.REGISTER_BOUNCES_EVERY 90 91 def _queue_bounces(self, listname, addrs, msg): 92 today = time.localtime()[:3] 93 if self._bounce_events_fp is None: 94 omask = os.umask(006) 95 try: 96 self._bounce_events_fp = open(self._bounce_events_file, 'a+b') 97 finally: 98 os.umask(omask) 99 for addr in addrs: 100 cPickle.dump((listname, addr, today, msg), 101 self._bounce_events_fp, 1) 102 self._bounce_events_fp.flush() 103 os.fsync(self._bounce_events_fp.fileno()) 104 self._bouncecnt += len(addrs) 105 106 def _register_bounces(self): 107 syslog('bounce', '%s processing %s queued bounces', 108 self, self._bouncecnt) 109 # Read all the records from the bounce file, then unlink it. Sort the 110 # records by listname for more efficient processing. 111 events = {} 112 self._bounce_events_fp.seek(0) 113 while True: 114 try: 115 listname, addr, day, msg = cPickle.load(self._bounce_events_fp) 116 except ValueError, e: 117 syslog('bounce', 'Error reading bounce events: %s', e) 118 except EOFError: 119 break 120 events.setdefault(listname, []).append((addr, day, msg)) 121 # Now register all events sorted by list 122 for listname in events.keys(): 123 mlist = self._open_list(listname) 124 mlist.Lock() 125 try: 126 for addr, day, msg in events[listname]: 127 mlist.registerBounce(addr, msg, day=day) 128 mlist.Save() 129 finally: 130 mlist.Unlock() 131 # Reset and free all the cached memory 132 self._bounce_events_fp.close() 133 self._bounce_events_fp = None 134 os.unlink(self._bounce_events_file) 135 self._bouncecnt = 0 136 137 def _cleanup(self): 138 if self._bouncecnt > 0: 139 self._register_bounces() 140 141 def _doperiodic(self): 142 now = time.time() 143 if self._nextaction > now or self._bouncecnt == 0: 144 return 145 # Let's go ahead and register the bounces we've got stored up 146 self._nextaction = now + mm_cfg.REGISTER_BOUNCES_EVERY 147 self._register_bounces() 148 149 def _probe_bounce(self, mlist, token): 150 locked = mlist.Locked() 151 if not locked: 152 mlist.Lock() 153 try: 154 op, addr, bmsg = mlist.pend_confirm(token) 155 # For Python 2.4 compatibility we need an inner try because 156 # try: ... except: ... finally: requires Python 2.5+ 157 try: 158 info = mlist.getBounceInfo(addr) 159 if not info: 160 # info was deleted before probe bounce was received. 161 # Just create a new info. 162 info = _BounceInfo(addr, 163 0.0, 164 time.localtime()[:3], 165 mlist.bounce_you_are_disabled_warnings 166 ) 167 mlist.disableBouncingMember(addr, info, bmsg) 168 # Only save the list if we're unlocking it 169 if not locked: 170 mlist.Save() 171 except NotAMemberError: 172 # Member was removed before probe bounce returned. 173 # Just ignore it. 174 pass 175 finally: 176 if not locked: 177 mlist.Unlock() 178 179 180 181class BounceRunner(Runner, BounceMixin): 182 QDIR = mm_cfg.BOUNCEQUEUE_DIR 183 184 def __init__(self, slice=None, numslices=1): 185 Runner.__init__(self, slice, numslices) 186 BounceMixin.__init__(self) 187 188 def _dispose(self, mlist, msg, msgdata): 189 # Make sure we have the most up-to-date state 190 mlist.Load() 191 outq = get_switchboard(mm_cfg.OUTQUEUE_DIR) 192 # There are a few possibilities here: 193 # 194 # - the message could have been VERP'd in which case, we know exactly 195 # who the message was destined for. That make our job easy. 196 # - the message could have been originally destined for a list owner, 197 # but a list owner address itself bounced. That's bad, and for now 198 # we'll simply attempt to deliver the message to the site list 199 # owner. 200 # Note that this means that automated bounce processing doesn't work 201 # for the site list. Because we can't reliably tell to what address 202 # a non-VERP'd bounce was originally sent, we have to treat all 203 # bounces sent to the site list as potential list owner bounces. 204 # - the list owner could have set list-bounces (or list-admin) as the 205 # owner address. That's really bad as it results in a loop of ever 206 # growing unrecognized bounce messages. We detect this based on the 207 # fact that this message itself will be from the site bounces 208 # address. We then send this to the site list owner instead. 209 # Notices to list-owner have their envelope sender and From: set to 210 # the site-bounces address. Check if this is this a bounce for a 211 # message to a list owner, coming to site-bounces, or a looping 212 # message sent directly to the -bounces address. We have to do these 213 # cases separately, because sending to site-owner will reset the 214 # envelope sender. 215 # Is this a site list bounce? 216 if (mlist.internal_name().lower() == 217 mm_cfg.MAILMAN_SITE_LIST.lower()): 218 # Send it on to the site owners, but craft the envelope sender to 219 # be the -loop detection address, so if /they/ bounce, we won't 220 # get stuck in a bounce loop. 221 outq.enqueue(msg, msgdata, 222 recips=mlist.owner, 223 envsender=Utils.get_site_email(extra='loop'), 224 nodecorate=1, 225 ) 226 return 227 # Is this a possible looping message sent directly to a list-bounces 228 # address other than the site list? 229 # Check From: because unix_from might be VERP'd. 230 # Also, check the From: that Message.OwnerNotification uses. 231 if (msg.get('from') == 232 Utils.get_site_email(mlist.host_name, 'bounces')): 233 # Just send it to the sitelist-owner address. If that bounces 234 # we'll handle it above. 235 outq.enqueue(msg, msgdata, 236 recips=[Utils.get_site_email(extra='owner')], 237 envsender=Utils.get_site_email(extra='loop'), 238 nodecorate=1, 239 ) 240 return 241 # List isn't doing bounce processing? 242 if not mlist.bounce_processing: 243 return 244 # Try VERP detection first, since it's quick and easy 245 addrs = verp_bounce(mlist, msg) 246 if addrs: 247 # We have an address, but check if the message is non-fatal. 248 if BouncerAPI.ScanMessages(mlist, msg) is BouncerAPI.Stop: 249 return 250 else: 251 # See if this was a probe message. 252 token = verp_probe(mlist, msg) 253 if token: 254 self._probe_bounce(mlist, token) 255 return 256 # That didn't give us anything useful, so try the old fashion 257 # bounce matching modules. 258 addrs = BouncerAPI.ScanMessages(mlist, msg) 259 if addrs is BouncerAPI.Stop: 260 # This is a recognized, non-fatal notice. Ignore it. 261 return 262 # If that still didn't return us any useful addresses, then send it on 263 # or discard it. 264 addrs = filter(None, addrs) 265 if not addrs: 266 syslog('bounce', 267 '%s: bounce message w/no discernable addresses: %s', 268 mlist.internal_name(), 269 msg.get('message-id', 'n/a')) 270 maybe_forward(mlist, msg) 271 return 272 # BAW: It's possible that there are None's in the list of addresses, 273 # although I'm unsure how that could happen. Possibly ScanMessages() 274 # can let None's sneak through. In any event, this will kill them. 275 # addrs = filter(None, addrs) 276 # MAS above filter moved up so we don't try to queue an empty list. 277 self._queue_bounces(mlist.internal_name(), addrs, msg) 278 279 _doperiodic = BounceMixin._doperiodic 280 281 def _cleanup(self): 282 BounceMixin._cleanup(self) 283 Runner._cleanup(self) 284 285 286 287def verp_bounce(mlist, msg): 288 bmailbox, bdomain = Utils.ParseEmail(mlist.GetBouncesEmail()) 289 # Sadly not every MTA bounces VERP messages correctly, or consistently. 290 # Fall back to Delivered-To: (Postfix), Envelope-To: (Exim) and 291 # Apparently-To:, and then short-circuit if we still don't have anything 292 # to work with. Note that there can be multiple Delivered-To: headers so 293 # we need to search them all (and we don't worry about false positives for 294 # forwarded email, because only one should match VERP_REGEXP). 295 vals = [] 296 for header in ('to', 'delivered-to', 'envelope-to', 'apparently-to'): 297 vals.extend(msg.get_all(header, [])) 298 for field in vals: 299 to = parseaddr(field)[1] 300 if not to: 301 continue # empty header 302 mo = re.search(mm_cfg.VERP_REGEXP, to) 303 if not mo: 304 continue # no match of regexp 305 try: 306 if bmailbox <> mo.group('bounces'): 307 continue # not a bounce to our list 308 # All is good 309 addr = '%s@%s' % mo.group('mailbox', 'host') 310 except IndexError: 311 syslog('error', 312 "VERP_REGEXP doesn't yield the right match groups: %s", 313 mm_cfg.VERP_REGEXP) 314 return [] 315 return [addr] 316 317 318 319def verp_probe(mlist, msg): 320 bmailbox, bdomain = Utils.ParseEmail(mlist.GetBouncesEmail()) 321 # Sadly not every MTA bounces VERP messages correctly, or consistently. 322 # Fall back to Delivered-To: (Postfix), Envelope-To: (Exim) and 323 # Apparently-To:, and then short-circuit if we still don't have anything 324 # to work with. Note that there can be multiple Delivered-To: headers so 325 # we need to search them all (and we don't worry about false positives for 326 # forwarded email, because only one should match VERP_REGEXP). 327 vals = [] 328 for header in ('to', 'delivered-to', 'envelope-to', 'apparently-to'): 329 vals.extend(msg.get_all(header, [])) 330 for field in vals: 331 to = parseaddr(field)[1] 332 if not to: 333 continue # empty header 334 mo = re.search(mm_cfg.VERP_PROBE_REGEXP, to) 335 if not mo: 336 continue # no match of regexp 337 try: 338 if bmailbox <> mo.group('bounces'): 339 continue # not a bounce to our list 340 # Extract the token and see if there's an entry 341 token = mo.group('token') 342 data = mlist.pend_confirm(token, expunge=False) 343 if data is not None: 344 return token 345 except IndexError: 346 syslog( 347 'error', 348 "VERP_PROBE_REGEXP doesn't yield the right match groups: %s", 349 mm_cfg.VERP_PROBE_REGEXP) 350 return None 351 352 353 354def maybe_forward(mlist, msg): 355 # Does the list owner want to get non-matching bounce messages? 356 # If not, simply discard it. 357 if mlist.bounce_unrecognized_goes_to_list_owner: 358 adminurl = mlist.GetScriptURL('admin', absolute=1) + '/bounce' 359 mlist.ForwardMessage(msg, 360 text=_("""\ 361The attached message was received as a bounce, but either the bounce format 362was not recognized, or no member addresses could be extracted from it. This 363mailing list has been configured to send all unrecognized bounce messages to 364the list administrator(s). 365 366For more information see: 367%(adminurl)s 368 369"""), 370 subject=_('Uncaught bounce notification'), 371 tomoderators=0) 372 syslog('bounce', 373 '%s: forwarding unrecognized, message-id: %s', 374 mlist.internal_name(), 375 msg.get('message-id', 'n/a')) 376 else: 377 syslog('bounce', 378 '%s: discarding unrecognized, message-id: %s', 379 mlist.internal_name(), 380 msg.get('message-id', 'n/a')) 381