1# Copyright (C) 2001-2018 by the Free Software Foundation, Inc.
2#
3# This program is free software; you can redistribute it and/or
4# modify it under the terms of the GNU General Public License
5# as published by the Free Software Foundation; either version 2
6# of the License, or (at your option) any later version.
7#
8# This program is distributed in the hope that it will be useful,
9# but WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11# GNU General Public License for more details.
12#
13# You should have received a copy of the GNU General Public License
14# along with this program; if not, write to the Free Software
15# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
16# USA.
17
18"""Bounce queue runner."""
19
20import os
21import re
22import time
23import cPickle
24
25from email.MIMEText import MIMEText
26from email.MIMEMessage import MIMEMessage
27from email.Utils import parseaddr
28
29from Mailman import mm_cfg
30from Mailman import Utils
31from Mailman import LockFile
32from Mailman.Errors import NotAMemberError
33from Mailman.Message import UserNotification
34from Mailman.Bouncer import _BounceInfo
35from Mailman.Bouncers import BouncerAPI
36from Mailman.Queue.Runner import Runner
37from Mailman.Queue.sbcache import get_switchboard
38from Mailman.Logging.Syslog import syslog
39from Mailman.i18n import _
40
# Separator string for building comma-delimited text.
# NOTE(review): not referenced anywhere in the visible part of this module.
COMMASPACE = ', '

# Compatibility shim: when the interpreter has no True/False builtins, the
# bare expression below raises NameError and the names are defined here as
# the integers 1 and 0.
try:
    True, False
except NameError:
    True = 1
    False = 0
48
49
50
51class BounceMixin:
52    def __init__(self):
53        # Registering a bounce means acquiring the list lock, and it would be
54        # too expensive to do this for each message.  Instead, each bounce
55        # runner maintains an event log which is essentially a file with
56        # multiple pickles.  Each bounce we receive gets appended to this file
57        # as a 4-tuple record: (listname, addr, today, msg)
58        #
59        # today is itself a 3-tuple of (year, month, day)
60        #
61        # Every once in a while (see _doperiodic()), the bounce runner cracks
62        # open the file, reads all the records and registers all the bounces.
63        # Then it truncates the file and continues on.  We don't need to lock
64        # the bounce event file because bounce qrunners are single threaded
65        # and each creates a uniquely named file to contain the events.
66        #
67        # XXX When Python 2.3 is minimal require, we can use the new
68        # tempfile.TemporaryFile() function.
69        #
70        # XXX We used to classify bounces to the site list as bounce events
71        # for every list, but this caused severe problems.  Here's the
72        # scenario: aperson@example.com is a member of 4 lists, and a list
73        # owner of the foo list.  example.com has an aggressive spam filter
74        # which rejects any message that is spam or contains spam as an
75        # attachment.  Now, a spambot sends a piece of spam to the foo list,
76        # but since that spambot is not a member, the list holds the message
77        # for approval, and sends a notification to aperson@example.com as
78        # list owner.  That notification contains a copy of the spam.  Now
79        # example.com rejects the message, causing a bounce to be sent to the
80        # site list's bounce address.  The bounce runner would then dutifully
81        # register a bounce for all 4 lists that aperson@example.com was a
82        # member of, and eventually that person would get disabled on all
83        # their lists.  So now we ignore site list bounces.  Ce La Vie for
84        # password reminder bounces.
85        self._bounce_events_file = os.path.join(
86            mm_cfg.DATA_DIR, 'bounce-events-%05d.pck' % os.getpid())
87        self._bounce_events_fp = None
88        self._bouncecnt = 0
89        self._nextaction = time.time() + mm_cfg.REGISTER_BOUNCES_EVERY
90
91    def _queue_bounces(self, listname, addrs, msg):
92        today = time.localtime()[:3]
93        if self._bounce_events_fp is None:
94            omask = os.umask(006)
95            try:
96                self._bounce_events_fp = open(self._bounce_events_file, 'a+b')
97            finally:
98                os.umask(omask)
99        for addr in addrs:
100            cPickle.dump((listname, addr, today, msg),
101                         self._bounce_events_fp, 1)
102        self._bounce_events_fp.flush()
103        os.fsync(self._bounce_events_fp.fileno())
104        self._bouncecnt += len(addrs)
105
106    def _register_bounces(self):
107        syslog('bounce', '%s processing %s queued bounces',
108               self, self._bouncecnt)
109        # Read all the records from the bounce file, then unlink it.  Sort the
110        # records by listname for more efficient processing.
111        events = {}
112        self._bounce_events_fp.seek(0)
113        while True:
114            try:
115                listname, addr, day, msg = cPickle.load(self._bounce_events_fp)
116            except ValueError, e:
117                syslog('bounce', 'Error reading bounce events: %s', e)
118            except EOFError:
119                break
120            events.setdefault(listname, []).append((addr, day, msg))
121        # Now register all events sorted by list
122        for listname in events.keys():
123            mlist = self._open_list(listname)
124            mlist.Lock()
125            try:
126                for addr, day, msg in events[listname]:
127                    mlist.registerBounce(addr, msg, day=day)
128                mlist.Save()
129            finally:
130                mlist.Unlock()
131        # Reset and free all the cached memory
132        self._bounce_events_fp.close()
133        self._bounce_events_fp = None
134        os.unlink(self._bounce_events_file)
135        self._bouncecnt = 0
136
137    def _cleanup(self):
138        if self._bouncecnt > 0:
139            self._register_bounces()
140
141    def _doperiodic(self):
142        now = time.time()
143        if self._nextaction > now or self._bouncecnt == 0:
144            return
145        # Let's go ahead and register the bounces we've got stored up
146        self._nextaction = now + mm_cfg.REGISTER_BOUNCES_EVERY
147        self._register_bounces()
148
149    def _probe_bounce(self, mlist, token):
150        locked = mlist.Locked()
151        if not locked:
152            mlist.Lock()
153        try:
154            op, addr, bmsg = mlist.pend_confirm(token)
155            # For Python 2.4 compatibility we need an inner try because
156            # try: ... except: ... finally: requires Python 2.5+
157            try:
158                info = mlist.getBounceInfo(addr)
159                if not info:
160                    # info was deleted before probe bounce was received.
161                    # Just create a new info.
162                    info = _BounceInfo(addr,
163                                       0.0,
164                                       time.localtime()[:3],
165                                       mlist.bounce_you_are_disabled_warnings
166                                       )
167                mlist.disableBouncingMember(addr, info, bmsg)
168                # Only save the list if we're unlocking it
169                if not locked:
170                    mlist.Save()
171            except NotAMemberError:
172                # Member was removed before probe bounce returned.
173                # Just ignore it.
174                pass
175        finally:
176            if not locked:
177                mlist.Unlock()
178
179
180
class BounceRunner(Runner, BounceMixin):
    """Runner for the bounce queue directory (mm_cfg.BOUNCEQUEUE_DIR)."""

    QDIR = mm_cfg.BOUNCEQUEUE_DIR

    def __init__(self, slice=None, numslices=1):
        # Neither base chains to the other, so initialise both explicitly.
        Runner.__init__(self, slice, numslices)
        BounceMixin.__init__(self)

    def _dispose(self, mlist, msg, msgdata):
        """Classify one message from the bounce queue and act on it.

        Depending on the case the message is re-queued to the site owners,
        handled as a probe bounce, spooled as bounce events for the
        extracted addresses, passed to maybe_forward(), or dropped.
        """
        # Work from the freshest list state.
        mlist.Load()
        switchboard = get_switchboard(mm_cfg.OUTQUEUE_DIR)
        # Cases to tell apart:
        #
        # - A VERP'd bounce names the failing recipient directly, which is
        #   the easy case.
        # - A notice meant for a list owner bounced because the owner address
        #   itself is bad.  For now such messages are passed to the site
        #   list owner.  Since a non-VERP'd bounce does not reliably reveal
        #   its original recipient, every bounce arriving at the site list is
        #   treated as a potential owner bounce, so automated bounce
        #   processing does not work for the site list.
        # - A list owner configured list-bounces (or list-admin) as the owner
        #   address, producing a loop of ever-growing unrecognized bounces.
        #   Such a message is itself From: the site bounces address, and it
        #   is passed to the site list owner instead.
        #
        # Notices to list-owner carry the site-bounces address as envelope
        # sender and From:.  The site-list case and the looping case must be
        # handled separately, because sending to site-owner resets the
        # envelope sender.
        this_list = mlist.internal_name().lower()
        site_list = mm_cfg.MAILMAN_SITE_LIST.lower()
        if this_list == site_list:
            # Site list bounce: pass it to the site owners with the -loop
            # detection address as envelope sender, so that a bounce from
            # /them/ cannot start a bounce loop.
            loop_sender = Utils.get_site_email(extra='loop')
            switchboard.enqueue(msg, msgdata,
                                recips=mlist.owner,
                                envsender=loop_sender,
                                nodecorate=1)
            return
        # Possible loop on a non-site list's -bounces address?  Compare the
        # From: header (the unix_from might be VERP'd); this is also the
        # From: that Message.OwnerNotification uses.
        from_header = msg.get('from')
        if from_header == Utils.get_site_email(mlist.host_name, 'bounces'):
            # Hand it to sitelist-owner; should that bounce as well, the
            # site-list branch above deals with it.
            site_owner = Utils.get_site_email(extra='owner')
            loop_sender = Utils.get_site_email(extra='loop')
            switchboard.enqueue(msg, msgdata,
                                recips=[site_owner],
                                envsender=loop_sender,
                                nodecorate=1)
            return
        # Nothing more to do when the list has bounce processing disabled.
        if not mlist.bounce_processing:
            return
        # VERP detection goes first because it is quick and easy.
        addrs = verp_bounce(mlist, msg)
        if not addrs:
            # No VERP address; perhaps this answers a probe message.
            token = verp_probe(mlist, msg)
            if token:
                self._probe_bounce(mlist, token)
                return
            # Still nothing useful, so fall back to the old-fashioned bounce
            # matching modules.
            addrs = BouncerAPI.ScanMessages(mlist, msg)
            if addrs is BouncerAPI.Stop:
                # A recognized, non-fatal notice.  Ignore it.
                return
        elif BouncerAPI.ScanMessages(mlist, msg) is BouncerAPI.Stop:
            # VERP gave an address, but the notice itself is non-fatal.
            return
        # Strip false entries (ScanMessages() may let None's through) before
        # deciding, so that an empty list is never queued.
        addrs = filter(None, addrs)
        if addrs:
            self._queue_bounces(mlist.internal_name(), addrs, msg)
            return
        # No usable address at all: log it, then forward or discard.
        syslog('bounce',
               '%s: bounce message w/no discernable addresses: %s',
               mlist.internal_name(),
               msg.get('message-id', 'n/a'))
        maybe_forward(mlist, msg)

    def _doperiodic(self):
        # Runner is listed first among the bases, so route the periodic hook
        # to the mixin's implementation explicitly.
        return BounceMixin._doperiodic(self)

    def _cleanup(self):
        # Register pending bounce events first, then run Runner's cleanup.
        BounceMixin._cleanup(self)
        Runner._cleanup(self)
284
285
286
def verp_bounce(mlist, msg):
    """Extract the bouncing address from a VERP'd recipient header.

    The To:, Delivered-To:, Envelope-To: and Apparently-To: values of msg
    are searched with mm_cfg.VERP_REGEXP.  The first value that matches
    and whose 'bounces' group equals the local part of the list's bounces
    address decides the result.

    Returns a one-element list holding 'mailbox@host', or an empty list
    when no header qualifies or when VERP_REGEXP lacks the expected
    groups (the latter is also logged to the 'error' log).
    """
    # Only the local part is compared; the domain is unused.
    bmailbox, _bdomain = Utils.ParseEmail(mlist.GetBouncesEmail())
    # Sadly not every MTA bounces VERP messages correctly, or consistently.
    # Fall back to Delivered-To: (Postfix), Envelope-To: (Exim) and
    # Apparently-To:, and then short-circuit if we still don't have anything
    # to work with.  Note that there can be multiple Delivered-To: headers so
    # we need to search them all (and we don't worry about false positives for
    # forwarded email, because only one should match VERP_REGEXP).
    vals = []
    for header in ('to', 'delivered-to', 'envelope-to', 'apparently-to'):
        vals.extend(msg.get_all(header, []))
    for field in vals:
        to = parseaddr(field)[1]
        if not to:
            continue                          # empty header
        mo = re.search(mm_cfg.VERP_REGEXP, to)
        if not mo:
            continue                          # no match of regexp
        try:
            if bmailbox != mo.group('bounces'):
                continue                      # not a bounce to our list
            # All is good
            addr = '%s@%s' % mo.group('mailbox', 'host')
        except IndexError:
            syslog('error',
                   "VERP_REGEXP doesn't yield the right match groups: %s",
                   mm_cfg.VERP_REGEXP)
            return []
        return [addr]
    # Nothing matched.  Return an empty list explicitly so the result is
    # always a list, instead of the implicit None of falling off the end.
    return []
316
317
318
def verp_probe(mlist, msg):
    """Return the pending token of a bounced probe message, or None.

    Recipient-style headers of msg are searched with
    mm_cfg.VERP_PROBE_REGEXP.  A token is returned only when the match's
    'bounces' group equals the local part of the list's bounces address
    and mlist.pend_confirm(token, expunge=False) yields an entry.
    """
    bounces_localpart, _domain = Utils.ParseEmail(mlist.GetBouncesEmail())
    # MTAs are neither correct nor consistent about bouncing VERP'd mail, so
    # besides To: also consult Delivered-To: (Postfix), Envelope-To: (Exim)
    # and Apparently-To:.  Delivered-To: may occur several times, hence
    # every value is collected; false positives from forwarded mail are not
    # a concern since only one value should match VERP_PROBE_REGEXP.
    candidates = [value
                  for name in ('to', 'delivered-to',
                               'envelope-to', 'apparently-to')
                  for value in msg.get_all(name, [])]
    for raw in candidates:
        recipient = parseaddr(raw)[1]
        if not recipient:
            # empty header
            continue
        match = re.search(mm_cfg.VERP_PROBE_REGEXP, recipient)
        if not match:
            # regexp did not match
            continue
        try:
            if match.group('bounces') != bounces_localpart:
                # not a bounce to our list
                continue
            # Pull out the token and check that an entry is pending for it
            token = match.group('token')
            if mlist.pend_confirm(token, expunge=False) is not None:
                return token
        except IndexError:
            syslog(
                'error',
                "VERP_PROBE_REGEXP doesn't yield the right match groups: %s",
                mm_cfg.VERP_PROBE_REGEXP)
    return None
351
352
353
def maybe_forward(mlist, msg):
    """Forward an unrecognized bounce to the list owner(s), or discard it.

    The choice follows mlist.bounce_unrecognized_goes_to_list_owner;
    either outcome is recorded in the 'bounce' log with the Message-ID.
    """
    if not mlist.bounce_unrecognized_goes_to_list_owner:
        # The owner does not want non-matching bounces: just log the discard.
        syslog('bounce',
               '%s: discarding unrecognized, message-id: %s',
               mlist.internal_name(),
               msg.get('message-id', 'n/a'))
        return
    # NOTE(review): `adminurl` is never referenced explicitly; presumably
    # Mailman's _() fills the %(adminurl)s placeholder from this function's
    # local variables, so the name must stay as it is -- confirm against
    # Mailman.i18n.
    adminurl = mlist.GetScriptURL('admin', absolute=1) + '/bounce'
    notice = _("""\
The attached message was received as a bounce, but either the bounce format
was not recognized, or no member addresses could be extracted from it.  This
mailing list has been configured to send all unrecognized bounce messages to
the list administrator(s).

For more information see:
%(adminurl)s

""")
    mlist.ForwardMessage(msg,
                         text=notice,
                         subject=_('Uncaught bounce notification'),
                         tomoderators=0)
    syslog('bounce',
           '%s: forwarding unrecognized, message-id: %s',
           mlist.internal_name(),
           msg.get('message-id', 'n/a'))
381