1# Copyright (C) 1998-2018 by the Free Software Foundation, Inc.
2#
3# This program is free software; you can redistribute it and/or
4# modify it under the terms of the GNU General Public License
5# as published by the Free Software Foundation; either version 2
6# of the License, or (at your option) any later version.
7#
8# This program is distributed in the hope that it will be useful,
9# but WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11# GNU General Public License for more details.
12#
13# You should have received a copy of the GNU General Public License
14# along with this program; if not, write to the Free Software
15# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
16# USA.
17
18"""Standard Mailman message object.
19
20This is a subclass of email.Message but provides a slightly extended interface
21which is more convenient for use inside Mailman.
22"""
23
24import re
25from cStringIO import StringIO
26
27import email
28import email.Generator
29import email.Message
30import email.Utils
31from email.Charset import Charset
32from email.Header import Header
33
34from types import ListType, StringType
35
36from Mailman import mm_cfg
37from Mailman import Utils
38
# Separator used when several addresses are joined into one header value.
COMMASPACE = ', '

# Reduce the email package's version string to its leading run of digits and
# dots, then turn that into a tuple of ints so that versions can be compared
# with ordinary tuple comparison.
mo = re.match(r'([\d.]+)', email.__version__)
VERSION = tuple(map(int, mo.group().split('.')))
43
44
45
class Generator(email.Generator.Generator):
    """Flatten a Message object tree to text, keeping signatures.

    The generator itself folds headers at `maxheaderlen'.  Generators
    returned by clone() are created with `children_maxheaderlen' as their
    header limit instead; with the default of 0, headers in attachments are
    _not_ folded.
    """

    def __init__(self, outfp, mangle_from_=True,
                 maxheaderlen=78, children_maxheaderlen=0):
        """Set up the base generator and remember the limit for clones.

        outfp, mangle_from_ and maxheaderlen are handed to the base class
        unchanged.  children_maxheaderlen is only stored here; it is used
        by clone().
        """
        base = email.Generator.Generator
        base.__init__(self, outfp, mangle_from_, maxheaderlen)
        self.__children_maxheaderlen = children_maxheaderlen

    def clone(self, fp):
        """Return a new generator of the same class writing to fp.

        The clone keeps this generator's mangle_from_ setting and uses the
        children's header limit both as its own maxheaderlen and as the
        limit it will pass on to its own clones.
        """
        child_limit = self.__children_maxheaderlen
        factory = self.__class__
        return factory(fp, self._mangle_from_, child_limit, child_limit)
61
62
63
class Message(email.Message.Message):
    """Mailman's standard message class.

    Extends email.Message.Message with:

    - a __version__ stamp plus a __setstate__() hook that upgrades the
      attributes of messages pickled under older email package versions;
    - get_sender() and get_senders() for finding the author address(es);
    - a get_filename() that returns failobj instead of raising on
      malformed filename parameters;
    - an as_string() that flattens through this module's Generator.
    """

    def __init__(self):
        # We need a version number so that we can optimize __setstate__()
        self.__version__ = VERSION
        email.Message.Message.__init__(self)

    # BAW: For debugging w/ bin/dumpdb.  Apparently pprint uses repr.
    def __repr__(self):
        return self.__str__()

    def __setstate__(self, d):
        """Restore pickled state `d', upgrading old attribute layouts.

        The state is always stamped with the current VERSION.  The upgrade
        steps are skipped when the pickled state already carries a version
        at least as new as VERSION; a state without a version stamp is
        treated as version (0, 0, 0).
        """
        # The base class attributes have changed over time.  Which could
        # affect Mailman if messages are sitting in the queue at the time of
        # upgrading the email package.  We shouldn't burden email with this,
        # so we handle schema updates here.
        self.__dict__ = d
        # We know that email 2.4.3 is up-to-date
        version = d.get('__version__', (0, 0, 0))
        d['__version__'] = VERSION
        if version >= VERSION:
            return
        # Messages grew a _charset attribute between email version 0.97 and 1.1
        if '_charset' not in d:
            self._charset = None
        # Messages grew a _default_type attribute between v2.1 and v2.2
        if '_default_type' not in d:
            # We really have no idea whether this message object is contained
            # inside a multipart/digest or not, so I think this is the best we
            # can do.
            self._default_type = 'text/plain'
        # Header instances used to allow both strings and Charsets in their
        # _chunks, but by email 2.4.3 now it's just Charsets.
        headers = []
        hchanged = False
        for k, v in self._headers:
            if isinstance(v, Header):
                chunks = []
                cchanged = False
                for s, charset in v._chunks:
                    if isinstance(charset, str):
                        charset = Charset(charset)
                        cchanged = True
                    chunks.append((s, charset))
                if cchanged:
                    v._chunks = chunks
                    hchanged = True
            headers.append((k, v))
        # Only replace the header list when at least one Header was fixed.
        if hchanged:
            self._headers = headers

    # I think this method ought to eventually be deprecated
    def get_sender(self, use_envelope=None, preserve_case=0):
        """Return the address considered to be the author of the email.

        This can return either the From: header, the Sender: header or the
        envelope header (a.k.a. the unixfrom header).  The first non-empty
        address found is returned.  However the search order is determined
        by the following:

        - If mm_cfg.USE_ENVELOPE_SENDER is true, then the search order is
          Sender:, From:, unixfrom

        - Otherwise, the search order is From:, Sender:, unixfrom

        The optional argument use_envelope, if given overrides the
        mm_cfg.USE_ENVELOPE_SENDER setting.  It should be set to either 0 or 1
        (don't use None since that indicates no-override).

        unixfrom should never be empty, but if it is missing, or does not
        have an address as its second whitespace-separated field, the empty
        string is returned.  The return address is always lowercased, unless
        preserve_case is true.

        This method differs from get_senders() in that it returns one and only
        one address, and uses a different search order.
        """
        # Only consult the site configuration when the caller did not
        # override it.
        if use_envelope is None:
            senderfirst = mm_cfg.USE_ENVELOPE_SENDER
        else:
            senderfirst = use_envelope
        if senderfirst:
            headers = ('sender', 'from')
        else:
            headers = ('from', 'sender')
        for h in headers:
            # Use only the first occurrence of Sender: or From:, although it's
            # not likely there will be more than one.
            fieldval = self[h]
            if not fieldval:
                continue
            # Work around bug in email 2.5.8 (and ?) involving getaddresses()
            # from multi-line header values.
            # Don't use Utils.oneline() here because the header must not be
            # decoded before parsing since the decoded header may contain
            # an unquoted comma or other delimiter in a real name.
            fieldval = ''.join(fieldval.splitlines())
            addrs = email.Utils.getaddresses([fieldval])
            if not addrs:
                continue
            address = addrs[0][1]
            if address:
                break
        else:
            # We didn't find a non-empty header, so let's fall back to the
            # unixfrom address.  This should never be empty, but if it ever
            # is, it's probably a Really Bad Thing.  Further, we just assume
            # that if the unixfrom exists, the second field is the address.
            # A unixfrom with fewer than two fields is treated like a
            # missing one, just as get_senders() ignores it, instead of
            # raising IndexError.
            fields = (self.get_unixfrom() or '').split()
            if len(fields) > 1:
                address = fields[1]
            else:
                # TBD: now what?!
                address = ''
        if not preserve_case:
            return address.lower()
        return address

    def get_senders(self, preserve_case=0, headers=None):
        """Return a list of addresses representing the author of the email.

        Optional `headers' gives the search order: a sequence of field names
        without the trailing colon, in which None means "the unixfrom
        header".  When `headers' is not given, mm_cfg.SENDER_HEADERS is
        used.  Every address found in every listed header is returned, in
        search order.

        A unixfrom that is missing or has no second field contributes
        nothing.  The return addresses are always lower cased, unless
        `preserve_case' is true.
        """
        if headers is None:
            headers = mm_cfg.SENDER_HEADERS
        pairs = []
        for h in headers:
            if h is None:
                # get_unixfrom() returns None if there's no envelope
                fieldval = self.get_unixfrom() or ''
                try:
                    pairs.append(('', fieldval.split()[1]))
                except IndexError:
                    # Ignore badly formatted unixfroms
                    pass
            else:
                fieldvals = self.get_all(h)
                if fieldvals:
                    # See comment in get_sender() regarding
                    # getaddresses() and multi-line headers
                    fieldvals = [''.join(fv.splitlines())
                                 for fv in fieldvals]
                    pairs.extend(email.Utils.getaddresses(fieldvals))
        authors = []
        for pair in pairs:
            address = pair[1]
            if address is not None and not preserve_case:
                address = address.lower()
            authors.append(address)
        return authors

    def get_filename(self, failobj=None):
        """Return the message's filename, or failobj if it can't be had.

        Some MUA have bugs in RFC2231 filename encoding and cause
        Mailman to stop delivery in Scrubber.py (called from ToDigest.py).
        To avoid that, UnicodeError, LookupError and ValueError raised by
        the base class are swallowed and failobj is returned instead.
        """
        try:
            return email.Message.Message.get_filename(self, failobj)
        except (UnicodeError, LookupError, ValueError):
            return failobj

    def as_string(self, unixfrom=False, mangle_from_=True):
        """Return entire formatted message as a string using
        Mailman.Message.Generator.

        Operates like email.Message.Message.as_string, only
        using Mailman's Message.Generator class. Only the top headers will
        get folded.
        """
        fp = StringIO()
        g = Generator(fp, mangle_from_=mangle_from_)
        g.flatten(self, unixfrom=unixfrom)
        return fp.getvalue()
246
247
248
class UserNotification(Message):
    """Class for internally crafted messages."""

    def __init__(self, recip, sender, subject=None, text=None, lang=None):
        """Build a notification from `sender' to `recip'.

        recip is either a single address or a list or tuple of addresses.
        The To: header is set from it and the addresses are remembered in
        self.recips (always a list) for delivery.  A list passed in is
        stored as-is, not copied.

        subject defaults to '(no subject)'.  text, when given, becomes the
        payload.  lang, when given, selects the charset (looked up with
        Utils.GetCharSet) used for both the payload and the Subject: header.
        """
        Message.__init__(self)
        charset = None
        if lang is not None:
            charset = Charset(Utils.GetCharSet(lang))
        if text is not None:
            self.set_payload(text, charset)
        if subject is None:
            subject = '(no subject)'
        self['Subject'] = Header(subject, charset, header_name='Subject',
                                 errors='replace')
        self['From'] = sender
        # A tuple of addresses is a group of recipients too; without this it
        # would be treated as one (bogus) address below.
        if isinstance(recip, tuple):
            recip = list(recip)
        if isinstance(recip, list):
            self['To'] = COMMASPACE.join(recip)
            self.recips = recip
        else:
            self['To'] = recip
            self.recips = [recip]

    def send(self, mlist, noprecedence=False, **_kws):
        """Sends the message by enqueuing it to the `virgin' queue.

        This is used for all internally crafted messages.  Missing
        Message-ID: and Date: headers are added first.  Unless noprecedence
        is true or the message already has one, a Precedence: header is
        added as well.  Extra keyword arguments are passed on to _enqueue().
        """
        # Since we're crafting the message from whole cloth, let's make sure
        # this message has a Message-ID.  Yes, the MTA would give us one, but
        # this is useful for logging to logs/smtp.
        if 'message-id' not in self:
            self['Message-ID'] = Utils.unique_message_id(mlist)
        # Ditto for Date: which is required by RFC 2822
        if 'date' not in self:
            self['Date'] = email.Utils.formatdate(localtime=1)
        # UserNotifications are typically for admin messages, and for messages
        # other than list explosions.  Send these out as Precedence: bulk, but
        # don't override an existing Precedence: header.
        # Also, if the message is To: the list-owner address, set Precedence:
        # list.  See note below in OwnerNotification.
        if not ('precedence' in self or noprecedence):
            if self.get('to') == mlist.GetOwnerEmail():
                self['Precedence'] = 'list'
            else:
                self['Precedence'] = 'bulk'
        self._enqueue(mlist, **_kws)

    def _enqueue(self, mlist, **_kws):
        """Put this message into the virgin queue for `mlist'.

        The queue entry's metadata records the list's internal name and
        self.recips, and sets the nodecorate and reduced_list_headers flags.
        Extra keyword arguments are added to the metadata.
        """
        # Not imported at module scope to avoid import loop
        from Mailman.Queue.sbcache import get_switchboard
        virginq = get_switchboard(mm_cfg.VIRGINQUEUE_DIR)
        # The message metadata better have a `recip' attribute
        virginq.enqueue(self,
                        listname = mlist.internal_name(),
                        recips = self.recips,
                        nodecorate = 1,
                        reduced_list_headers = 1,
                        **_kws)
307
308
309
class OwnerNotification(UserNotification):
    """A UserNotification delivered to a list's owners.

    The message is addressed (To:) to the list's -owner address but is
    actually delivered to the individual owner addresses, and to the
    moderator addresses too unless tomoderators is false.
    """

    def __init__(self, mlist, subject=None, text=None, tomoderators=1):
        # Work on a copy so the list's own owner attribute is not modified
        # when the moderators are appended.
        recips = mlist.owner[:]
        if tomoderators:
            recips += mlist.moderator
        # The sender has to be the site's -bounces address; otherwise a
        # bouncing owner address would give us a mail loop.
        sender = Utils.get_site_email(mlist.host_name, 'bounces')
        UserNotification.__init__(self, recips, sender, subject, text,
                                  mlist.preferred_language)
        # Remembered so that _enqueue() can use it as the envelope sender.
        self._sender = sender
        # Hack the To header to look like it's going to the -owner address
        del self['to']
        self['To'] = mlist.GetOwnerEmail()
        # User notifications normally go out with Precedence: bulk, which
        # suits them since they can be backscatter of rejected spam.  Owner
        # notifications are not backscatter and are perhaps more important
        # than 'bulk', so they get Precedence: list by default.
        # (LP: #1313146)
        self['Precedence'] = 'list'

    def _enqueue(self, mlist, **_kws):
        """Queue the message like the base class does, plus an envsender.

        The metadata additionally carries envsender, set to the -bounces
        address chosen in __init__().
        """
        # Imported here rather than at module scope to avoid an import loop.
        from Mailman.Queue.sbcache import get_switchboard
        switchboard = get_switchboard(mm_cfg.VIRGINQUEUE_DIR)
        # The message metadata better have a `recip' attribute
        switchboard.enqueue(
            self,
            listname=mlist.internal_name(),
            recips=self.recips,
            nodecorate=1,
            reduced_list_headers=1,
            envsender=self._sender,
            **_kws)
345