1# Copyright (C) 2002-2018 by the Free Software Foundation, Inc.
3# This program is free software; you can redistribute it and/or
4# modify it under the terms of the GNU General Public License
5# as published by the Free Software Foundation; either version 2
6# of the License, or (at your option) any later version.
8# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
13# You should have received a copy of the GNU General Public License
14# along with this program; if not, write to the Free Software
15# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
16# USA.
"""MIME-stripping filter for Mailman.

This module scans a message for MIME content, removing those sections whose
MIME types match one of a list of matches.  multipart/alternative sections are
replaced by the first non-empty component, and multipart/mixed sections
wrapping only single sections after other processing are replaced by their
contents.
"""
27import os
28import errno
29import tempfile
30from os.path import splitext
32from email.Iterators import typed_subpart_iterator
34from Mailman import mm_cfg
35from Mailman import Errors
36from Mailman.Message import UserNotification
37from Mailman.Queue.sbcache import get_switchboard
38from Mailman.Logging.Syslog import syslog
39from Mailman.Version import VERSION
40from Mailman.i18n import _
41from Mailman.Utils import oneline
def process(mlist, msg, msgdata):
    """Filter the MIME content of msg in place according to mlist's settings.

    Nothing is done unless mlist.filter_content is set, and messages flagged
    with msgdata['isdigest'] are skipped.  Otherwise the outer content type
    and filename extension are checked against the list's filter/pass
    settings, matching sub-parts are removed, multipart/alternative parts are
    optionally collapsed, single-part multiparts are recast as that part, and
    text/html is optionally converted to text/plain.  If anything changed, an
    X-Content-Filtered-By: header is added.

    Whenever the rules reject the whole message, dispose() is called.  It
    always raises (Errors.RejectMessage or Errors.DiscardMessage), so none of
    those calls return here.
    """
    # Short-circuits
    if not mlist.filter_content:
        return
    if msgdata.get('isdigest'):
        return
    # We also don't care about our own digests or plaintext
    ctype = msg.get_content_type()
    mtype = msg.get_content_maintype()
    # Check to see if the outer type matches one of the filter types
    filtertypes = mlist.filter_mime_types
    passtypes = mlist.pass_mime_types
    if ctype in filtertypes or mtype in filtertypes:
        dispose(mlist, msg, msgdata,
                _("The message's content type was explicitly disallowed"))
    # Check to see if there is a pass types and the outer type doesn't match
    # one of these types
    if passtypes and not (ctype in passtypes or mtype in passtypes):
        dispose(mlist, msg, msgdata,
                _("The message's content type was not explicitly allowed"))
    # Filter by file extensions
    filterexts = mlist.filter_filename_extensions
    passexts = mlist.pass_filename_extensions
    fext = get_file_ext(msg)
    if fext:
        if fext in filterexts:
            dispose(mlist, msg, msgdata,
                 _("The message's file extension was explicitly disallowed"))
        if passexts and not (fext in passexts):
            dispose(mlist, msg, msgdata,
                 _("The message's file extension was not explicitly allowed"))
    # Remember how many parts we started with so we can tell later whether
    # filtering removed anything.
    numparts = len(list(msg.walk()))
    # If the message is a multipart, filter out matching subparts
    if msg.is_multipart():
        # Recursively filter out any subparts that match the filter list
        prelen = len(msg.get_payload())
        filter_parts(msg, filtertypes, passtypes, filterexts, passexts)
        # If the outer message is now an empty multipart (and it wasn't
        # before!) then, again it gets discarded.
        postlen = len(msg.get_payload())
        if postlen == 0 and prelen > 0:
            dispose(mlist, msg, msgdata,
                    _("After content filtering, the message was empty"))
    # Now replace all multipart/alternatives with just the first non-empty
    # alternative.  BAW: We have to special case when the outer part is a
    # multipart/alternative because we need to retain most of the outer part's
    # headers.  For now we'll move the subpart's payload into the outer part,
    # and then copy over its Content-Type: and Content-Transfer-Encoding:
    # headers (any others?).
    if mlist.collapse_alternatives:
        collapse_multipart_alternatives(msg)
        if ctype == 'multipart/alternative':
            try:
                firstalt = msg.get_payload(0)
            except (IndexError, TypeError):
                # The outer part claims to be multipart/alternative but has
                # no first sub-part (empty payload list, or a payload that
                # is not a list at all).  There is nothing to promote, so
                # leave the message as it is, just as
                # collapse_multipart_alternatives() does for inner parts.
                pass
            else:
                reset_payload(msg, firstalt)
    # Now that we've collapsed the MPA parts, go through the message
    # and recast any multipart parts with only one sub-part as just
    # the sub-part.
    if msg.is_multipart():
        recast_multipart(msg)
    # If we removed some parts, make note of this
    changedp = 0
    if numparts != len(list(msg.walk())):
        changedp = 1
    # Now perhaps convert all text/html to text/plain
    if mlist.convert_html_to_plaintext and mm_cfg.HTML_TO_PLAIN_TEXT_COMMAND:
        changedp += to_plaintext(msg)
    # If we're left with only two parts, an empty body and one attachment,
    # recast the message to one of just that part
    if msg.is_multipart() and len(msg.get_payload()) == 2:
        if msg.get_payload(0).get_payload() == '':
            useful = msg.get_payload(1)
            reset_payload(msg, useful)
            changedp = 1
    if changedp:
        msg['X-Content-Filtered-By'] = 'Mailman/MimeDel %s' % VERSION
def reset_payload(msg, subpart):
    """Make msg carry subpart's payload and content headers.

    msg's payload is replaced by subpart's payload.  msg's Content-Type:,
    Content-Transfer-Encoding:, Content-Disposition: and Content-Description:
    headers are removed and then re-created from subpart.  Content-Type:
    falls back to 'text/plain' when subpart has none; the other three are
    only added when subpart has a non-empty value for them.  All other
    headers of msg are left alone.
    """
    msg.set_payload(subpart.get_payload())
    # Drop every content header of the old outer part first.
    for stale in ('content-type', 'content-transfer-encoding',
                  'content-disposition', 'content-description'):
        del msg[stale]
    msg['Content-Type'] = subpart.get('content-type', 'text/plain')
    # The optional headers are copied in this fixed order.
    for header in ('Content-Transfer-Encoding', 'Content-Disposition',
                   'Content-Description'):
        value = subpart.get(header)
        if value:
            msg[header] = value
def filter_parts(msg, filtertypes, passtypes, filterexts, passexts):
    """Recursively strip disallowed sub-parts from msg, in place.

    A sub-part is dropped when any of the following holds:
      - it is a multipart that lost all of its own sub-parts to filtering;
      - its content type or main type is listed in filtertypes;
      - passtypes is non-empty and lists neither its content type nor its
        main type;
      - it has a filename extension that is listed in filterexts, or that
        is missing from a non-empty passexts.

    Returns 0 if msg had sub-parts and every one of them was dropped,
    otherwise 1 (a non-multipart msg always yields 1).
    """
    if not msg.is_multipart():
        return 1
    before = msg.get_payload()
    kept = []
    for part in before:
        # Filter the children first; a multipart left empty is itself dropped.
        if not filter_parts(part, filtertypes, passtypes,
                            filterexts, passexts):
            continue
        full_type = part.get_content_type()
        main_type = part.get_content_maintype()
        if full_type in filtertypes or main_type in filtertypes:
            continue
        if (passtypes and full_type not in passtypes
                and main_type not in passtypes):
            continue
        # Extension rules only apply to parts that actually have one.
        ext = get_file_ext(part)
        if ext and (ext in filterexts or (passexts and ext not in passexts)):
            continue
        kept.append(part)
    msg.set_payload(kept)
    # Tell the caller when everything was thrown away.
    if before and not kept:
        return 0
    return 1
def collapse_multipart_alternatives(msg):
    """Replace multipart/alternative sub-parts of msg by their first part.

    Works in place and recurses into other multipart sub-parts.  When msg
    itself is a message/rfc822 part, the multipart/alternative child is kept
    and rewritten with reset_payload() so that its headers are not lost;
    otherwise the child is swapped for its first alternative.  A
    multipart/alternative whose first part cannot be fetched is dropped.
    Non-multipart messages are left untouched.
    """
    if not msg.is_multipart():
        return
    collapsed = []
    for part in msg.get_payload():
        if part.get_content_type() != 'multipart/alternative':
            # Not an alternative: keep it, descending into containers first.
            if part.is_multipart():
                collapse_multipart_alternatives(part)
            collapsed.append(part)
            continue
        try:
            first = part.get_payload(0)
            if msg.get_content_type() == 'message/rfc822':
                # This is a multipart/alternative message in a
                # message/rfc822 part. We treat it specially so as not to
                # lose the headers.
                reset_payload(part, first)
                collapsed.append(part)
            else:
                collapsed.append(first)
        except (IndexError, TypeError):
            # No usable first alternative; the part is silently dropped.
            pass
    msg.set_payload(collapsed)
def recast_multipart(msg):
    """Recast multiparts that contain a single sub-part as that sub-part.

    Works in place and recursively.  Two kinds of part are never recast:

      - message/rfc822 parts, because we don't want to lose the headers
        (their sub-parts are still examined);
      - multipart/signed parts, which are not examined at all.  The original
        part may have had a multipart sub-part with only one sub-sub-part,
        the sig may still be valid and going further may break it.
        (LP: #1551075)
    """
    if msg.get_content_type() == 'multipart/signed':
        return
    if not msg.is_multipart():
        return
    if (len(msg.get_payload()) == 1 and
            msg.get_content_type() != 'message/rfc822'):
        reset_payload(msg, msg.get_payload(0))
        # now that we've recast this part, check the subordinate parts
        recast_multipart(msg)
    else:
        # This part's OK but check deeper.
        for part in msg.get_payload():
            recast_multipart(part)
def to_plaintext(msg):
    """Convert every text/html sub-part of msg to text/plain, in place.

    Each part's decoded payload is written to a temporary .html file whose
    name is substituted for %(filename)s in the shell command
    mm_cfg.HTML_TO_PLAIN_TEXT_COMMAND.  The command's standard output
    becomes the part's new payload, its Content-Transfer-Encoding: header is
    removed and its type is set to text/plain.  A non-zero exit status from
    the command is logged, but the output is still used.

    Returns 1 if at least one part was converted, otherwise 0.
    """
    import sys

    changedp = 0
    for subpart in typed_subpart_iterator(msg, 'text', 'html'):
        # mkstemp() creates the file itself and hands back an open
        # descriptor.  mktemp() only picks a name, which leaves a window in
        # which another process can create or symlink that path first.
        fd, filename = tempfile.mkstemp('.html')
        try:
            fp = os.fdopen(fd, 'w')
            try:
                fp.write(subpart.get_payload(decode=1))
            finally:
                # Close even if the write fails, and always before the
                # converter runs so that it sees the complete file.
                fp.close()
            cmd = os.popen(mm_cfg.HTML_TO_PLAIN_TEXT_COMMAND %
                           {'filename': filename})
            plaintext = cmd.read()
            rtn = cmd.close()
            if rtn:
                syslog('error', 'HTML->text/plain error: %s', rtn)
        finally:
            try:
                os.unlink(filename)
            except OSError:
                # A file that is already gone is fine; anything else is a
                # real error.  sys.exc_info() is used so that the handler
                # does not depend on version-specific "except ... , e" or
                # "except ... as e" syntax.
                if sys.exc_info()[1].errno != errno.ENOENT:
                    raise
        # Now replace the payload of the subpart and twiddle the Content-Type:
        del subpart['content-transfer-encoding']
        subpart.set_payload(plaintext)
        subpart.set_type('text/plain')
        changedp = 1
    return changedp
def dispose(mlist, msg, msgdata, why):
    """Dispose of a message rejected by content filtering; never returns.

    What happens depends on mlist.filter_action:

      0 - the message is discarded;
      1 - Errors.RejectMessage is raised with why as its argument;
      2 - the message is forwarded via mlist.ForwardMessage() and then
          discarded;
      3 - the message is enqueued in the mm_cfg.BADQUEUE_DIR switchboard
          (subject to the site setting checked below) and then discarded.

    Raises Errors.RejectMessage for action 1 and Errors.DiscardMessage in
    every other case.
    """
    # filter_action == 0 just discards, see below
    if mlist.filter_action == 1:
        # Bounce the message to the original author
        raise Errors.RejectMessage(why)
    if mlist.filter_action == 2:
        # Forward it on to the list owner
        # listname is not referenced explicitly below; presumably _()
        # interpolates %(listname)s from this function's local variables,
        # so do not remove it -- confirm against Mailman.i18n.
        listname = mlist.internal_name()
        mlist.ForwardMessage(
            msg,
            text=_("""\
The attached message matched the %(listname)s mailing list's content filtering
rules and was prevented from being forwarded on to the list membership.  You
are receiving the only remaining copy of the discarded message.

"""),
            subject=_('Content filtered message notification'))
    # NOTE(review): the second half of this condition and the end of the
    # message text above were missing from the reviewed copy of this file.
    # They have been restored to match the Mailman 2.1 site setting
    # OWNERS_CAN_PRESERVE_FILTERED_MESSAGES and the translated message
    # catalog entry -- confirm against the upstream source.
    if mlist.filter_action == 3 and \
       mm_cfg.OWNERS_CAN_PRESERVE_FILTERED_MESSAGES:
        badq = get_switchboard(mm_cfg.BADQUEUE_DIR)
        badq.enqueue(msg, msgdata)
    # Most cases also discard the message
    raise Errors.DiscardMessage
def get_file_ext(m):
    """Return the lower-cased filename extension of message part m.

    The filename is taken from m.get_filename() or, failing that, from the
    'name' parameter returned by m.get_param().  The leading dot is not
    part of the result.  '' is returned when there is no filename or the
    filename has no extension.

    Caution: some viruses don't put a filename in the Content-Disposition:
    header.
    """
    filename = m.get_filename('') or m.get_param('name', '')
    if not filename:
        return ''
    # splitext() yields either '' or a string starting with '.', so slicing
    # off the first character leaves the bare extension (or '').
    return splitext(oneline(filename, 'utf-8'))[1][1:].lower()