1# Copyright (C) 2002-2018 by the Free Software Foundation, Inc.
2#
3# This program is free software; you can redistribute it and/or
4# modify it under the terms of the GNU General Public License
5# as published by the Free Software Foundation; either version 2
6# of the License, or (at your option) any later version.
7#
8# This program is distributed in the hope that it will be useful,
9# but WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11# GNU General Public License for more details.
12#
13# You should have received a copy of the GNU General Public License
14# along with this program; if not, write to the Free Software
15# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
16# USA.
17
18"""MIME-stripping filter for Mailman.
19
20This module scans a message for MIME content, removing those sections whose
21MIME types match one of a list of matches.  multipart/alternative sections are
22replaced by the first non-empty component, and multipart/mixed sections
23wrapping only single sections after other processing are replaced by their
24contents.
25"""
26
27import os
28import errno
29import tempfile
30from os.path import splitext
31
32from email.Iterators import typed_subpart_iterator
33
34from Mailman import mm_cfg
35from Mailman import Errors
36from Mailman.Message import UserNotification
37from Mailman.Queue.sbcache import get_switchboard
38from Mailman.Logging.Syslog import syslog
39from Mailman.Version import VERSION
40from Mailman.i18n import _
41from Mailman.Utils import oneline
42
43
44
def process(mlist, msg, msgdata):
    """Apply the list's MIME content filter to `msg`, modifying it in place.

    Nothing is done when mlist.filter_content is false or when msgdata
    marks the message as a digest.  Otherwise the message is checked and
    trimmed in this order:

    1. The outer content type and the outer filename extension are tested
       against the list's filter/pass settings; a violation is handed to
       dispose().
    2. Sub-parts of a multipart message are filtered recursively; if that
       empties a message that had sub-parts, it is handed to dispose().
    3. If mlist.collapse_alternatives is set, multipart/alternative parts
       are replaced by their first alternative.
    4. Multiparts left with a single sub-part are recast as that sub-part.
    5. If mlist.convert_html_to_plaintext and
       mm_cfg.HTML_TO_PLAIN_TEXT_COMMAND are both set, text/html parts are
       converted to text/plain.
    6. A two-part message whose first part has an empty payload is recast
       as its second part.

    An X-Content-Filtered-By: header is added if any step changed the
    message.  dispose() always raises, so every call to it below ends
    processing of the message.
    """
    # Short-circuits: filtering is off, or this is one of our own digests.
    if not mlist.filter_content:
        return
    if msgdata.get('isdigest'):
        return
    # Remember the outer type as it was on arrival; later steps rewrite the
    # message but still need to know what it started out as.
    ctype = msg.get_content_type()
    mtype = msg.get_content_maintype()
    # Check to see if the outer type matches one of the filter types
    filtertypes = mlist.filter_mime_types
    passtypes = mlist.pass_mime_types
    if ctype in filtertypes or mtype in filtertypes:
        dispose(mlist, msg, msgdata,
                _("The message's content type was explicitly disallowed"))
    # Check to see if there is a pass types and the outer type doesn't match
    # one of these types
    if passtypes and not (ctype in passtypes or mtype in passtypes):
        dispose(mlist, msg, msgdata,
                _("The message's content type was not explicitly allowed"))
    # Filter by file extensions
    filterexts = mlist.filter_filename_extensions
    passexts = mlist.pass_filename_extensions
    fext = get_file_ext(msg)
    if fext:
        if fext in filterexts:
            dispose(mlist, msg, msgdata,
                 _("The message's file extension was explicitly disallowed"))
        if passexts and not (fext in passexts):
            dispose(mlist, msg, msgdata,
                 _("The message's file extension was not explicitly allowed"))
    # Count the parts now so we can tell later whether any were removed.
    numparts = len(list(msg.walk()))
    # If the message is a multipart, filter out matching subparts
    if msg.is_multipart():
        # Recursively filter out any subparts that match the filter list
        prelen = len(msg.get_payload())
        filter_parts(msg, filtertypes, passtypes, filterexts, passexts)
        # If the outer message is now an empty multipart (and it wasn't
        # before!) then, again it gets discarded.
        postlen = len(msg.get_payload())
        if postlen == 0 and prelen > 0:
            dispose(mlist, msg, msgdata,
                    _("After content filtering, the message was empty"))
    # Now replace all multipart/alternatives with just the first non-empty
    # alternative.  BAW: We have to special case when the outer part is a
    # multipart/alternative because we need to retain most of the outer part's
    # headers.  For now we'll move the subpart's payload into the outer part,
    # and then copy over its Content-Type: and Content-Transfer-Encoding:
    # headers (any others?).
    if mlist.collapse_alternatives:
        collapse_multipart_alternatives(msg)
        # Only collapse the outer part when it really has a first
        # alternative.  A message declared multipart/alternative whose
        # payload is not a list, or is an empty list, is left untouched
        # instead of raising TypeError/IndexError out of the handler.
        if ctype == 'multipart/alternative' and msg.is_multipart():
            alternatives = msg.get_payload()
            if alternatives:
                reset_payload(msg, alternatives[0])
    # Now that we've collapsed the MPA parts, go through the message
    # and recast any multipart parts with only one sub-part as just
    # the sub-part.
    if msg.is_multipart():
        recast_multipart(msg)
    # If we removed some parts, make note of this
    changedp = 0
    if numparts != len(list(msg.walk())):
        changedp = 1
    # Now perhaps convert all text/html to text/plain
    if mlist.convert_html_to_plaintext and mm_cfg.HTML_TO_PLAIN_TEXT_COMMAND:
        changedp += to_plaintext(msg)
    # If we're left with only two parts, an empty body and one attachment,
    # recast the message to one of just that part
    if msg.is_multipart() and len(msg.get_payload()) == 2:
        if msg.get_payload(0).get_payload() == '':
            useful = msg.get_payload(1)
            reset_payload(msg, useful)
            changedp = 1
    if changedp:
        msg['X-Content-Filtered-By'] = 'Mailman/MimeDel %s' % VERSION
120
121
122
def reset_payload(msg, subpart):
    """Make `msg` carry the payload and content headers of `subpart`.

    The payload of `msg` is replaced by that of `subpart`.  The
    Content-Type, Content-Transfer-Encoding, Content-Disposition and
    Content-Description headers of `msg` are removed and re-created from
    `subpart`; Content-Type falls back to text/plain when `subpart` has
    none, and the other three are only set when `subpart` supplies a
    non-empty value.  All other headers of `msg` are left alone.
    """
    msg.set_payload(subpart.get_payload())
    # Drop every content header first so nothing stale survives.
    for stale in ('content-type',
                  'content-transfer-encoding',
                  'content-disposition',
                  'content-description'):
        del msg[stale]
    # Content-Type is always present afterwards; the rest are optional.
    msg['Content-Type'] = subpart.get('content-type', 'text/plain')
    for optional in ('Content-Transfer-Encoding',
                     'Content-Disposition',
                     'Content-Description'):
        value = subpart.get(optional)
        if value:
            msg[optional] = value
141
142
143
def filter_parts(msg, filtertypes, passtypes, filterexts, passexts):
    """Recursively drop the sub-parts of `msg` that the filters reject.

    A sub-part is dropped when filtering its own children emptied it, when
    its content type or main type is in `filtertypes`, when `passtypes` is
    non-empty and names neither its content type nor its main type, or
    when it has a filename extension that is in `filterexts` or is missing
    from a non-empty `passexts`.  The payload of `msg` is replaced by the
    list of surviving sub-parts.

    Returns 0 if `msg` had sub-parts and all of them were dropped,
    otherwise 1.  A message that is not multipart is left untouched and
    yields 1.
    """
    if not msg.is_multipart():
        return 1
    children = msg.get_payload()
    had_children = len(children) > 0
    survivors = []
    for child in children:
        # Filter the child's own sub-parts first; a child emptied by that
        # is dropped without looking at its type.
        if not filter_parts(child, filtertypes, passtypes,
                            filterexts, passexts):
            continue
        fulltype = child.get_content_type()
        maintype = child.get_content_maintype()
        if fulltype in filtertypes or maintype in filtertypes:
            # Explicitly disallowed type
            continue
        if passtypes and fulltype not in passtypes \
               and maintype not in passtypes:
            # Not among the explicitly allowed types
            continue
        # Parts without a filename extension skip the extension rules.
        ext = get_file_ext(child)
        if ext:
            if ext in filterexts:
                continue
            if passexts and ext not in passexts:
                continue
        survivors.append(child)
    msg.set_payload(survivors)
    # Report failure only when we actually threw everything away.
    if had_children and not survivors:
        return 0
    return 1
179
180
181
def collapse_multipart_alternatives(msg):
    """Replace multipart/alternative sub-parts of `msg` by their first part.

    Works in place and recurses into other multipart sub-parts.  Does
    nothing if `msg` is not multipart.  A multipart/alternative sub-part
    from which no first alternative can be taken is dropped.
    """
    if not msg.is_multipart():
        return
    # Inside a message/rfc822 container the alternative *is* the enclosed
    # message, so its headers must be kept.
    in_rfc822 = msg.get_content_type() == 'message/rfc822'
    kept = []
    for part in msg.get_payload():
        if part.get_content_type() != 'multipart/alternative':
            # Not an alternative: keep it, collapsing below it if needed.
            if part.is_multipart():
                collapse_multipart_alternatives(part)
            kept.append(part)
            continue
        try:
            first = part.get_payload(0)
            if in_rfc822:
                # Keep the part itself (and thus its headers), giving it
                # the content of its first alternative.
                reset_payload(part, first)
                kept.append(part)
            else:
                kept.append(first)
        except (IndexError, TypeError):
            # No first alternative available; the part is dropped.
            pass
    msg.set_payload(kept)
206
207
208
def recast_multipart(msg):
    """Recast multiparts holding a single sub-part as that sub-part.

    Works in place and recursively.  A multipart `msg` with exactly one
    sub-part takes over that sub-part's payload and content headers (via
    reset_payload) and is then examined again; otherwise each sub-part is
    examined in turn.  Two kinds of part are never recast themselves:

    - message/rfc822, because we don't want to lose the enclosed message's
      headers; its sub-parts are still examined.
    - multipart/signed, where processing stops altogether: the original
      part may have had a multipart sub-part with only one sub-sub-part,
      the signature may still be valid and going further may break it.
      (LP: #1551075)
    """
    if msg.get_content_type() == 'multipart/signed':
        return
    if not msg.is_multipart():
        return
    subparts = msg.get_payload()
    if len(subparts) == 1 and msg.get_content_type() != 'message/rfc822':
        reset_payload(msg, subparts[0])
        # Now that we've recast this part, check it again: what it holds
        # now may itself be a multipart with a single sub-part.
        recast_multipart(msg)
    else:
        # This part's OK but check deeper.
        for part in subparts:
            recast_multipart(part)
228
229
230
def to_plaintext(msg):
    """Convert every text/html part of `msg` to text/plain, in place.

    The decoded payload of each text/html part is written to a temporary
    file, mm_cfg.HTML_TO_PLAIN_TEXT_COMMAND is run through os.popen() with
    %(filename)s replaced by the path of that file, and whatever the
    command prints becomes the part's new payload.  The part's
    Content-Transfer-Encoding: header is removed and its type is set to
    text/plain.  A non-zero exit status from the command is logged to the
    'error' log, but the command's output is used regardless.

    Returns 1 if at least one part was converted, otherwise 0.
    """
    changedp = 0
    for subpart in typed_subpart_iterator(msg, 'text', 'html'):
        # mkstemp() creates the file atomically, so nobody else can claim
        # the name between choosing it and opening it (which mktemp()
        # followed by open() allows).
        fd, filename = tempfile.mkstemp(suffix='.html')
        try:
            fp = os.fdopen(fd, 'w')
            try:
                fp.write(subpart.get_payload(decode=1))
            finally:
                # Close even when the write fails, and always before the
                # converter reads the file.
                fp.close()
            cmd = os.popen(mm_cfg.HTML_TO_PLAIN_TEXT_COMMAND %
                           {'filename': filename})
            plaintext = cmd.read()
            rtn = cmd.close()
            if rtn:
                syslog('error', 'HTML->text/plain error: %s', rtn)
        finally:
            try:
                os.unlink(filename)
            except OSError:
                # A file that is already gone is fine; anything else that
                # left the file behind is a real error.
                if os.path.exists(filename):
                    raise
        # Now replace the payload of the subpart and twiddle the Content-Type:
        del subpart['content-transfer-encoding']
        subpart.set_payload(plaintext)
        subpart.set_type('text/plain')
        changedp = 1
    return changedp
256
257
258
def dispose(mlist, msg, msgdata, why):
    """Dispose of a message that failed content filtering; never returns.

    What happens depends on mlist.filter_action:

    1 -- Errors.RejectMessage is raised with `why` as its argument, to
         bounce the message to the original author.
    2 -- the message is forwarded on to the list owner with
         mlist.ForwardMessage(), then discarded.
    3 -- the message is enqueued, together with msgdata, on the
         switchboard for mm_cfg.BADQUEUE_DIR, but only when
         mm_cfg.OWNERS_CAN_PRESERVE_FILTERED_MESSAGES is true; then
         discarded.
    Any other value (0 included) just discards the message.

    Raises Errors.RejectMessage for action 1 and Errors.DiscardMessage in
    every other case.
    """
    if mlist.filter_action == 1:
        # Bounce the message to the original author
        raise Errors.RejectMessage(why)
    if mlist.filter_action == 2:
        # Forward it on to the list owner.
        # NOTE(review): `listname` is only referenced as %(listname)s in
        # the text below; presumably _() fills that in from this
        # function's local variables -- do not rename or inline it without
        # confirming.
        listname = mlist.internal_name()
        mlist.ForwardMessage(
            msg,
            text=_("""\
The attached message matched the %(listname)s mailing list's content filtering
rules and was prevented from being forwarded on to the list membership.  You
are receiving the only remaining copy of the discarded message.

"""),
            subject=_('Content filtered message notification'))
    if (mlist.filter_action == 3 and
            mm_cfg.OWNERS_CAN_PRESERVE_FILTERED_MESSAGES):
        # Keep a copy of the message and its metadata in the bad queue.
        badq = get_switchboard(mm_cfg.BADQUEUE_DIR)
        badq.enqueue(msg, msgdata)
    # Most cases also discard the message
    raise Errors.DiscardMessage
282
def get_file_ext(m):
    """Return the lower-cased filename extension of message part `m`.

    The filename is taken from m.get_filename() or, failing that, from the
    part's 'name' parameter.  The extension is returned without its
    leading dot; the result is '' when there is no filename or the
    filename has no extension.

    Caution: some viruses don't put the filename in the
    Content-Disposition: header.
    """
    filename = m.get_filename('') or m.get_param('name', '')
    if not filename:
        return ''
    suffix = splitext(oneline(filename, 'utf-8'))[1]
    # splitext() keeps the leading dot, so a suffix of one character or
    # less carries no extension at all.
    if len(suffix) <= 1:
        return ''
    return suffix[1:].lower()
297