# Copyright (C) 2002-2018 by the Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
# USA.

"""MIME-stripping filter for Mailman.

This module scans a message for MIME content, removing those sections whose
MIME types match one of a list of matches. multipart/alternative sections are
replaced by the first non-empty component, and multipart/mixed sections
wrapping only single sections after other processing are replaced by their
contents.
"""

import os
import sys
import errno
import tempfile
from os.path import splitext

from email.Iterators import typed_subpart_iterator

from Mailman import mm_cfg
from Mailman import Errors
from Mailman.Message import UserNotification
from Mailman.Queue.sbcache import get_switchboard
from Mailman.Logging.Syslog import syslog
from Mailman.Version import VERSION
from Mailman.i18n import _
from Mailman.Utils import oneline



def process(mlist, msg, msgdata):
    """Apply the list's content-filtering rules to msg, in place.

    mlist supplies the filter settings (filter_content, filter_mime_types,
    pass_mime_types, filter_filename_extensions, pass_filename_extensions,
    collapse_alternatives, convert_html_to_plaintext); msg is the message
    object to filter; msgdata is the message metadata dictionary.

    Returns None.  When the whole message has to go, dispose() is called,
    which always raises (Errors.RejectMessage or Errors.DiscardMessage).
    If anything was removed or converted, an X-Content-Filtered-By: header
    is added to msg.
    """
    # Short-circuits: filtering is switched off, or the message is flagged
    # as a digest in its metadata.
    if not mlist.filter_content:
        return
    if msgdata.get('isdigest'):
        return
    ctype = msg.get_content_type()
    mtype = msg.get_content_maintype()
    # Check to see if the outer type matches one of the filter types
    filtertypes = mlist.filter_mime_types
    passtypes = mlist.pass_mime_types
    if ctype in filtertypes or mtype in filtertypes:
        dispose(mlist, msg, msgdata,
                _("The message's content type was explicitly disallowed"))
    # Check to see if there is a pass types and the outer type doesn't match
    # one of these types
    if passtypes and not (ctype in passtypes or mtype in passtypes):
        dispose(mlist, msg, msgdata,
                _("The message's content type was not explicitly allowed"))
    # Filter by file extensions
    filterexts = mlist.filter_filename_extensions
    passexts = mlist.pass_filename_extensions
    fext = get_file_ext(msg)
    if fext:
        if fext in filterexts:
            dispose(mlist, msg, msgdata,
                 _("The message's file extension was explicitly disallowed"))
        if passexts and not (fext in passexts):
            dispose(mlist, msg, msgdata,
                _("The message's file extension was not explicitly allowed"))
    # Remember how many parts we started with so we can tell later whether
    # filtering removed anything.
    numparts = len(list(msg.walk()))
    # If the message is a multipart, filter out matching subparts
    if msg.is_multipart():
        # Recursively filter out any subparts that match the filter list
        prelen = len(msg.get_payload())
        filter_parts(msg, filtertypes, passtypes, filterexts, passexts)
        # If the outer message is now an empty multipart (and it wasn't
        # before!) then, again it gets discarded.
        postlen = len(msg.get_payload())
        if postlen == 0 and prelen > 0:
            dispose(mlist, msg, msgdata,
                    _("After content filtering, the message was empty"))
    # Now replace all multipart/alternatives with just the first non-empty
    # alternative. BAW: We have to special case when the outer part is a
    # multipart/alternative because we need to retain most of the outer part's
    # headers. For now we'll move the subpart's payload into the outer part,
    # and then copy over its Content-Type: and Content-Transfer-Encoding:
    # headers (any others?).
    if mlist.collapse_alternatives:
        collapse_multipart_alternatives(msg)
        # A message can claim to be multipart/alternative yet carry a plain
        # string or an empty list as its payload; get_payload(0) would raise
        # on those, so only collapse when there really is a first part.
        # collapse_multipart_alternatives() is equally tolerant of inner
        # parts like this.
        if (ctype == 'multipart/alternative' and msg.is_multipart()
                and msg.get_payload()):
            firstalt = msg.get_payload(0)
            reset_payload(msg, firstalt)
    # Now that we've collapsed the MPA parts, go through the message
    # and recast any multipart parts with only one sub-part as just
    # the sub-part.
    if msg.is_multipart():
        recast_multipart(msg)
    # If we removed some parts, make note of this
    changedp = 0
    if numparts != len(list(msg.walk())):
        changedp = 1
    # Now perhaps convert all text/html to text/plain
    if mlist.convert_html_to_plaintext and mm_cfg.HTML_TO_PLAIN_TEXT_COMMAND:
        changedp += to_plaintext(msg)
    # If we're left with only two parts, an empty body and one attachment,
    # recast the message to one of just that part
    if msg.is_multipart() and len(msg.get_payload()) == 2:
        if msg.get_payload(0).get_payload() == '':
            useful = msg.get_payload(1)
            reset_payload(msg, useful)
            changedp = 1
    if changedp:
        msg['X-Content-Filtered-By'] = 'Mailman/MimeDel %s' % VERSION



def reset_payload(msg, subpart):
    """Replace msg's payload with subpart's and fix up the content headers.

    msg's Content-Type, Content-Transfer-Encoding, Content-Disposition and
    Content-Description headers are deleted and then re-created from
    subpart.  Content-Type falls back to text/plain when subpart has none;
    the other three are only set when subpart carries them.
    """
    payload = subpart.get_payload()
    msg.set_payload(payload)
    del msg['content-type']
    del msg['content-transfer-encoding']
    del msg['content-disposition']
    del msg['content-description']
    msg['Content-Type'] = subpart.get('content-type', 'text/plain')
    cte = subpart.get('content-transfer-encoding')
    if cte:
        msg['Content-Transfer-Encoding'] = cte
    cdisp = subpart.get('content-disposition')
    if cdisp:
        msg['Content-Disposition'] = cdisp
    cdesc = subpart.get('content-description')
    if cdesc:
        msg['Content-Description'] = cdesc



def filter_parts(msg, filtertypes, passtypes, filterexts, passexts):
    """Recursively drop the subparts of msg that the filter rules reject.

    A subpart is dropped when its content type or main type is listed in
    filtertypes, when passtypes is non-empty and lists neither, when its
    filename extension is in filterexts, when passexts is non-empty and
    does not list the extension, or when filtering emptied it out.

    Returns 0 if msg had subparts and every one of them was removed,
    otherwise 1.  A non-multipart msg is left untouched and yields 1.
    """
    # Look at all the message's subparts, and recursively filter
    if not msg.is_multipart():
        return 1
    payload = msg.get_payload()
    prelen = len(payload)
    newpayload = []
    for subpart in payload:
        keep = filter_parts(subpart, filtertypes, passtypes,
                            filterexts, passexts)
        if not keep:
            continue
        ctype = subpart.get_content_type()
        mtype = subpart.get_content_maintype()
        if ctype in filtertypes or mtype in filtertypes:
            # Throw this subpart away
            continue
        if passtypes and not (ctype in passtypes or mtype in passtypes):
            # Throw this subpart away
            continue
        # check file extension
        fext = get_file_ext(subpart)
        if fext:
            if fext in filterexts:
                continue
            if passexts and not (fext in passexts):
                continue
        newpayload.append(subpart)
    # Check to see if we discarded all the subparts
    postlen = len(newpayload)
    msg.set_payload(newpayload)
    if postlen == 0 and prelen > 0:
        # We threw away everything
        return 0
    return 1



def collapse_multipart_alternatives(msg):
    """Replace each multipart/alternative below msg with its first part.

    Works recursively on msg's subparts and rewrites msg's payload in
    place.  A multipart/alternative whose first part cannot be fetched is
    dropped altogether.  Does nothing when msg is not multipart.
    """
    if not msg.is_multipart():
        return
    newpayload = []
    for subpart in msg.get_payload():
        if subpart.get_content_type() == 'multipart/alternative':
            try:
                firstalt = subpart.get_payload(0)
                if msg.get_content_type() == 'message/rfc822':
                    # This is a multipart/alternative message in a
                    # message/rfc822 part. We treat it specially so as not to
                    # lose the headers.
                    reset_payload(subpart, firstalt)
                    newpayload.append(subpart)
                else:
                    newpayload.append(firstalt)
            except (IndexError, TypeError):
                # No usable first alternative; deliberately drop the part.
                pass
        elif subpart.is_multipart():
            collapse_multipart_alternatives(subpart)
            newpayload.append(subpart)
        else:
            newpayload.append(subpart)
    msg.set_payload(newpayload)



def recast_multipart(msg):
    """Recast multiparts that hold a single sub-part as just that sub-part.

    Works recursively and modifies msg in place.
    """
    # If we're left with a multipart message with only one sub-part, recast
    # the message to just the sub-part, but not if the part is message/rfc822
    # because we don't want to lose the headers.
    # Also, if this is a multipart/signed part, stop now as the original part
    # may have had a multipart sub-part with only one sub-sub-part, the sig
    # may still be valid and going further may break it. (LP: #1551075)
    if msg.get_content_type() == 'multipart/signed':
        return
    if msg.is_multipart():
        if (len(msg.get_payload()) == 1 and
                msg.get_content_type() != 'message/rfc822'):
            reset_payload(msg, msg.get_payload(0))
            # now that we've recast this part, check the subordinate parts
            recast_multipart(msg)
        else:
            # This part's OK but check deeper.
            for part in msg.get_payload():
                recast_multipart(part)



def _remove_tempfile(filename):
    # Delete filename; a file that is already gone is not an error.
    try:
        os.unlink(filename)
    except OSError:
        # sys.exc_info() is used instead of binding the exception in the
        # except clause, so this parses the same on old and new Pythons.
        if sys.exc_info()[1].errno != errno.ENOENT:
            raise



def to_plaintext(msg):
    """Convert every text/html part of msg to text/plain, in place.

    Each part's decoded payload is written to a temporary .html file and
    mm_cfg.HTML_TO_PLAIN_TEXT_COMMAND (interpolated with 'filename') is run
    on it; the command's output becomes the part's new payload.  A non-zero
    exit status from the command is logged but the output is still used.

    Returns 1 if at least one part was converted, otherwise 0.
    """
    changedp = 0
    for subpart in typed_subpart_iterator(msg, 'text', 'html'):
        # mkstemp() creates the file atomically under an unpredictable name.
        # The old mktemp()/open() pair left a window in which another local
        # user could create or symlink the path first.
        fd, filename = tempfile.mkstemp('.html')
        try:
            fp = os.fdopen(fd, 'w')
            try:
                fp.write(subpart.get_payload(decode=1))
            finally:
                # Close even if the write fails, so no descriptor leaks.
                fp.close()
            cmd = os.popen(mm_cfg.HTML_TO_PLAIN_TEXT_COMMAND %
                           {'filename': filename})
            plaintext = cmd.read()
            rtn = cmd.close()
            if rtn:
                syslog('error', 'HTML->text/plain error: %s', rtn)
        finally:
            _remove_tempfile(filename)
        # Now replace the payload of the subpart and twiddle the Content-Type:
        del subpart['content-transfer-encoding']
        subpart.set_payload(plaintext)
        subpart.set_type('text/plain')
        changedp = 1
    return changedp



def dispose(mlist, msg, msgdata, why):
    """Get rid of msg according to mlist.filter_action; never returns.

    filter_action 1 raises Errors.RejectMessage carrying why.
    filter_action 2 forwards the message via mlist.ForwardMessage().
    filter_action 3 enqueues the message in mm_cfg.BADQUEUE_DIR, provided
    mm_cfg.OWNERS_CAN_PRESERVE_FILTERED_MESSAGES is set.
    Every action other than 1 ends by raising Errors.DiscardMessage.
    """
    # filter_action == 0 just discards, see below
    if mlist.filter_action == 1:
        # Bounce the message to the original author
        raise Errors.RejectMessage(why)
    if mlist.filter_action == 2:
        # Forward it on to the list owner
        # Keep this local even though it looks unused: the text below has a
        # %(listname)s placeholder, presumably filled in by _() from this
        # frame's variables -- confirm against Mailman.i18n.
        listname = mlist.internal_name()
        # NOTE(review): the text below is a translation key, so its exact
        # whitespace matters -- confirm it against the message catalogs.
        mlist.ForwardMessage(
            msg,
            text=_("""\
The attached message matched the %(listname)s mailing list's content filtering
rules and was prevented from being forwarded on to the list membership. You
are receiving the only remaining copy of the discarded message.

"""),
            subject=_('Content filtered message notification'))
    if mlist.filter_action == 3 and \
           mm_cfg.OWNERS_CAN_PRESERVE_FILTERED_MESSAGES:
        badq = get_switchboard(mm_cfg.BADQUEUE_DIR)
        badq.enqueue(msg, msgdata)
    # Most cases also discard the message
    raise Errors.DiscardMessage


def get_file_ext(m):
    """Return the lower-cased filename extension of part m, without the dot.

    The filename is taken from m.get_filename(), falling back to the 'name'
    parameter of the Content-Type: header.  Returns '' when there is no
    filename or it has no extension.

    Caution: some viruses don't put a filename in the Content-Disposition:
    header.
    """
    fext = ''
    filename = m.get_filename('') or m.get_param('name', '')
    if filename:
        # splitext() gives '' or '.ext'; dropping the first character removes
        # the dot, and leaves '' when there is no extension at all.
        fext = splitext(oneline(filename, 'utf-8'))[1][1:]
    return fext.lower()