# Copyright (C) 1998-2018 by the Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
# USA.

"""Cook a message's Subject header.
Also do other manipulations of From:, Reply-To: and Cc: depending on
list configuration.
"""

from __future__ import nested_scopes
import re
from types import UnicodeType

from email.Charset import Charset
from email.Header import Header, decode_header, make_header
from email.Utils import parseaddr, formataddr, getaddresses
from email.Errors import HeaderParseError

from Mailman import i18n
from Mailman import mm_cfg
from Mailman import Utils
from Mailman.i18n import _
from Mailman.Logging.Syslog import syslog

CONTINUATION = ',\n '
COMMASPACE = ', '
MAXLINELEN = 78

# Matches the two placeholders of the translatable "... via ..." display
# name template used when munging From:.
_VIA_PLACEHOLDER = re.compile(r'%\((realname|lrn)\)s')



def _isunicode(s):
    """Return true if s is a unicode string object."""
    return isinstance(s, UnicodeType)

nonascii = re.compile(r'[^\s!-~]')

def uheader(mlist, s, header_name=None, continuation_ws=' ', maxlinelen=None):
    """Return s wrapped in an email Header using a suitable charset.

    The charset is the one of the list's preferred language when s contains
    a character outside whitespace and printable ascii, except that
    'us-ascii' is upgraded to 'iso-8859-1' in that case.  A string with no
    such character is always tagged 'us-ascii'.

    If Header() raises UnicodeError the problem is logged and an empty
    Header with the same parameters is returned instead.
    """
    # Get the charset to encode the string in.  Then search whether there is
    # any non-ascii character in the string.  If there is and the charset is
    # us-ascii then we use iso-8859-1 instead.  If the string is ascii only
    # we use 'us-ascii' even if another charset is specified.
    charset = Utils.GetCharSet(mlist.preferred_language)
    if nonascii.search(s):
        # use list charset but ...
        if charset == 'us-ascii':
            charset = 'iso-8859-1'
    else:
        # there is no nonascii so ...
        charset = 'us-ascii'
    try:
        return Header(s, charset, maxlinelen, header_name, continuation_ws)
    except UnicodeError:
        syslog('error', 'list: %s: can\'t decode "%s" as %s',
               mlist.internal_name(), s, charset)
        return Header('', charset, maxlinelen, header_name, continuation_ws)

def change_header(name, value, mlist, msg, msgdata, delete=True, repl=True):
    """Set header `name' to `value', either now or deferred via msgdata.

    The change is stored in msgdata['add_header'] instead of being applied
    to msg when the name is From, Reply-To or Cc (any case), or when the
    effective from_is_list setting is 2 and the message is not fast tracked.

    Otherwise the header is set on msg when `repl' is true or the header is
    not present yet.  With `delete' true any existing headers of that name
    are removed first; with `delete' false the new value is added alongside
    them.
    """
    if ((msgdata.get('from_is_list') == 2 or
        (msgdata.get('from_is_list') == 0 and mlist.from_is_list == 2)) and
        not msgdata.get('_fasttrack')
       ) or name.lower() in ('from', 'reply-to', 'cc'):
        # The or name.lower() in ... above is because when we are munging
        # the From:, we want to defer the resultant changes to From:,
        # Reply-To:, and/or Cc: until after the message passes through
        # ToDigest, ToArchive and ToUsenet.  Thus, we put them in
        # msgdata[add_header] here and apply them in WrapMessage.
        msgdata.setdefault('add_header', {})[name] = value
    elif repl or name not in msg:
        if delete:
            del msg[name]
        msg[name] = value



def _fill_via(template, realname, listname):
    """Fill the %(realname)s and %(lrn)s placeholders of template.

    The substitution is done in a single pass with a replacement function,
    so the inserted values are taken literally: backslashes in a display
    name are not interpreted as regular expression escapes or group
    references, and placeholder-looking text inside an inserted value is
    not expanded again.
    """
    values = {'realname': realname, 'lrn': listname}
    return _VIA_PLACEHOLDER.sub(lambda mo: values[mo.group(1)], template)



def process(mlist, msg, msgdata):
    """Cook the headers of msg for delivery through list mlist.

    Depending on list configuration and on flags in msgdata this prefixes
    the Subject:, adds X-BeenThere:, X-Mailman-Version: and Precedence:,
    munges From:, Reply-To: and Cc:, and adds the RFC 2369/2919 List-*
    headers.  All header changes go through change_header(), so some of
    them are only recorded in msgdata['add_header'].  The original sender
    is saved in msgdata['original_sender'] if not already there.
    """
    # Set the "X-Ack: no" header if noack flag is set.
    if msgdata.get('noack'):
        change_header('X-Ack', 'no', mlist, msg, msgdata)
    # Because we're going to modify various important headers in the email
    # message, we want to save some of the information in the msgdata
    # dictionary for later.  Specifically, the sender header will get waxed,
    # but we need it for the Acknowledge module later.
    # We may have already saved it; if so, don't clobber it here.
    if 'original_sender' not in msgdata:
        msgdata['original_sender'] = msg.get_sender()
    # VirginRunner sets _fasttrack for internally crafted messages.
    fasttrack = msgdata.get('_fasttrack')
    if not msgdata.get('isdigest') and not fasttrack:
        try:
            prefix_subject(mlist, msg, msgdata)
        except (UnicodeError, ValueError):
            # TK: Sometimes subject header is not MIME encoded for 8bit
            # simply abort prefixing.
            pass
    # Mark message so we know we've been here, but leave any existing
    # X-BeenThere's intact.
    change_header('X-BeenThere', mlist.GetListEmail(),
                  mlist, msg, msgdata, delete=False)
    # Add Precedence: and other useful headers.  None of these are standard
    # and finding information on some of them is fairly difficult.  Some are
    # just common practice, and we'll add more here as they become necessary.
    # Good places to look are:
    #
    # http://www.dsv.su.se/~jpalme/ietf/jp-ietf-home.html
    # http://www.faqs.org/rfcs/rfc2076.html
    #
    # None of these headers are added if they already exist.  BAW: some
    # consider the advertising of this a security breach.  I.e. if there are
    # known exploits in a particular version of Mailman and we know a site is
    # using such an old version, they may be vulnerable.  It's too easy to
    # edit the code to add a configuration variable to handle this.
    change_header('X-Mailman-Version', mm_cfg.VERSION,
                  mlist, msg, msgdata, repl=False)
    # We set "Precedence: list" because this is the recommendation from the
    # sendmail docs, the most authoritative source of this header's semantics.
    change_header('Precedence', 'list',
                  mlist, msg, msgdata, repl=False)
    # Do we change the from so the list takes ownership of the email
    if (msgdata.get('from_is_list') or mlist.from_is_list) and not fasttrack:
        # Be as robust as possible here.
        faddrs = getaddresses(msg.get_all('from', []))
        # Strip the nulls and bad emails.
        faddrs = [x for x in faddrs if x[1].find('@') > 0]
        if len(faddrs) == 1:
            realname, email = o_from = faddrs[0]
        else:
            # No From: or multiple addresses.  Just punt and take
            # the get_sender result.
            realname = ''
            email = msgdata['original_sender']
            o_from = (realname, email)
        if not realname:
            if mlist.isMember(email):
                realname = mlist.getMemberName(email) or email
            else:
                realname = email
        # Remove domain from realname if it looks like an email address
        realname = re.sub(r'@([^ .]+\.)+[^ .]+$', '---', realname)
        # Make a display name and RFC 2047 encode it if necessary.  This is
        # difficult and kludgy.  If the realname came from From: it should be
        # ascii or RFC 2047 encoded.  If it came from the list, it should be
        # in the charset of the list's preferred language or possibly unicode.
        # if it's from the email address, it should be ascii.  In any case,
        # make it a unicode.
        if isinstance(realname, unicode):
            urn = realname
        else:
            rn, cs = ch_oneline(realname)
            urn = unicode(rn, cs, errors='replace')
        # likewise, the list's real_name which should be ascii, but use the
        # charset of the list's preferred_language which should be a superset.
        lcs = Utils.GetCharSet(mlist.preferred_language)
        ulrn = unicode(mlist.real_name, lcs, errors='replace')
        # get translated 'via' with dummy replacements.  These two locals
        # must stay in the same function as the _() call below.
        realname = '%(realname)s'
        lrn = '%(lrn)s'
        # We want the i18n context to be the list's preferred_language.  It
        # could be the poster's.
        otrans = i18n.get_translation()
        i18n.set_language(mlist.preferred_language)
        try:
            via = _('%(realname)s via %(lrn)s')
        finally:
            # Always restore the previous context, even if translation fails.
            i18n.set_translation(otrans)
        uvia = unicode(via, lcs, errors='replace')
        # Replace the dummy replacements.  The names are inserted literally;
        # they come from message headers and may contain backslashes.
        uvia = _fill_via(uvia, urn, ulrn)
        # And get an RFC 2047 encoded header string.
        dn = str(Header(uvia, lcs))
        change_header('From',
                      formataddr((dn, mlist.GetListEmail())),
                      mlist, msg, msgdata)
    else:
        # Use this as a flag
        o_from = None
    # Reply-To: munging.  Do not do this if the message is "fast tracked",
    # meaning it is internally crafted and delivered to a specific user.  BAW:
    # Yuck, I really hate this feature but I've caved under the sheer pressure
    # of the (very vocal) folks want it.  OTOH, RFC 2822 allows Reply-To: to
    # be a list of addresses, so instead of replacing the original, simply
    # augment it.  RFC 2822 allows max one Reply-To: header so collapse them
    # if we're adding a value, otherwise don't touch it.  (Should we collapse
    # in all cases?)
    # MAS: We need to do some things with the original From: if we've munged
    # it for DMARC mitigation.  We have goals for this process which are
    # not completely compatible, so we do the best we can.  Our goals are:
    # 1) as long as the list is not anonymous, the original From: address
    #    should be obviously exposed, i.e. not just in a header that MUAs
    #    don't display.
    # 2) the original From: address should not be in a comment or display
    #    name in the new From: because it is claimed that multiple domains
    #    in any fields in From: are indicative of spamminess.  This means
    #    it should be in Reply-To: or Cc:.
    # 3) the behavior of an MUA doing a 'reply' or 'reply all' should be
    #    consistent regardless of whether or not the From: is munged.
    # Goal 3) implies sometimes the original From: should be in Reply-To:
    # and sometimes in Cc:, and even so, this goal won't be achieved in
    # all cases with all MUAs.  In cases of conflict, the above ordering of
    # goals is priority order.

    if not fasttrack:
        # A convenience function, requires nested scopes.  pair is (name, addr)
        # It appends pair to `new' unless its address (compared lower-cased)
        # was already added.  `new' and `d' are rebound below for Cc:.
        new = []
        d = {}
        def add(pair):
            lcaddr = pair[1].lower()
            if lcaddr in d:
                return
            d[lcaddr] = pair
            new.append(pair)
        # List admin wants an explicit Reply-To: added
        if mlist.reply_goes_to_list == 2:
            add(parseaddr(mlist.reply_to_address))
        # If we're not first stripping existing Reply-To: then we need to add
        # the original Reply-To:'s to the list we're building up.  In both
        # cases we'll zap the existing field because RFC 2822 says max one is
        # allowed.
        o_rt = False
        if not mlist.first_strip_reply_to:
            orig = msg.get_all('reply-to', [])
            for pair in getaddresses(orig):
                # There's an original Reply-To: and we're not removing it.
                add(pair)
                o_rt = True
        # We also need to put the old From: in Reply-To: in all cases where
        # it is not going in Cc:.  This is when reply_goes_to_list == 0 and
        # either there was no original Reply-To: or we stripped it.
        # However, if there was an original Reply-To:, unstripped, and it
        # contained the original From: address we need to flag that it's
        # there so we don't add the original From: to Cc:
        if o_from and mlist.reply_goes_to_list == 0:
            if o_rt:
                if o_from[1].lower() in d:
                    # Original From: address is in original Reply-To:.
                    # Pretend we added it.
                    o_from = None
            else:
                add(o_from)
                # Flag that we added it.
                o_from = None
        # Set Reply-To: header to point back to this list.  Add this last
        # because some folks think that some MUAs make it easier to delete
        # addresses from the right than from the left.
        if mlist.reply_goes_to_list == 1:
            i18ndesc = uheader(mlist, mlist.description, 'Reply-To')
            add((str(i18ndesc), mlist.GetListEmail()))
        # Don't put Reply-To: back if there's nothing to add!
        if new:
            # Preserve order
            change_header('Reply-To',
                          COMMASPACE.join([formataddr(pair) for pair in new]),
                          mlist, msg, msgdata)
        else:
            del msg['reply-to']
        # The To field normally contains the list posting address.  However
        # when messages are fully personalized, that header will get
        # overwritten with the address of the recipient.  We need to get the
        # posting address in one of the recipient headers or they won't be
        # able to reply back to the list.  It's possible the posting address
        # was munged into the Reply-To header, but if not, we'll add it to a
        # Cc header.  BAW: should we force it into a Reply-To header in the
        # above code?
        # Also skip Cc if this is an anonymous list as list posting address
        # is already in From and Reply-To in this case.
        # We do add the Cc in cases where From: header munging is being done
        # because even though the list address is in From:, the Reply-To:
        # poster will override it.  Brain dead MUAs may then address the list
        # twice on a 'reply all', but reasonable MUAs should do the right
        # thing.  We also add the original From: to Cc: if it wasn't added
        # to Reply-To:
        add_list = (mlist.personalize == 2 and
                    mlist.reply_goes_to_list != 1 and
                    not mlist.anonymous_list)
        if add_list or o_from:
            # Watch out for existing Cc headers, merge, and remove dups.  Note
            # that RFC 2822 says only zero or one Cc header is allowed.
            new = []
            d = {}
            # If we're adding the original From:, add it first.
            if o_from:
                add(o_from)
            # AvoidDuplicates may have set a new Cc: in msgdata.add_header,
            # so check that.
            if 'Cc' in msgdata.get('add_header', {}):
                for pair in getaddresses([msgdata['add_header']['Cc']]):
                    add(pair)
            else:
                for pair in getaddresses(msg.get_all('cc', [])):
                    add(pair)
            if add_list:
                i18ndesc = uheader(mlist, mlist.description, 'Cc')
                add((str(i18ndesc), mlist.GetListEmail()))
            change_header('Cc',
                          COMMASPACE.join([formataddr(pair) for pair in new]),
                          mlist, msg, msgdata)
    # Add list-specific headers as defined in RFC 2369 and RFC 2919, but only
    # if the message is being crafted for a specific list (e.g. not for the
    # password reminders).
    #
    # BAW: Some people really hate the List-* headers.  It seems that the free
    # version of Eudora (possibly on for some platforms) does not hide these
    # headers by default, pissing off their users.  Too bad.  Fix the MUAs.
    if msgdata.get('_nolist') or not mlist.include_rfc2369_headers:
        return
    # This will act like an email address for purposes of formataddr()
    listid = '%s.%s' % (mlist.internal_name(), mlist.host_name)
    # Without a usable description we need to ensure the MUST brackets.
    listid_h = '<%s>' % listid
    if mlist.description:
        # Don't wrap the header since here we just want to get it properly RFC
        # 2047 encoded.
        i18ndesc = uheader(mlist, mlist.description, 'List-Id', maxlinelen=998)
        desc = str(i18ndesc)
        # With some charsets (utf-8?) and some invalid chars, str(i18ndesc)
        # can be empty; in that case keep the bare bracketed form.
        if desc:
            listid_h = formataddr((desc, listid))
    # We always add a List-ID: header.
    change_header('List-Id', listid_h, mlist, msg, msgdata)
    # For internally crafted messages, we also add a (nonstandard),
    # "X-List-Administrivia: yes" header.  For all others (i.e. those coming
    # from list posts), we add a bunch of other RFC 2369 headers.
    requestaddr = mlist.GetRequestEmail()
    subfieldfmt = '<%s>, <mailto:%s?subject=%ssubscribe>'
    listinfo = mlist.GetScriptURL('listinfo', absolute=1)
    useropts = mlist.GetScriptURL('options', absolute=1)
    headers = {}
    if msgdata.get('reduced_list_headers'):
        headers['X-List-Administrivia'] = 'yes'
    else:
        headers.update({
            'List-Help'       : '<mailto:%s?subject=help>' % requestaddr,
            'List-Unsubscribe': subfieldfmt % (useropts, requestaddr, 'un'),
            'List-Subscribe'  : subfieldfmt % (listinfo, requestaddr, ''),
            })
        # List-Post: is controlled by a separate attribute
        if mlist.include_list_post_header:
            headers['List-Post'] = '<mailto:%s>' % mlist.GetListEmail()
        # Add this header if we're archiving
        if mlist.archive:
            archiveurl = mlist.GetBaseArchiveURL()
            headers['List-Archive'] = '<%s>' % archiveurl
    # First we delete any pre-existing headers because the RFC permits only
    # one copy of each, and we want to be sure it's ours.
    for h, v in headers.items():
        # Wrap these lines if they are too long.  MAXLINELEN is at least
        # text-MUA friendly.  The adding of 2 is for the colon-space
        # separator.
        if len(h) + 2 + len(v) > MAXLINELEN:
            v = CONTINUATION.join(v.split(', '))
        change_header(h, v, mlist, msg, msgdata)



def prefix_subject(mlist, msg, msgdata):
    """Put the list's subject prefix on the Subject: of msg.

    Does nothing when the list's subject_prefix is empty after stripping.
    Otherwise any existing occurrence of the prefix and any leading run of
    Re:-like markers are removed from the subject, and the new Subject: is
    built from the prefix, an optional single 'Re:' and the remaining text,
    then set via change_header().  The original subject is saved in
    msgdata['origsubj'] and the subject without prefix is saved as a Header
    in msgdata['stripped_subject'].

    The caller decides whether prefixing applies at all (process() skips
    digests and fast tracked messages).
    """
    prefix = mlist.subject_prefix.strip()
    if not prefix:
        return
    subject = msg.get('subject', '')
    # Try to figure out what the continuation_ws is for the header
    if isinstance(subject, Header):
        lines = str(subject).splitlines()
    else:
        lines = subject.splitlines()
    ws = ' '
    if len(lines) > 1 and lines[1] and lines[1][0] in ' \t':
        ws = lines[1][0]
    msgdata['origsubj'] = subject
    # The subject may be multilingual but we take the first charset as major
    # one and try to decode.  If it is decodable, returned subject is in one
    # line and cset is properly set.  If fail, subject is mime-encoded and
    # cset is set as us-ascii.  See detail for ch_oneline() (CookHeaders one
    # line function).
    subject, cset = ch_oneline(subject)
    # TK: Python interpreter has evolved to be strict on ascii charset code
    # range.  It is safe to use unicode string when manipulating header
    # contents with re module.  It would be best to return unicode in
    # ch_oneline() but here is temporary solution.
    subject = unicode(subject, cset)
    # If the subject_prefix contains '%d', it is replaced with the
    # mailing list sequential number.  Sequential number format allows
    # '%d' or '%05d' like pattern.
    prefix_pattern = re.escape(prefix)
    # unescape '%' :-<
    prefix_pattern = '%'.join(prefix_pattern.split(r'\%'))
    p = re.compile(r'%\d*d')
    # NOTE(review): the search starts at index 1, so a number format at the
    # very beginning of the prefix is not detected here -- confirm intended.
    if p.search(prefix, 1):
        # prefix have number, so we should search prefix w/number in subject.
        # Also, force new style.
        prefix_pattern = p.sub(r'\s*\d+\s*', prefix_pattern)
        old_style = False
    else:
        old_style = mm_cfg.OLD_STYLE_PREFIXING
    subject = re.sub(prefix_pattern, '', subject)
    # Previously the following re didn't have the first \s*. It would fail
    # if the incoming Subject: was like '[prefix] Re: Re: Re:' because of the
    # leading space after stripping the prefix. It is not known what MUA would
    # create such a Subject:, but the issue was reported.
    rematch = re.match(
        r'(\s*(RE|AW|SV|VS)\s*(\[\d+\])?\s*:\s*)+',
        subject, re.I)
    if rematch:
        subject = subject[rematch.end():]
        recolon = 'Re:'
    else:
        recolon = ''
    # Strip leading and trailing whitespace from subject.
    subject = subject.strip()
    # At this point, subject may become null if someone post mail with
    # Subject: [subject prefix]
    if subject == '':
        # We want the i18n context to be the list's preferred_language.  It
        # could be the poster's.
        otrans = i18n.get_translation()
        i18n.set_language(mlist.preferred_language)
        try:
            subject = _('(no subject)')
        finally:
            # Always restore the previous context, even if translation fails.
            i18n.set_translation(otrans)
        cset = Utils.GetCharSet(mlist.preferred_language)
        subject = unicode(subject, cset)
    # and substitute %d in prefix with post_id
    try:
        prefix = prefix % mlist.post_id
    except TypeError:
        # The prefix has no number format to fill in; use it unchanged.
        pass
    # If charset is 'us-ascii', try to concatenate as string because there
    # is some weirdness in Header module (TK)
    if cset == 'us-ascii':
        try:
            if old_style:
                h = u' '.join([recolon, prefix, subject])
            else:
                if recolon:
                    h = u' '.join([prefix, recolon, subject])
                else:
                    h = u' '.join([prefix, subject])
            h = h.encode('us-ascii')
            h = uheader(mlist, h, 'Subject', continuation_ws=ws)
            change_header('Subject', h, mlist, msg, msgdata)
            ss = u' '.join([recolon, subject])
            ss = ss.encode('us-ascii')
            ss = uheader(mlist, ss, 'Subject', continuation_ws=ws)
            msgdata['stripped_subject'] = ss
            return
        except UnicodeError:
            # Not pure ascii after all; fall through to the Header based
            # construction below.
            pass
    # Get the header as a Header instance, with proper unicode conversion
    # Because of rfc2047 encoding, spaces between encoded words can be
    # insignificant, so we need to append spaces to our encoded stuff.
    prefix += ' '
    if recolon:
        recolon += ' '
    if old_style:
        h = uheader(mlist, recolon, 'Subject', continuation_ws=ws)
        h.append(prefix)
    else:
        h = uheader(mlist, prefix, 'Subject', continuation_ws=ws)
        h.append(recolon)
    # TK: Subject is concatenated and unicode string.
    subject = subject.encode(cset, 'replace')
    h.append(subject, cset)
    change_header('Subject', h, mlist, msg, msgdata)
    ss = uheader(mlist, recolon, 'Subject', continuation_ws=ws)
    ss.append(subject, cset)
    msgdata['stripped_subject'] = ss



def ch_oneline(headerstr):
    """Decode a header string onto one line in a single charset.

    Returns a (string, cset) tuple.  On success the string is the decoded
    header with line breaks removed, encoded in cset, where cset is the
    first charset named in the header or 'us-ascii' if none is.  If decoding
    fails, the undecoded header with line breaks removed is returned with
    'us-ascii' as the charset, which callers can use as a check for failure.
    """
    # Decode header string in one line and convert into single charset
    # copied and modified from ToDigest.py and Utils.py
    # return (string, cset) tuple as check for failure
    try:
        d = decode_header(headerstr)
        # at this point, we should rstrip() every string because some
        # MUA deliberately add trailing spaces when composing return
        # message.
        d = [(s.rstrip(), c) for (s,c) in d]
        cset = 'us-ascii'
        for x in d:
            # search for no-None charset
            if x[1]:
                cset = x[1]
                break
        h = make_header(d)
        ustr = h.__unicode__()
        oneline = u''.join(ustr.splitlines())
        return oneline.encode(cset, 'replace'), cset
    except (LookupError, UnicodeError, ValueError, HeaderParseError):
        # possibly charset problem. return with undecoded string in one line.
        return ''.join(headerstr.splitlines()), 'us-ascii'