1# Copyright (C) 2001-2010 Python Software Foundation 2# Contact: email-sig@python.org 3# email package unit tests 4 5import re 6import time 7import base64 8import unittest 9import textwrap 10 11from io import StringIO, BytesIO 12from itertools import chain 13from random import choice 14from threading import Thread 15from unittest.mock import patch 16 17import email 18import email.policy 19 20from email.charset import Charset 21from email.header import Header, decode_header, make_header 22from email.parser import Parser, HeaderParser 23from email.generator import Generator, DecodedGenerator, BytesGenerator 24from email.message import Message 25from email.mime.application import MIMEApplication 26from email.mime.audio import MIMEAudio 27from email.mime.text import MIMEText 28from email.mime.image import MIMEImage 29from email.mime.base import MIMEBase 30from email.mime.message import MIMEMessage 31from email.mime.multipart import MIMEMultipart 32from email.mime.nonmultipart import MIMENonMultipart 33from email import utils 34from email import errors 35from email import encoders 36from email import iterators 37from email import base64mime 38from email import quoprimime 39 40from test.support import unlink, start_threads 41from test.test_email import openfile, TestEmailBase 42 43# These imports are documented to work, but we are testing them using a 44# different path, so we import them here just to make sure they are importable. 
from email.parser import FeedParser, BytesFeedParser

NL = '\n'
EMPTYSTRING = ''
SPACE = ' '


# Test various aspects of the Message class's API
#
# Fixture messages are loaded through self._msgobj()/openfile(), both of
# which come from test.test_email; everything else is built in memory.
class TestMessageAPI(TestEmailBase):
    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A missing header yields the caller-supplied failobj unchanged.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        # set_charset() accepts a charset name as well as a Charset object.
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_set_payload_with_8bit_data_and_charset(self):
        data = b'\xd0\x90\xd0\x91\xd0\x92'
        charset = Charset('utf-8')
        msg = Message()
        msg.set_payload(data, charset)
        self.assertEqual(msg['content-transfer-encoding'], 'base64')
        self.assertEqual(msg.get_payload(decode=True), data)
        self.assertEqual(msg.get_payload(), '0JDQkdCS\n')

    def test_set_payload_with_non_ascii_and_charset_body_encoding_none(self):
        data = b'\xd0\x90\xd0\x91\xd0\x92'
        charset = Charset('utf-8')
        charset.body_encoding = None  # Disable base64 encoding
        msg = Message()
        msg.set_payload(data.decode('utf-8'), charset)
        self.assertEqual(msg['content-transfer-encoding'], '8bit')
        self.assertEqual(msg.get_payload(decode=True), data)

    def test_set_payload_with_8bit_data_and_charset_body_encoding_none(self):
        # Same as above, but the payload is handed over as bytes, not str.
        data = b'\xd0\x90\xd0\x91\xd0\x92'
        charset = Charset('utf-8')
        charset.body_encoding = None  # Disable base64 encoding
        msg = Message()
        msg.set_payload(data, charset)
        self.assertEqual(msg['content-transfer-encoding'], '8bit')
        self.assertEqual(msg.get_payload(decode=True), data)

    def test_set_payload_to_list(self):
        msg = Message()
        msg.set_payload([])
        self.assertEqual(msg.get_payload(), [])

    def test_attach_when_payload_is_string(self):
        # The Content-Type says multipart, but the payload is a plain
        # string, so attach() must refuse rather than clobber it.
        msg = Message()
        msg['Content-Type'] = 'multipart/mixed'
        msg.set_payload('string payload')
        sub_msg = MIMEMessage(Message())
        self.assertRaisesRegex(TypeError, "[Aa]ttach.*non-multipart",
                               msg.attach, sub_msg)

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # The failobj argument replaces None for parts without a charset.
        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read().encode('ascii')
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = email.generator.BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning to a header appends instead of replacing, and only
            # the first Content-Transfer-Encoding is consulted when decoding.
            # Drop the previous value so every spelling is really exercised.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        self.assertEqual(text, str(msg))
        # With unixfrom=True the envelope "From " line is emitted first;
        # everything after it must still match the source text.
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        self.assertEqual(text, NL.join(lines[1:]))

    def test_as_string_policy(self):
        # as_string(policy=...) must agree with an explicit Generator that
        # was given the same policy.
        msg = self._msgobj('msg_01.txt')
        newpolicy = msg.policy.clone(linesep='\r\n')
        fullrepr = msg.as_string(policy=newpolicy)
        s = StringIO()
        g = Generator(s, policy=newpolicy)
        g.flatten(msg)
        self.assertEqual(fullrepr, s.getvalue())

    def test_nonascii_as_string_without_cte(self):
        m = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-type: text/plain; charset="iso-8859-1"

            Test if non-ascii messages with no Content-Transfer-Encoding set
            can be as_string'd:
            Föö bär
            """)
        source = m.encode('iso-8859-1')
        expected = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-type: text/plain; charset="iso-8859-1"
            Content-Transfer-Encoding: quoted-printable

            Test if non-ascii messages with no Content-Transfer-Encoding set
            can be as_string'd:
            F=F6=F6 b=E4r
            """)
        msg = email.message_from_bytes(source)
        self.assertEqual(msg.as_string(), expected)

    def test_nonascii_as_string_without_content_type_and_cte(self):
        m = textwrap.dedent("""\
            MIME-Version: 1.0

            Test if non-ascii messages with no Content-Type nor
            Content-Transfer-Encoding set can be as_string'd:
            Föö bär
            """)
        source = m.encode('iso-8859-1')
        # With no declared charset the non-ASCII bytes cannot be decoded, so
        # the expected text carries replacement characters in their place.
        expected = source.decode('ascii', 'replace')
        msg = email.message_from_bytes(source)
        self.assertEqual(msg.as_string(), expected)

    def test_as_bytes(self):
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            data = fp.read().encode('ascii')
        self.assertEqual(data, bytes(msg))
        fullrepr = msg.as_bytes(unixfrom=True)
        lines = fullrepr.split(b'\n')
        self.assertTrue(lines[0].startswith(b'From '))
        self.assertEqual(data, b'\n'.join(lines[1:]))

    def test_as_bytes_policy(self):
        msg = self._msgobj('msg_01.txt')
        newpolicy = msg.policy.clone(linesep='\r\n')
        fullrepr = msg.as_bytes(policy=newpolicy)
        s = BytesIO()
        g = BytesGenerator(s, policy=newpolicy)
        g.flatten(msg)
        self.assertEqual(fullrepr, s.getvalue())

    # test_headerregistry.TestContentTypeHeader.bad_params
    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        # A "filename" parameter with no value yields '', not None.
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # The default header (Content-Type) is absent from this message.
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    # test_headerregistry.TestContentTypeHeader.spaces_around_param_equals
    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    # test_headerregistry.TestContentTypeHeader.spaces_around_semis
    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    # test_headerregistry.TestContentTypeHeader.semis_inside_quotes
    def test_get_param_with_semis_in_quotes(self):
        # The quoted value must itself contain semicolons; otherwise this
        # test never checks that a ';' inside quotes is not a separator.
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    # test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value
    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        # Same header again, spelled with a double-quoted Python literal.
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter puts it at the end, not where it was.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        msg = Message()
        # Deleting param on empty msg should not raise exception.
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        self.assertEqual(msg['Content-Type'], existing_header)

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        # A type without a '/' is rejected.
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type keeps the existing parameters.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        # A Content-Type value without a slash falls back to text/plain.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated header only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_get_content_disposition(self):
        msg = Message()
        self.assertIsNone(msg.get_content_disposition())
        msg.add_header('Content-Disposition', 'attachment',
                       filename='random.avi')
        self.assertEqual(msg.get_content_disposition(), 'attachment')
        msg.replace_header('Content-Disposition', 'inline')
        self.assertEqual(msg.get_content_disposition(), 'inline')
        # The returned disposition is lower-cased.
        msg.replace_header('Content-Disposition', 'InlinE')
        self.assertEqual(msg.get_content_disposition(), 'inline')

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
                          b'\xa1\x00p\xf6\xbf\xe9\x0f'))
        self.assertIsInstance(msg.defects[0],
                              errors.InvalidBase64CharactersDefect)

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        # A (charset, language, value) triple selects the encoding charset.
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_binary_quopri_payload(self):
        for charset in ('latin-1', 'ascii'):
            msg = Message()
            msg['content-type'] = 'text/plain; charset=%s' % charset
            msg['content-transfer-encoding'] = 'quoted-printable'
            msg.set_payload(b'foo=e6=96=87bar')
            self.assertEqual(
                msg.get_payload(decode=True),
                b'foo\xe6\x96\x87bar',
                'get_payload returns wrong result with charset %s.' % charset)

    def test_binary_base64_payload(self):
        for charset in ('latin-1', 'ascii'):
            msg = Message()
            msg['content-type'] = 'text/plain; charset=%s' % charset
            msg['content-transfer-encoding'] = 'base64'
            msg.set_payload(b'Zm9v5paHYmFy')
            self.assertEqual(
                msg.get_payload(decode=True),
                b'foo\xe6\x96\x87bar',
                'get_payload returns wrong result with charset %s.' % charset)

    def test_binary_uuencode_payload(self):
        for charset in ('latin-1', 'ascii'):
            for encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
                msg = Message()
                msg['content-type'] = 'text/plain; charset=%s' % charset
                msg['content-transfer-encoding'] = encoding
                msg.set_payload(b"begin 666 -\n)9F]OYI:'8F%R\n \nend\n")
                # Adjacent literals are joined into one format string, so a
                # failure prints a readable sentence, not a tuple repr.
                self.assertEqual(
                    msg.get_payload(decode=True),
                    b'foo\xe6\x96\x87bar',
                    ('get_payload returns wrong result '
                     'with charset {0} and encoding {1}.').format(
                         charset, encoding))

    def test_add_header_with_name_only_param(self):
        # A None value produces a bare parameter name, and the underscore
        # in the keyword becomes a dash.
        msg = Message()
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
760 def test_embedded_header_via_Header_rejected(self): 761 msg = Message() 762 msg['Dummy'] = Header('dummy\nX-Injected-Header: test') 763 self.assertRaises(errors.HeaderParseError, msg.as_string) 764 765 def test_embedded_header_via_string_rejected(self): 766 msg = Message() 767 msg['Dummy'] = 'dummy\nX-Injected-Header: test' 768 self.assertRaises(errors.HeaderParseError, msg.as_string) 769 770 def test_unicode_header_defaults_to_utf8_encoding(self): 771 # Issue 14291 772 m = MIMEText('abc\n') 773 m['Subject'] = 'É test' 774 self.assertEqual(str(m),textwrap.dedent("""\ 775 Content-Type: text/plain; charset="us-ascii" 776 MIME-Version: 1.0 777 Content-Transfer-Encoding: 7bit 778 Subject: =?utf-8?q?=C3=89_test?= 779 780 abc 781 """)) 782 783 def test_unicode_body_defaults_to_utf8_encoding(self): 784 # Issue 14291 785 m = MIMEText('É testabc\n') 786 self.assertEqual(str(m),textwrap.dedent("""\ 787 Content-Type: text/plain; charset="utf-8" 788 MIME-Version: 1.0 789 Content-Transfer-Encoding: base64 790 791 w4kgdGVzdGFiYwo= 792 """)) 793 794 795# Test the email.encoders module 796class TestEncoders(unittest.TestCase): 797 798 def test_EncodersEncode_base64(self): 799 with openfile('PyBanner048.gif', 'rb') as fp: 800 bindata = fp.read() 801 mimed = email.mime.image.MIMEImage(bindata) 802 base64ed = mimed.get_payload() 803 # the transfer-encoded body lines should all be <=76 characters 804 lines = base64ed.split('\n') 805 self.assertLessEqual(max([ len(x) for x in lines ]), 76) 806 807 def test_encode_empty_payload(self): 808 eq = self.assertEqual 809 msg = Message() 810 msg.set_charset('us-ascii') 811 eq(msg['content-transfer-encoding'], '7bit') 812 813 def test_default_cte(self): 814 eq = self.assertEqual 815 # 7bit data and the default us-ascii _charset 816 msg = MIMEText('hello world') 817 eq(msg['content-transfer-encoding'], '7bit') 818 # Similar, but with 8bit data 819 msg = MIMEText('hello \xf8 world') 820 eq(msg['content-transfer-encoding'], 'base64') 821 # And 
now with a different charset 822 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1') 823 eq(msg['content-transfer-encoding'], 'quoted-printable') 824 825 def test_encode7or8bit(self): 826 # Make sure a charset whose input character set is 8bit but 827 # whose output character set is 7bit gets a transfer-encoding 828 # of 7bit. 829 eq = self.assertEqual 830 msg = MIMEText('文\n', _charset='euc-jp') 831 eq(msg['content-transfer-encoding'], '7bit') 832 eq(msg.as_string(), textwrap.dedent("""\ 833 MIME-Version: 1.0 834 Content-Type: text/plain; charset="iso-2022-jp" 835 Content-Transfer-Encoding: 7bit 836 837 \x1b$BJ8\x1b(B 838 """)) 839 840 def test_qp_encode_latin1(self): 841 msg = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1') 842 self.assertEqual(str(msg), textwrap.dedent("""\ 843 MIME-Version: 1.0 844 Content-Type: text/text; charset="iso-8859-1" 845 Content-Transfer-Encoding: quoted-printable 846 847 =E1=F6 848 """)) 849 850 def test_qp_encode_non_latin1(self): 851 # Issue 16948 852 msg = MIMEText('\u017c\n', 'text', 'ISO-8859-2') 853 self.assertEqual(str(msg), textwrap.dedent("""\ 854 MIME-Version: 1.0 855 Content-Type: text/text; charset="iso-8859-2" 856 Content-Transfer-Encoding: quoted-printable 857 858 =BF 859 """)) 860 861 862# Test long header wrapping 863class TestLongHeaders(TestEmailBase): 864 865 maxDiff = None 866 867 def test_split_long_continuation(self): 868 eq = self.ndiffAssertEqual 869 msg = email.message_from_string("""\ 870Subject: bug demonstration 871\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 872\tmore text 873 874test 875""") 876 sfp = StringIO() 877 g = Generator(sfp) 878 g.flatten(msg) 879 eq(sfp.getvalue(), """\ 880Subject: bug demonstration 881\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 882\tmore text 883 884test 885""") 886 887 def 
test_another_long_almost_unsplittable_header(self): 888 eq = self.ndiffAssertEqual 889 hstr = """\ 890bug demonstration 891\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 892\tmore text""" 893 h = Header(hstr, continuation_ws='\t') 894 eq(h.encode(), """\ 895bug demonstration 896\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 897\tmore text""") 898 h = Header(hstr.replace('\t', ' ')) 899 eq(h.encode(), """\ 900bug demonstration 901 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 902 more text""") 903 904 def test_long_nonstring(self): 905 eq = self.ndiffAssertEqual 906 g = Charset("iso-8859-1") 907 cz = Charset("iso-8859-2") 908 utf8 = Charset("utf-8") 909 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband ' 910 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen ' 911 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen ' 912 b'bef\xf6rdert. ') 913 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich ' 914 b'd\xf9vtipu.. ') 915 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f' 916 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00' 917 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c' 918 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067' 919 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das ' 920 'Nunstuck git und Slotermeyer? Ja! 
Beiherhund das Oder ' 921 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066' 922 '\u3044\u307e\u3059\u3002') 923 h = Header(g_head, g, header_name='Subject') 924 h.append(cz_head, cz) 925 h.append(utf8_head, utf8) 926 msg = Message() 927 msg['Subject'] = h 928 sfp = StringIO() 929 g = Generator(sfp) 930 g.flatten(msg) 931 eq(sfp.getvalue(), """\ 932Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?= 933 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?= 934 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?= 935 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?= 936 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?= 937 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?= 938 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?= 939 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?= 940 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?= 941 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?= 942 =?utf-8?b?44CC?= 943 944""") 945 eq(h.encode(maxlinelen=76), """\ 946=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?= 947 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?= 948 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?= 949 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?= 950 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= 951 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?= 952 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?= 953 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?= 954 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?= 955 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?= 
956 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""") 957 958 def test_long_header_encode(self): 959 eq = self.ndiffAssertEqual 960 h = Header('wasnipoop; giraffes="very-long-necked-animals"; ' 961 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', 962 header_name='X-Foobar-Spoink-Defrobnit') 963 eq(h.encode(), '''\ 964wasnipoop; giraffes="very-long-necked-animals"; 965 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') 966 967 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self): 968 eq = self.ndiffAssertEqual 969 h = Header('wasnipoop; giraffes="very-long-necked-animals"; ' 970 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', 971 header_name='X-Foobar-Spoink-Defrobnit', 972 continuation_ws='\t') 973 eq(h.encode(), '''\ 974wasnipoop; giraffes="very-long-necked-animals"; 975 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') 976 977 def test_long_header_encode_with_tab_continuation(self): 978 eq = self.ndiffAssertEqual 979 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t' 980 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', 981 header_name='X-Foobar-Spoink-Defrobnit', 982 continuation_ws='\t') 983 eq(h.encode(), '''\ 984wasnipoop; giraffes="very-long-necked-animals"; 985\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') 986 987 def test_header_encode_with_different_output_charset(self): 988 h = Header('文', 'euc-jp') 989 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=") 990 991 def test_long_header_encode_with_different_output_charset(self): 992 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4' 993 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4' 994 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4' 995 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp') 996 res = """\ 997=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?= 998 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?=""" 999 
self.assertEqual(h.encode(), res) 1000 1001 def test_header_splitter(self): 1002 eq = self.ndiffAssertEqual 1003 msg = MIMEText('') 1004 # It'd be great if we could use add_header() here, but that doesn't 1005 # guarantee an order of the parameters. 1006 msg['X-Foobar-Spoink-Defrobnit'] = ( 1007 'wasnipoop; giraffes="very-long-necked-animals"; ' 1008 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"') 1009 sfp = StringIO() 1010 g = Generator(sfp) 1011 g.flatten(msg) 1012 eq(sfp.getvalue(), '''\ 1013Content-Type: text/plain; charset="us-ascii" 1014MIME-Version: 1.0 1015Content-Transfer-Encoding: 7bit 1016X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals"; 1017 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey" 1018 1019''') 1020 1021 def test_no_semis_header_splitter(self): 1022 eq = self.ndiffAssertEqual 1023 msg = Message() 1024 msg['From'] = 'test@dom.ain' 1025 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10)) 1026 msg.set_payload('Test') 1027 sfp = StringIO() 1028 g = Generator(sfp) 1029 g.flatten(msg) 1030 eq(sfp.getvalue(), """\ 1031From: test@dom.ain 1032References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain> 1033 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain> 1034 1035Test""") 1036 1037 def test_last_split_chunk_does_not_fit(self): 1038 eq = self.ndiffAssertEqual 1039 h = Header('Subject: the first part of this is short, but_the_second' 1040 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' 1041 '_all_by_itself') 1042 eq(h.encode(), """\ 1043Subject: the first part of this is short, 1044 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") 1045 1046 def test_splittable_leading_char_followed_by_overlong_unsplitable(self): 1047 eq = self.ndiffAssertEqual 1048 h = Header(', but_the_second' 1049 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' 1050 '_all_by_itself') 1051 eq(h.encode(), """\ 
1052, 1053 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") 1054 1055 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self): 1056 eq = self.ndiffAssertEqual 1057 h = Header(', , but_the_second' 1058 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' 1059 '_all_by_itself') 1060 eq(h.encode(), """\ 1061, , 1062 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") 1063 1064 def test_trailing_splitable_on_overlong_unsplitable(self): 1065 eq = self.ndiffAssertEqual 1066 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1067 'be_on_a_line_all_by_itself;') 1068 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_" 1069 "be_on_a_line_all_by_itself;") 1070 1071 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self): 1072 eq = self.ndiffAssertEqual 1073 h = Header('; ' 1074 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1075 'be_on_a_line_all_by_itself; ') 1076 eq(h.encode(), """\ 1077; 1078 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """) 1079 1080 def test_long_header_with_multiple_sequential_split_chars(self): 1081 eq = self.ndiffAssertEqual 1082 h = Header('This is a long line that has two whitespaces in a row. ' 1083 'This used to cause truncation of the header when folded') 1084 eq(h.encode(), """\ 1085This is a long line that has two whitespaces in a row. 
This used to cause 1086 truncation of the header when folded""") 1087 1088 def test_splitter_split_on_punctuation_only_if_fws_with_header(self): 1089 eq = self.ndiffAssertEqual 1090 h = Header('thisverylongheaderhas;semicolons;and,commas,but' 1091 'they;arenotlegal;fold,points') 1092 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;" 1093 "arenotlegal;fold,points") 1094 1095 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self): 1096 eq = self.ndiffAssertEqual 1097 h = Header('this is a test where we need to have more than one line ' 1098 'before; our final line that is just too big to fit;; ' 1099 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1100 'be_on_a_line_all_by_itself;') 1101 eq(h.encode(), """\ 1102this is a test where we need to have more than one line before; 1103 our final line that is just too big to fit;; 1104 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""") 1105 1106 def test_overlong_last_part_followed_by_split_point(self): 1107 eq = self.ndiffAssertEqual 1108 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1109 'be_on_a_line_all_by_itself ') 1110 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_" 1111 "should_be_on_a_line_all_by_itself ") 1112 1113 def test_multiline_with_overlong_parts_separated_by_two_split_points(self): 1114 eq = self.ndiffAssertEqual 1115 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_' 1116 'before_our_final_line_; ; ' 1117 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1118 'be_on_a_line_all_by_itself; ') 1119 eq(h.encode(), """\ 1120this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_; 1121 ; 1122 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """) 1123 1124 def test_multiline_with_overlong_last_part_followed_by_split_point(self): 1125 eq = self.ndiffAssertEqual 1126 h = Header('this is a 
test where we need to have more than one line ' 1127 'before our final line; ; ' 1128 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1129 'be_on_a_line_all_by_itself; ') 1130 eq(h.encode(), """\ 1131this is a test where we need to have more than one line before our final line; 1132 ; 1133 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """) 1134 1135 def test_long_header_with_whitespace_runs(self): 1136 eq = self.ndiffAssertEqual 1137 msg = Message() 1138 msg['From'] = 'test@dom.ain' 1139 msg['References'] = SPACE.join(['<foo@dom.ain> '] * 10) 1140 msg.set_payload('Test') 1141 sfp = StringIO() 1142 g = Generator(sfp) 1143 g.flatten(msg) 1144 eq(sfp.getvalue(), """\ 1145From: test@dom.ain 1146References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> 1147 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> 1148 <foo@dom.ain> <foo@dom.ain>\x20\x20 1149 1150Test""") 1151 1152 def test_long_run_with_semi_header_splitter(self): 1153 eq = self.ndiffAssertEqual 1154 msg = Message() 1155 msg['From'] = 'test@dom.ain' 1156 msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc' 1157 msg.set_payload('Test') 1158 sfp = StringIO() 1159 g = Generator(sfp) 1160 g.flatten(msg) 1161 eq(sfp.getvalue(), """\ 1162From: test@dom.ain 1163References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> 1164 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> 1165 <foo@dom.ain>; abc 1166 1167Test""") 1168 1169 def test_splitter_split_on_punctuation_only_if_fws(self): 1170 eq = self.ndiffAssertEqual 1171 msg = Message() 1172 msg['From'] = 'test@dom.ain' 1173 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but' 1174 'they;arenotlegal;fold,points') 1175 msg.set_payload('Test') 1176 sfp = StringIO() 1177 g = Generator(sfp) 1178 g.flatten(msg) 1179 # XXX the space after the header should not be there. 
1180 eq(sfp.getvalue(), """\ 1181From: test@dom.ain 1182References:\x20 1183 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points 1184 1185Test""") 1186 1187 def test_no_split_long_header(self): 1188 eq = self.ndiffAssertEqual 1189 hstr = 'References: ' + 'x' * 80 1190 h = Header(hstr) 1191 # These come on two lines because Headers are really field value 1192 # classes and don't really know about their field names. 1193 eq(h.encode(), """\ 1194References: 1195 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""") 1196 h = Header('x' * 80) 1197 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx') 1198 1199 def test_splitting_multiple_long_lines(self): 1200 eq = self.ndiffAssertEqual 1201 hstr = """\ 1202from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1203\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1204\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1205""" 1206 h = Header(hstr, continuation_ws='\t') 1207 eq(h.encode(), """\ 1208from babylon.socal-raves.org (localhost [127.0.0.1]); 1209 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; 1210 for <mailman-admin@babylon.socal-raves.org>; 1211 Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1212\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); 1213 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; 1214 for <mailman-admin@babylon.socal-raves.org>; 1215 Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1216\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); 
1217 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; 1218 for <mailman-admin@babylon.socal-raves.org>; 1219 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""") 1220 1221 def test_splitting_first_line_only_is_long(self): 1222 eq = self.ndiffAssertEqual 1223 hstr = """\ 1224from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca) 1225\tby kronos.mems-exchange.org with esmtp (Exim 4.05) 1226\tid 17k4h5-00034i-00 1227\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""" 1228 h = Header(hstr, maxlinelen=78, header_name='Received', 1229 continuation_ws='\t') 1230 eq(h.encode(), """\ 1231from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] 1232 helo=cthulhu.gerg.ca) 1233\tby kronos.mems-exchange.org with esmtp (Exim 4.05) 1234\tid 17k4h5-00034i-00 1235\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""") 1236 1237 def test_long_8bit_header(self): 1238 eq = self.ndiffAssertEqual 1239 msg = Message() 1240 h = Header('Britische Regierung gibt', 'iso-8859-1', 1241 header_name='Subject') 1242 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte') 1243 eq(h.encode(maxlinelen=76), """\ 1244=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?= 1245 =?iso-8859-1?q?hore-Windkraftprojekte?=""") 1246 msg['Subject'] = h 1247 eq(msg.as_string(maxheaderlen=76), """\ 1248Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?= 1249 =?iso-8859-1?q?hore-Windkraftprojekte?= 1250 1251""") 1252 eq(msg.as_string(maxheaderlen=0), """\ 1253Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?= 1254 1255""") 1256 1257 def test_long_8bit_header_no_charset(self): 1258 eq = self.ndiffAssertEqual 1259 msg = Message() 1260 header_string = ('Britische Regierung gibt gr\xfcnes Licht ' 1261 'f\xfcr Offshore-Windkraftprojekte ' 1262 '<a-very-long-address@example.com>') 1263 msg['Reply-To'] = header_string 1264 eq(msg.as_string(maxheaderlen=78), """\ 
1265Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?= 1266 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?= 1267 1268""") 1269 msg = Message() 1270 msg['Reply-To'] = Header(header_string, 1271 header_name='Reply-To') 1272 eq(msg.as_string(maxheaderlen=78), """\ 1273Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?= 1274 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?= 1275 1276""") 1277 1278 def test_long_to_header(self): 1279 eq = self.ndiffAssertEqual 1280 to = ('"Someone Test #A" <someone@eecs.umich.edu>,' 1281 '<someone@eecs.umich.edu>, ' 1282 '"Someone Test #B" <someone@umich.edu>, ' 1283 '"Someone Test #C" <someone@eecs.umich.edu>, ' 1284 '"Someone Test #D" <someone@eecs.umich.edu>') 1285 msg = Message() 1286 msg['To'] = to 1287 eq(msg.as_string(maxheaderlen=78), '''\ 1288To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>, 1289 "Someone Test #B" <someone@umich.edu>, 1290 "Someone Test #C" <someone@eecs.umich.edu>, 1291 "Someone Test #D" <someone@eecs.umich.edu> 1292 1293''') 1294 1295 def test_long_line_after_append(self): 1296 eq = self.ndiffAssertEqual 1297 s = 'This is an example of string which has almost the limit of header length.' 1298 h = Header(s) 1299 h.append('Add another line.') 1300 eq(h.encode(maxlinelen=76), """\ 1301This is an example of string which has almost the limit of header length. 1302 Add another line.""") 1303 1304 def test_shorter_line_with_append(self): 1305 eq = self.ndiffAssertEqual 1306 s = 'This is a shorter line.' 1307 h = Header(s) 1308 h.append('Add another sentence. (Surprise?)') 1309 eq(h.encode(), 1310 'This is a shorter line. Add another sentence. 
(Surprise?)') 1311 1312 def test_long_field_name(self): 1313 eq = self.ndiffAssertEqual 1314 fn = 'X-Very-Very-Very-Long-Header-Name' 1315 gs = ('Die Mieter treten hier ein werden mit einem Foerderband ' 1316 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen ' 1317 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen ' 1318 'bef\xf6rdert. ') 1319 h = Header(gs, 'iso-8859-1', header_name=fn) 1320 # BAW: this seems broken because the first line is too long 1321 eq(h.encode(maxlinelen=76), """\ 1322=?iso-8859-1?q?Die_Mieter_treten_hier_e?= 1323 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?= 1324 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?= 1325 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""") 1326 1327 def test_long_received_header(self): 1328 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) ' 1329 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; ' 1330 'Wed, 05 Mar 2003 18:10:18 -0700') 1331 msg = Message() 1332 msg['Received-1'] = Header(h, continuation_ws='\t') 1333 msg['Received-2'] = h 1334 # This should be splitting on spaces not semicolons. 1335 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\ 1336Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by 1337 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; 1338 Wed, 05 Mar 2003 18:10:18 -0700 1339Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by 1340 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; 1341 Wed, 05 Mar 2003 18:10:18 -0700 1342 1343""") 1344 1345 def test_string_headerinst_eq(self): 1346 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.' 1347 'tu-muenchen.de> (David Bremner\'s message of ' 1348 '"Thu, 6 Mar 2003 13:58:21 +0100")') 1349 msg = Message() 1350 msg['Received-1'] = Header(h, header_name='Received-1', 1351 continuation_ws='\t') 1352 msg['Received-2'] = h 1353 # XXX The space after the ':' should not be there. 
1354 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\ 1355Received-1:\x20 1356 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David 1357 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\") 1358Received-2:\x20 1359 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David 1360 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\") 1361 1362""") 1363 1364 def test_long_unbreakable_lines_with_continuation(self): 1365 eq = self.ndiffAssertEqual 1366 msg = Message() 1367 t = """\ 1368iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1369 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp""" 1370 msg['Face-1'] = t 1371 msg['Face-2'] = Header(t, header_name='Face-2') 1372 msg['Face-3'] = ' ' + t 1373 # XXX This splitting is all wrong. It the first value line should be 1374 # snug against the field name or the space after the header not there. 1375 eq(msg.as_string(maxheaderlen=78), """\ 1376Face-1:\x20 1377 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1378 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp 1379Face-2:\x20 1380 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1381 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp 1382Face-3:\x20 1383 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1384 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp 1385 1386""") 1387 1388 def test_another_long_multiline_header(self): 1389 eq = self.ndiffAssertEqual 1390 m = ('Received: from siimage.com ' 1391 '([172.25.1.3]) by zima.siliconimage.com with ' 1392 'Microsoft SMTPSVC(5.0.2195.4905); ' 1393 'Wed, 16 Oct 2002 07:41:11 -0700') 1394 msg = email.message_from_string(m) 1395 eq(msg.as_string(maxheaderlen=78), '''\ 1396Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with 1397 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 
2002 07:41:11 -0700 1398 1399''') 1400 1401 def test_long_lines_with_different_header(self): 1402 eq = self.ndiffAssertEqual 1403 h = ('List-Unsubscribe: ' 1404 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,' 1405 ' <mailto:spamassassin-talk-request@lists.sourceforge.net' 1406 '?subject=unsubscribe>') 1407 msg = Message() 1408 msg['List'] = h 1409 msg['List'] = Header(h, header_name='List') 1410 eq(msg.as_string(maxheaderlen=78), """\ 1411List: List-Unsubscribe: 1412 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>, 1413 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe> 1414List: List-Unsubscribe: 1415 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>, 1416 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe> 1417 1418""") 1419 1420 def test_long_rfc2047_header_with_embedded_fws(self): 1421 h = Header(textwrap.dedent("""\ 1422 We're going to pretend this header is in a non-ascii character set 1423 \tto see if line wrapping with encoded words and embedded 1424 folding white space works"""), 1425 charset='utf-8', 1426 header_name='Test') 1427 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\ 1428 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?= 1429 =?utf-8?q?cter_set?= 1430 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?= 1431 =?utf-8?q?_folding_white_space_works?=""")+'\n') 1432 1433 1434 1435# Test mangling of "From " lines in the body of a message 1436class TestFromMangling(unittest.TestCase): 1437 def setUp(self): 1438 self.msg = Message() 1439 self.msg['From'] = 'aaa@bbb.org' 1440 self.msg.set_payload("""\ 1441From the desk of A.A.A.: 1442Blah blah blah 1443""") 1444 1445 def test_mangled_from(self): 1446 s = StringIO() 1447 g = Generator(s, mangle_from_=True) 1448 g.flatten(self.msg) 1449 self.assertEqual(s.getvalue(), """\ 1450From: aaa@bbb.org 1451 1452>From the desk of A.A.A.: 1453Blah blah blah 1454""") 1455 
1456 def test_dont_mangle_from(self): 1457 s = StringIO() 1458 g = Generator(s, mangle_from_=False) 1459 g.flatten(self.msg) 1460 self.assertEqual(s.getvalue(), """\ 1461From: aaa@bbb.org 1462 1463From the desk of A.A.A.: 1464Blah blah blah 1465""") 1466 1467 def test_mangle_from_in_preamble_and_epilog(self): 1468 s = StringIO() 1469 g = Generator(s, mangle_from_=True) 1470 msg = email.message_from_string(textwrap.dedent("""\ 1471 From: foo@bar.com 1472 Mime-Version: 1.0 1473 Content-Type: multipart/mixed; boundary=XXX 1474 1475 From somewhere unknown 1476 1477 --XXX 1478 Content-Type: text/plain 1479 1480 foo 1481 1482 --XXX-- 1483 1484 From somewhere unknowable 1485 """)) 1486 g.flatten(msg) 1487 self.assertEqual(len([1 for x in s.getvalue().split('\n') 1488 if x.startswith('>From ')]), 2) 1489 1490 def test_mangled_from_with_bad_bytes(self): 1491 source = textwrap.dedent("""\ 1492 Content-Type: text/plain; charset="utf-8" 1493 MIME-Version: 1.0 1494 Content-Transfer-Encoding: 8bit 1495 From: aaa@bbb.org 1496 1497 """).encode('utf-8') 1498 msg = email.message_from_bytes(source + b'From R\xc3\xb6lli\n') 1499 b = BytesIO() 1500 g = BytesGenerator(b, mangle_from_=True) 1501 g.flatten(msg) 1502 self.assertEqual(b.getvalue(), source + b'>From R\xc3\xb6lli\n') 1503 1504 def test_mutltipart_with_bad_bytes_in_cte(self): 1505 # bpo30835 1506 source = textwrap.dedent("""\ 1507 From: aperson@example.com 1508 Content-Type: multipart/mixed; boundary="1" 1509 Content-Transfer-Encoding: \xc8 1510 """).encode('utf-8') 1511 msg = email.message_from_bytes(source) 1512 1513 1514# Test the basic MIMEAudio class 1515class TestMIMEAudio(unittest.TestCase): 1516 def setUp(self): 1517 with openfile('audiotest.au', 'rb') as fp: 1518 self._audiodata = fp.read() 1519 self._au = MIMEAudio(self._audiodata) 1520 1521 def test_guess_minor_type(self): 1522 self.assertEqual(self._au.get_content_type(), 'audio/basic') 1523 1524 def test_encoding(self): 1525 payload = self._au.get_payload() 1526 
self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')), 1527 self._audiodata) 1528 1529 def test_checkSetMinor(self): 1530 au = MIMEAudio(self._audiodata, 'fish') 1531 self.assertEqual(au.get_content_type(), 'audio/fish') 1532 1533 def test_add_header(self): 1534 eq = self.assertEqual 1535 self._au.add_header('Content-Disposition', 'attachment', 1536 filename='audiotest.au') 1537 eq(self._au['content-disposition'], 1538 'attachment; filename="audiotest.au"') 1539 eq(self._au.get_params(header='content-disposition'), 1540 [('attachment', ''), ('filename', 'audiotest.au')]) 1541 eq(self._au.get_param('filename', header='content-disposition'), 1542 'audiotest.au') 1543 missing = [] 1544 eq(self._au.get_param('attachment', header='content-disposition'), '') 1545 self.assertIs(self._au.get_param('foo', failobj=missing, 1546 header='content-disposition'), missing) 1547 # Try some missing stuff 1548 self.assertIs(self._au.get_param('foobar', missing), missing) 1549 self.assertIs(self._au.get_param('attachment', missing, 1550 header='foobar'), missing) 1551 1552 1553 1554# Test the basic MIMEImage class 1555class TestMIMEImage(unittest.TestCase): 1556 def setUp(self): 1557 with openfile('PyBanner048.gif', 'rb') as fp: 1558 self._imgdata = fp.read() 1559 self._im = MIMEImage(self._imgdata) 1560 1561 def test_guess_minor_type(self): 1562 self.assertEqual(self._im.get_content_type(), 'image/gif') 1563 1564 def test_encoding(self): 1565 payload = self._im.get_payload() 1566 self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')), 1567 self._imgdata) 1568 1569 def test_checkSetMinor(self): 1570 im = MIMEImage(self._imgdata, 'fish') 1571 self.assertEqual(im.get_content_type(), 'image/fish') 1572 1573 def test_add_header(self): 1574 eq = self.assertEqual 1575 self._im.add_header('Content-Disposition', 'attachment', 1576 filename='dingusfish.gif') 1577 eq(self._im['content-disposition'], 1578 'attachment; filename="dingusfish.gif"') 1579 
# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    # Each test builds a MIMEApplication from raw bytes with one of the stock
    # encoders, checks the payload and Content-Transfer-Encoding it ends up
    # with, and (for the "binary_body" tests) checks that the message
    # survives a BytesGenerator -> message_from_bytes round trip.

    def _roundtrip(self, msg):
        """Flatten msg with BytesGenerator and return the re-parsed message."""
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        wireform = s.getvalue()
        return email.message_from_bytes(wireform)

    def test_headers(self):
        eq = self.assertEqual
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        eq(msg.get_content_type(), 'application/octet-stream')
        eq(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        eq = self.assertEqual
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        eq(msg.get_payload().strip(), '+vv8/f7/')
        eq(msg.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_7or8bit(self):
        # Issue 17171.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_7or8bit)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], '8bit')
        msg2 = self._roundtrip(msg)
        # Flattening must not have altered the original message...
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        # ...and the re-parsed copy must expose the same payload and CTE.
        self.assertEqual(msg2.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], '8bit')

    def test_binary_body_with_encode_noop(self):
        # Issue 16564: This does not produce an RFC valid message, since to be
        # valid it should have a CTE of binary.  But the below works in
        # Python2, and is documented as working this way.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        # Original untouched by flattening; re-parsed copy matches it.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_unicode_linend_encode_noop(self):
        # Issue 19003: This is a variation on #16564.
        bytesdata = b'\x0b\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_quopri(self):
        # Issue 14360.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff '
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_quopri)
        self.assertEqual(msg.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], 'quoted-printable')
        msg2 = self._roundtrip(msg)
        # Original untouched by flattening; re-parsed copy matches it.
        self.assertEqual(msg.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
        self.assertEqual(msg2.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], 'quoted-printable')

    def test_binary_body_with_encode_base64(self):
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_base64)
        self.assertEqual(msg.get_payload(), '+vv8/f7/\n')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        # Original untouched by flattening; re-parsed copy matches it.
        self.assertEqual(msg.get_payload(), '+vv8/f7/\n')
        self.assertEqual(msg2.get_payload(), '+vv8/f7/\n')
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
def setUp(self):
    # Build the fixture: a multipart/mixed container holding a short text
    # introduction followed by a GIF attachment.  The container and both
    # parts are stored on self as _msg, _txt and _im respectively.
    with openfile('PyBanner048.gif', 'rb') as fp:
        data = fp.read()
    container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
    image = MIMEImage(data, name='dingusfish.gif')
    image.add_header('content-disposition', 'attachment',
                     filename='dingusfish.gif')
    intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
    container.attach(intro)
    container.attach(image)
    container['From'] = 'Barry <barry@digicool.com>'
    container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
    container['Subject'] = 'Here is your dingus fish'

    now = 987809702.54848599
    timetuple = time.localtime(now)
    # The last struct_time field is the DST flag; pick the matching offset.
    if timetuple[-1] == 0:
        tzsecs = time.timezone
    else:
        tzsecs = time.altzone
    # time.timezone/altzone count seconds *west* of UTC, so a positive value
    # means a negative zone offset in the Date header.
    if tzsecs > 0:
        sign = '-'
    else:
        sign = '+'
    # Format the magnitude as HHMM.  Dividing the signed value by 36 (as
    # this used to do) leaks a second '-' for zones east of UTC and turns
    # half-hour offsets into e.g. 0550 instead of 0530.
    hours, minutes = divmod(abs(tzsecs) // 60, 60)
    tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
    container['Date'] = time.strftime(
        '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
    self._msg = container
    self._im = image
    self._txt = intro
boundary="BOUNDARY" 1827MIME-Version: 1.0 1828Subject: A subject 1829To: aperson@dom.ain 1830From: bperson@dom.ain 1831 1832--BOUNDARY 1833 1834--BOUNDARY-- 1835''') 1836 1837 def test_no_parts_in_a_multipart_with_empty_epilogue(self): 1838 outer = MIMEBase('multipart', 'mixed') 1839 outer['Subject'] = 'A subject' 1840 outer['To'] = 'aperson@dom.ain' 1841 outer['From'] = 'bperson@dom.ain' 1842 outer.preamble = '' 1843 outer.epilogue = '' 1844 outer.set_boundary('BOUNDARY') 1845 self.ndiffAssertEqual(outer.as_string(), '''\ 1846Content-Type: multipart/mixed; boundary="BOUNDARY" 1847MIME-Version: 1.0 1848Subject: A subject 1849To: aperson@dom.ain 1850From: bperson@dom.ain 1851 1852 1853--BOUNDARY 1854 1855--BOUNDARY-- 1856''') 1857 1858 def test_one_part_in_a_multipart(self): 1859 eq = self.ndiffAssertEqual 1860 outer = MIMEBase('multipart', 'mixed') 1861 outer['Subject'] = 'A subject' 1862 outer['To'] = 'aperson@dom.ain' 1863 outer['From'] = 'bperson@dom.ain' 1864 outer.set_boundary('BOUNDARY') 1865 msg = MIMEText('hello world') 1866 outer.attach(msg) 1867 eq(outer.as_string(), '''\ 1868Content-Type: multipart/mixed; boundary="BOUNDARY" 1869MIME-Version: 1.0 1870Subject: A subject 1871To: aperson@dom.ain 1872From: bperson@dom.ain 1873 1874--BOUNDARY 1875Content-Type: text/plain; charset="us-ascii" 1876MIME-Version: 1.0 1877Content-Transfer-Encoding: 7bit 1878 1879hello world 1880--BOUNDARY-- 1881''') 1882 1883 def test_seq_parts_in_a_multipart_with_empty_preamble(self): 1884 eq = self.ndiffAssertEqual 1885 outer = MIMEBase('multipart', 'mixed') 1886 outer['Subject'] = 'A subject' 1887 outer['To'] = 'aperson@dom.ain' 1888 outer['From'] = 'bperson@dom.ain' 1889 outer.preamble = '' 1890 msg = MIMEText('hello world') 1891 outer.attach(msg) 1892 outer.set_boundary('BOUNDARY') 1893 eq(outer.as_string(), '''\ 1894Content-Type: multipart/mixed; boundary="BOUNDARY" 1895MIME-Version: 1.0 1896Subject: A subject 1897To: aperson@dom.ain 1898From: bperson@dom.ain 1899 1900 
1901--BOUNDARY 1902Content-Type: text/plain; charset="us-ascii" 1903MIME-Version: 1.0 1904Content-Transfer-Encoding: 7bit 1905 1906hello world 1907--BOUNDARY-- 1908''') 1909 1910 1911 def test_seq_parts_in_a_multipart_with_none_preamble(self): 1912 eq = self.ndiffAssertEqual 1913 outer = MIMEBase('multipart', 'mixed') 1914 outer['Subject'] = 'A subject' 1915 outer['To'] = 'aperson@dom.ain' 1916 outer['From'] = 'bperson@dom.ain' 1917 outer.preamble = None 1918 msg = MIMEText('hello world') 1919 outer.attach(msg) 1920 outer.set_boundary('BOUNDARY') 1921 eq(outer.as_string(), '''\ 1922Content-Type: multipart/mixed; boundary="BOUNDARY" 1923MIME-Version: 1.0 1924Subject: A subject 1925To: aperson@dom.ain 1926From: bperson@dom.ain 1927 1928--BOUNDARY 1929Content-Type: text/plain; charset="us-ascii" 1930MIME-Version: 1.0 1931Content-Transfer-Encoding: 7bit 1932 1933hello world 1934--BOUNDARY-- 1935''') 1936 1937 1938 def test_seq_parts_in_a_multipart_with_none_epilogue(self): 1939 eq = self.ndiffAssertEqual 1940 outer = MIMEBase('multipart', 'mixed') 1941 outer['Subject'] = 'A subject' 1942 outer['To'] = 'aperson@dom.ain' 1943 outer['From'] = 'bperson@dom.ain' 1944 outer.epilogue = None 1945 msg = MIMEText('hello world') 1946 outer.attach(msg) 1947 outer.set_boundary('BOUNDARY') 1948 eq(outer.as_string(), '''\ 1949Content-Type: multipart/mixed; boundary="BOUNDARY" 1950MIME-Version: 1.0 1951Subject: A subject 1952To: aperson@dom.ain 1953From: bperson@dom.ain 1954 1955--BOUNDARY 1956Content-Type: text/plain; charset="us-ascii" 1957MIME-Version: 1.0 1958Content-Transfer-Encoding: 7bit 1959 1960hello world 1961--BOUNDARY-- 1962''') 1963 1964 1965 def test_seq_parts_in_a_multipart_with_empty_epilogue(self): 1966 eq = self.ndiffAssertEqual 1967 outer = MIMEBase('multipart', 'mixed') 1968 outer['Subject'] = 'A subject' 1969 outer['To'] = 'aperson@dom.ain' 1970 outer['From'] = 'bperson@dom.ain' 1971 outer.epilogue = '' 1972 msg = MIMEText('hello world') 1973 outer.attach(msg) 
def test_message_external_body(self):
    # msg_36.txt is expected to parse into two top-level parts, the second
    # being a multipart/alternative whose two children are both
    # message/external-body wrappers around a single text/plain message.
    top = self._msgobj('msg_36.txt')
    self.assertEqual(len(top.get_payload()), 2)
    alternative = top.get_payload(1)
    self.assertEqual(alternative.get_content_type(),
                     'multipart/alternative')
    self.assertEqual(len(alternative.get_payload()), 2)
    for external in alternative.get_payload():
        self.assertEqual(external.get_content_type(),
                         'message/external-body')
        self.assertEqual(len(external.get_payload()), 1)
        wrapped = external.get_payload(0)
        self.assertEqual(wrapped.get_content_type(), 'text/plain')
def test_boundary_in_non_multipart(self):
    # msg_40.txt carries a boundary parameter on a text/html Content-Type.
    # Flattening the parsed message must reproduce the text below, with
    # the boundary-looking lines kept as ordinary body text.
    expected = '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
'''
    parsed = self._msgobj('msg_40.txt')
    self.assertEqual(parsed.as_string(), expected)
eq(msg.get_boundary(), ' XXXX') 2107 eq(len(msg.get_payload()), 2) 2108 2109 def test_boundary_without_trailing_newline(self): 2110 m = Parser().parsestr("""\ 2111Content-Type: multipart/mixed; boundary="===============0012394164==" 2112MIME-Version: 1.0 2113 2114--===============0012394164== 2115Content-Type: image/file1.jpg 2116MIME-Version: 1.0 2117Content-Transfer-Encoding: base64 2118 2119YXNkZg== 2120--===============0012394164==--""") 2121 self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==') 2122 2123 def test_mimebase_default_policy(self): 2124 m = MIMEBase('multipart', 'mixed') 2125 self.assertIs(m.policy, email.policy.compat32) 2126 2127 def test_mimebase_custom_policy(self): 2128 m = MIMEBase('multipart', 'mixed', policy=email.policy.default) 2129 self.assertIs(m.policy, email.policy.default) 2130 2131# Test some badly formatted messages 2132class TestNonConformant(TestEmailBase): 2133 2134 def test_parse_missing_minor_type(self): 2135 eq = self.assertEqual 2136 msg = self._msgobj('msg_14.txt') 2137 eq(msg.get_content_type(), 'text/plain') 2138 eq(msg.get_content_maintype(), 'text') 2139 eq(msg.get_content_subtype(), 'plain') 2140 2141 # test_defect_handling 2142 def test_same_boundary_inner_outer(self): 2143 msg = self._msgobj('msg_15.txt') 2144 # XXX We can probably eventually do better 2145 inner = msg.get_payload(0) 2146 self.assertTrue(hasattr(inner, 'defects')) 2147 self.assertEqual(len(inner.defects), 1) 2148 self.assertIsInstance(inner.defects[0], 2149 errors.StartBoundaryNotFoundDefect) 2150 2151 # test_defect_handling 2152 def test_multipart_no_boundary(self): 2153 msg = self._msgobj('msg_25.txt') 2154 self.assertIsInstance(msg.get_payload(), str) 2155 self.assertEqual(len(msg.defects), 2) 2156 self.assertIsInstance(msg.defects[0], 2157 errors.NoBoundaryInMultipartDefect) 2158 self.assertIsInstance(msg.defects[1], 2159 errors.MultipartInvariantViolationDefect) 2160 2161 multipart_msg = textwrap.dedent("""\ 2162 Date: Wed, 14 Nov 2007 
# test_defect_handling
def test_multipart_valid_cte_no_defect(self):
    # Each of these Content-Transfer-Encoding values, added to the header
    # block of the multipart template, must parse without registering any
    # defect.  'BINary' is spelled in mixed case on purpose.
    for cte in ('7bit', '8bit', 'BINary'):
        # subTest reports which value failed and lets the remaining values
        # run even after one of them fails.
        with self.subTest(cte=cte):
            msg = self._str_msg(
                self.multipart_msg.format(
                    "\nContent-Transfer-Encoding: {}".format(cte)))
            self.assertEqual(len(msg.defects), 0)
2206 def test_invalid_content_type(self): 2207 eq = self.assertEqual 2208 neq = self.ndiffAssertEqual 2209 msg = Message() 2210 # RFC 2045, $5.2 says invalid yields text/plain 2211 msg['Content-Type'] = 'text' 2212 eq(msg.get_content_maintype(), 'text') 2213 eq(msg.get_content_subtype(), 'plain') 2214 eq(msg.get_content_type(), 'text/plain') 2215 # Clear the old value and try something /really/ invalid 2216 del msg['content-type'] 2217 msg['Content-Type'] = 'foo' 2218 eq(msg.get_content_maintype(), 'text') 2219 eq(msg.get_content_subtype(), 'plain') 2220 eq(msg.get_content_type(), 'text/plain') 2221 # Still, make sure that the message is idempotently generated 2222 s = StringIO() 2223 g = Generator(s) 2224 g.flatten(msg) 2225 neq(s.getvalue(), 'Content-Type: foo\n\n') 2226 2227 def test_no_start_boundary(self): 2228 eq = self.ndiffAssertEqual 2229 msg = self._msgobj('msg_31.txt') 2230 eq(msg.get_payload(), """\ 2231--BOUNDARY 2232Content-Type: text/plain 2233 2234message 1 2235 2236--BOUNDARY 2237Content-Type: text/plain 2238 2239message 2 2240 2241--BOUNDARY-- 2242""") 2243 2244 def test_no_separating_blank_line(self): 2245 eq = self.ndiffAssertEqual 2246 msg = self._msgobj('msg_35.txt') 2247 eq(msg.as_string(), """\ 2248From: aperson@dom.ain 2249To: bperson@dom.ain 2250Subject: here's something interesting 2251 2252counter to RFC 2822, there's no separating newline here 2253""") 2254 2255 # test_defect_handling 2256 def test_lying_multipart(self): 2257 msg = self._msgobj('msg_41.txt') 2258 self.assertTrue(hasattr(msg, 'defects')) 2259 self.assertEqual(len(msg.defects), 2) 2260 self.assertIsInstance(msg.defects[0], 2261 errors.NoBoundaryInMultipartDefect) 2262 self.assertIsInstance(msg.defects[1], 2263 errors.MultipartInvariantViolationDefect) 2264 2265 # test_defect_handling 2266 def test_missing_start_boundary(self): 2267 outer = self._msgobj('msg_42.txt') 2268 # The message structure is: 2269 # 2270 # multipart/mixed 2271 # text/plain 2272 # message/rfc822 2273 
# multipart/mixed [*] 2274 # 2275 # [*] This message is missing its start boundary 2276 bad = outer.get_payload(1).get_payload(0) 2277 self.assertEqual(len(bad.defects), 1) 2278 self.assertIsInstance(bad.defects[0], 2279 errors.StartBoundaryNotFoundDefect) 2280 2281 # test_defect_handling 2282 def test_first_line_is_continuation_header(self): 2283 eq = self.assertEqual 2284 m = ' Line 1\nSubject: test\n\nbody' 2285 msg = email.message_from_string(m) 2286 eq(msg.keys(), ['Subject']) 2287 eq(msg.get_payload(), 'body') 2288 eq(len(msg.defects), 1) 2289 self.assertDefectsEqual(msg.defects, 2290 [errors.FirstHeaderLineIsContinuationDefect]) 2291 eq(msg.defects[0].line, ' Line 1\n') 2292 2293 # test_defect_handling 2294 def test_missing_header_body_separator(self): 2295 # Our heuristic if we see a line that doesn't look like a header (no 2296 # leading whitespace but no ':') is to assume that the blank line that 2297 # separates the header from the body is missing, and to stop parsing 2298 # headers and start parsing the body. 
2299 msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n') 2300 self.assertEqual(msg.keys(), ['Subject']) 2301 self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n') 2302 self.assertDefectsEqual(msg.defects, 2303 [errors.MissingHeaderBodySeparatorDefect]) 2304 2305 2306# Test RFC 2047 header encoding and decoding 2307class TestRFC2047(TestEmailBase): 2308 def test_rfc2047_multiline(self): 2309 eq = self.assertEqual 2310 s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz 2311 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?=""" 2312 dh = decode_header(s) 2313 eq(dh, [ 2314 (b'Re: ', None), 2315 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'), 2316 (b' baz foo bar ', None), 2317 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')]) 2318 header = make_header(dh) 2319 eq(str(header), 2320 'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s') 2321 self.ndiffAssertEqual(header.encode(maxlinelen=76), """\ 2322Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?= 2323 =?mac-iceland?q?=9Arg=8Cs?=""") 2324 2325 def test_whitespace_keeper_unicode(self): 2326 eq = self.assertEqual 2327 s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>' 2328 dh = decode_header(s) 2329 eq(dh, [(b'Andr\xe9', 'iso-8859-1'), 2330 (b' Pirard <pirard@dom.ain>', None)]) 2331 header = str(make_header(dh)) 2332 eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>') 2333 2334 def test_whitespace_keeper_unicode_2(self): 2335 eq = self.assertEqual 2336 s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?=' 2337 dh = decode_header(s) 2338 eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'), 2339 (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')]) 2340 hu = str(make_header(dh)) 2341 eq(hu, 'The quick brown fox jumped over the lazy dog') 2342 2343 def test_rfc2047_missing_whitespace(self): 2344 s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord' 2345 dh = decode_header(s) 2346 self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 
'iso-8859-1'), 2347 (b'rg', None), (b'\xe5', 'iso-8859-1'), 2348 (b'sbord', None)]) 2349 2350 def test_rfc2047_with_whitespace(self): 2351 s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord' 2352 dh = decode_header(s) 2353 self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'), 2354 (b' rg ', None), (b'\xe5', 'iso-8859-1'), 2355 (b' sbord', None)]) 2356 2357 def test_rfc2047_B_bad_padding(self): 2358 s = '=?iso-8859-1?B?%s?=' 2359 data = [ # only test complete bytes 2360 ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'), 2361 ('dmk=', b'vi'), ('dmk', b'vi') 2362 ] 2363 for q, a in data: 2364 dh = decode_header(s % q) 2365 self.assertEqual(dh, [(a, 'iso-8859-1')]) 2366 2367 def test_rfc2047_Q_invalid_digits(self): 2368 # issue 10004. 2369 s = '=?iso-8859-1?Q?andr=e9=zz?=' 2370 self.assertEqual(decode_header(s), 2371 [(b'andr\xe9=zz', 'iso-8859-1')]) 2372 2373 def test_rfc2047_rfc2047_1(self): 2374 # 1st testcase at end of rfc2047 2375 s = '(=?ISO-8859-1?Q?a?=)' 2376 self.assertEqual(decode_header(s), 2377 [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)]) 2378 2379 def test_rfc2047_rfc2047_2(self): 2380 # 2nd testcase at end of rfc2047 2381 s = '(=?ISO-8859-1?Q?a?= b)' 2382 self.assertEqual(decode_header(s), 2383 [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)]) 2384 2385 def test_rfc2047_rfc2047_3(self): 2386 # 3rd testcase at end of rfc2047 2387 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)' 2388 self.assertEqual(decode_header(s), 2389 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]) 2390 2391 def test_rfc2047_rfc2047_4(self): 2392 # 4th testcase at end of rfc2047 2393 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)' 2394 self.assertEqual(decode_header(s), 2395 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]) 2396 2397 def test_rfc2047_rfc2047_5a(self): 2398 # 5th testcase at end of rfc2047 newline is \r\n 2399 s = '(=?ISO-8859-1?Q?a?=\r\n =?ISO-8859-1?Q?b?=)' 2400 self.assertEqual(decode_header(s), 2401 [(b'(', None), (b'ab', 'iso-8859-1'), 
(b')', None)]) 2402 2403 def test_rfc2047_rfc2047_5b(self): 2404 # 5th testcase at end of rfc2047 newline is \n 2405 s = '(=?ISO-8859-1?Q?a?=\n =?ISO-8859-1?Q?b?=)' 2406 self.assertEqual(decode_header(s), 2407 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]) 2408 2409 def test_rfc2047_rfc2047_6(self): 2410 # 6th testcase at end of rfc2047 2411 s = '(=?ISO-8859-1?Q?a_b?=)' 2412 self.assertEqual(decode_header(s), 2413 [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)]) 2414 2415 def test_rfc2047_rfc2047_7(self): 2416 # 7th testcase at end of rfc2047 2417 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)' 2418 self.assertEqual(decode_header(s), 2419 [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'), 2420 (b')', None)]) 2421 self.assertEqual(make_header(decode_header(s)).encode(), s.lower()) 2422 self.assertEqual(str(make_header(decode_header(s))), '(a b)') 2423 2424 def test_multiline_header(self): 2425 s = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller@xxx.com>' 2426 self.assertEqual(decode_header(s), 2427 [(b'"M\xfcller T"', 'windows-1252'), 2428 (b'<T.Mueller@xxx.com>', None)]) 2429 self.assertEqual(make_header(decode_header(s)).encode(), 2430 ''.join(s.splitlines())) 2431 self.assertEqual(str(make_header(decode_header(s))), 2432 '"Müller T" <T.Mueller@xxx.com>') 2433 2434 2435# Test the MIMEMessage class 2436class TestMIMEMessage(TestEmailBase): 2437 def setUp(self): 2438 with openfile('msg_11.txt') as fp: 2439 self._text = fp.read() 2440 2441 def test_type_error(self): 2442 self.assertRaises(TypeError, MIMEMessage, 'a plain string') 2443 2444 def test_valid_argument(self): 2445 eq = self.assertEqual 2446 subject = 'A sub-message' 2447 m = Message() 2448 m['Subject'] = subject 2449 r = MIMEMessage(m) 2450 eq(r.get_content_type(), 'message/rfc822') 2451 payload = r.get_payload() 2452 self.assertIsInstance(payload, list) 2453 eq(len(payload), 1) 2454 subpart = payload[0] 2455 self.assertIs(subpart, m) 2456 eq(subpart['subject'], subject) 2457 2458 
def test_bad_multipart(self):
    # A MIMEMessage that already wraps one message must reject a second
    # attach() with MultipartConversionError.
    first = Message()
    first['Subject'] = 'subpart 1'
    second = Message()
    second['Subject'] = 'subpart 2'
    wrapper = MIMEMessage(first)
    with self.assertRaises(errors.MultipartConversionError):
        wrapper.attach(second)

def test_generate(self):
    # Flatten a MIMEMessage that encloses a simple message and compare the
    # generated text against the exact expected output.
    # First craft the message to be encapsulated
    enclosed = Message()
    enclosed['Subject'] = 'An enclosed message'
    enclosed.set_payload('Here is the body of the message.\n')
    enclosing = MIMEMessage(enclosed)
    enclosing['Subject'] = 'The enclosing message'
    out = StringIO()
    Generator(out).flatten(enclosing)
    expected = """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
"""
    self.assertEqual(out.getvalue(), expected)

def test_parse_message_rfc822(self):
    # msg_11.txt must parse to a message/rfc822 whose payload is a
    # one-element list holding the enclosed Message.
    check = self.assertEqual
    outer = self._msgobj('msg_11.txt')
    check(outer.get_content_type(), 'message/rfc822')
    parts = outer.get_payload()
    self.assertIsInstance(parts, list)
    check(len(parts), 1)
    enclosed = parts[0]
    self.assertIsInstance(enclosed, Message)
    check(enclosed['subject'], 'An enclosed message')
    check(enclosed.get_payload(), 'Here is the body of the message.\n')

def test_dsn(self):
    # Walk the three subparts of a parsed Delivery Status Notification and
    # check the type and selected contents of each.
    check = self.assertEqual
    # NOTE(review): the indented lines of this literal are reproduced with
    # the single leading space visible in the source under review; confirm
    # the exact indentation against msg_16.txt before relying on it.
    human_readable = """\
This report relates to a message you sent with the following header fields:

 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
 Date: Sun, 23 Sep 2001 20:10:55 -0700
 From: "Ian T. Henry" <henryi@oxy.edu>
 To: SoCal Raves <scr@socal-raves.org>
 Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

 Recipient address: jangel1@cougar.noc.ucla.edu
 Reason: recipient reached disk quota

"""
    # msg 16 is a Delivery Status Notification, see RFC 1894
    report = self._msgobj('msg_16.txt')
    check(report.get_content_type(), 'multipart/report')
    self.assertTrue(report.is_multipart())
    check(len(report.get_payload()), 3)

    # Subpart 1 is a text/plain, human readable section
    text_part = report.get_payload(0)
    check(text_part.get_content_type(), 'text/plain')
    check(text_part.get_payload(), human_readable)

    # Subpart 2 contains the machine parsable DSN information.  It
    # consists of two blocks of headers, represented by two nested Message
    # objects.
    status_part = report.get_payload(1)
    check(status_part.get_content_type(), 'message/delivery-status')
    check(len(status_part.get_payload()), 2)
    # message/delivery-status should treat each block as a bunch of
    # headers, i.e. a bunch of Message objects.
    per_message = status_part.get_payload(0)
    self.assertIsInstance(per_message, Message)
    check(per_message['original-envelope-id'],
          '0GK500B4HD0888@cougar.noc.ucla.edu')
    check(per_message.get_param('dns', header='reporting-mta'), '')
    # Try a missing one <wink>
    check(per_message.get_param('nsd', header='reporting-mta'), None)
    per_recipient = status_part.get_payload(1)
    self.assertIsInstance(per_recipient, Message)
    check(per_recipient['action'], 'failed')
    check(per_recipient.get_params(header='original-recipient'),
          [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
    check(per_recipient.get_param('rfc822', header='final-recipient'), '')

    # Subpart 3 is the original message
    original_part = report.get_payload(2)
    check(original_part.get_content_type(), 'message/rfc822')
    wrapped = original_part.get_payload()
    self.assertIsInstance(wrapped, list)
    check(len(wrapped), 1)
    original = wrapped[0]
    self.assertIsInstance(original, Message)
    check(original.get_content_type(), 'text/plain')
    check(original['message-id'],
          '<002001c144a6$8752e060$56104586@oxy.edu>')

def test_epilogue(self):
    # Build a two-part multipart/mixed message with an explicit preamble
    # and epilogue; its flattened text must equal the contents of
    # msg_21.txt.
    with openfile('msg_21.txt') as fp:
        expected = fp.read()
    outer = Message()
    for name, value in (('From', 'aperson@dom.ain'),
                        ('To', 'bperson@dom.ain'),
                        ('Subject', 'Test')):
        outer[name] = value
    outer.preamble = 'MIME message'
    outer.epilogue = 'End of MIME message\n'
    parts = [MIMEText('One'), MIMEText('Two')]
    outer.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
    for part in parts:
        outer.attach(part)
    out = StringIO()
    Generator(out).flatten(outer)
    self.ndiffAssertEqual(out.getvalue(), expected)

def test_no_nl_preamble(self):
    # Same two-part message, but with an empty epilogue and a preamble
    # that has no trailing newline; as_string() must give the exact text
    # below.
    outer = Message()
    for name, value in (('From', 'aperson@dom.ain'),
                        ('To', 'bperson@dom.ain'),
                        ('Subject', 'Test')):
        outer[name] = value
    outer.preamble = 'MIME message'
    outer.epilogue = ''
    parts = [MIMEText('One'), MIMEText('Two')]
    outer.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
    for part in parts:
        outer.attach(part)
    expected = """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
"""
    self.ndiffAssertEqual(outer.as_string(), expected)

def test_default_type(self):
    # For msg_30.txt, both top-level subparts must report message/rfc822
    # as default and actual type, and the first payload of each must
    # report text/plain for both.
    check = self.assertEqual
    with openfile('msg_30.txt') as fp:
        parsed = email.message_from_file(fp)
    for index in (0, 1):
        container = parsed.get_payload(index)
        check(container.get_default_type(), 'message/rfc822')
        check(container.get_content_type(), 'message/rfc822')
    for index in (0, 1):
        enclosed = parsed.get_payload(index).get_payload(0)
        check(enclosed.get_default_type(), 'text/plain')
        check(enclosed.get_content_type(), 'text/plain')

2626 2627 def test_default_type_with_explicit_container_type(self): 2628 eq = self.assertEqual 2629 with openfile('msg_28.txt') as fp: 2630 msg = email.message_from_file(fp) 2631 container1 = msg.get_payload(0) 2632 eq(container1.get_default_type(), 'message/rfc822') 2633 eq(container1.get_content_type(), 'message/rfc822') 2634 container2 = msg.get_payload(1) 2635 eq(container2.get_default_type(), 'message/rfc822') 2636 eq(container2.get_content_type(), 'message/rfc822') 2637 container1a = container1.get_payload(0) 2638 eq(container1a.get_default_type(), 'text/plain') 2639 eq(container1a.get_content_type(), 'text/plain') 2640 container2a = container2.get_payload(0) 2641 eq(container2a.get_default_type(), 'text/plain') 2642 eq(container2a.get_content_type(), 'text/plain') 2643 2644 def test_default_type_non_parsed(self): 2645 eq = self.assertEqual 2646 neq = self.ndiffAssertEqual 2647 # Set up container 2648 container = MIMEMultipart('digest', 'BOUNDARY') 2649 container.epilogue = '' 2650 # Set up subparts 2651 subpart1a = MIMEText('message 1\n') 2652 subpart2a = MIMEText('message 2\n') 2653 subpart1 = MIMEMessage(subpart1a) 2654 subpart2 = MIMEMessage(subpart2a) 2655 container.attach(subpart1) 2656 container.attach(subpart2) 2657 eq(subpart1.get_content_type(), 'message/rfc822') 2658 eq(subpart1.get_default_type(), 'message/rfc822') 2659 eq(subpart2.get_content_type(), 'message/rfc822') 2660 eq(subpart2.get_default_type(), 'message/rfc822') 2661 neq(container.as_string(0), '''\ 2662Content-Type: multipart/digest; boundary="BOUNDARY" 2663MIME-Version: 1.0 2664 2665--BOUNDARY 2666Content-Type: message/rfc822 2667MIME-Version: 1.0 2668 2669Content-Type: text/plain; charset="us-ascii" 2670MIME-Version: 1.0 2671Content-Transfer-Encoding: 7bit 2672 2673message 1 2674 2675--BOUNDARY 2676Content-Type: message/rfc822 2677MIME-Version: 1.0 2678 2679Content-Type: text/plain; charset="us-ascii" 2680MIME-Version: 1.0 2681Content-Transfer-Encoding: 7bit 2682 2683message 2 2684 
2685--BOUNDARY-- 2686''') 2687 del subpart1['content-type'] 2688 del subpart1['mime-version'] 2689 del subpart2['content-type'] 2690 del subpart2['mime-version'] 2691 eq(subpart1.get_content_type(), 'message/rfc822') 2692 eq(subpart1.get_default_type(), 'message/rfc822') 2693 eq(subpart2.get_content_type(), 'message/rfc822') 2694 eq(subpart2.get_default_type(), 'message/rfc822') 2695 neq(container.as_string(0), '''\ 2696Content-Type: multipart/digest; boundary="BOUNDARY" 2697MIME-Version: 1.0 2698 2699--BOUNDARY 2700 2701Content-Type: text/plain; charset="us-ascii" 2702MIME-Version: 1.0 2703Content-Transfer-Encoding: 7bit 2704 2705message 1 2706 2707--BOUNDARY 2708 2709Content-Type: text/plain; charset="us-ascii" 2710MIME-Version: 1.0 2711Content-Transfer-Encoding: 7bit 2712 2713message 2 2714 2715--BOUNDARY-- 2716''') 2717 2718 def test_mime_attachments_in_constructor(self): 2719 eq = self.assertEqual 2720 text1 = MIMEText('') 2721 text2 = MIMEText('') 2722 msg = MIMEMultipart(_subparts=(text1, text2)) 2723 eq(len(msg.get_payload()), 2) 2724 eq(msg.get_payload(0), text1) 2725 eq(msg.get_payload(1), text2) 2726 2727 def test_default_multipart_constructor(self): 2728 msg = MIMEMultipart() 2729 self.assertTrue(msg.is_multipart()) 2730 2731 def test_multipart_default_policy(self): 2732 msg = MIMEMultipart() 2733 msg['To'] = 'a@b.com' 2734 msg['To'] = 'c@d.com' 2735 self.assertEqual(msg.get_all('to'), ['a@b.com', 'c@d.com']) 2736 2737 def test_multipart_custom_policy(self): 2738 msg = MIMEMultipart(policy=email.policy.default) 2739 msg['To'] = 'a@b.com' 2740 with self.assertRaises(ValueError) as cm: 2741 msg['To'] = 'c@d.com' 2742 self.assertEqual(str(cm.exception), 2743 'There may be at most 1 To headers in a message') 2744 2745# A general test of parser->model->generator idempotency. IOW, read a message 2746# in, parse it into a message object tree, then without touching the tree, 2747# regenerate the plain text. 
The original text and the transformed text 2748# should be identical. Note: that we ignore the Unix-From since that may 2749# contain a changed date. 2750class TestIdempotent(TestEmailBase): 2751 2752 linesep = '\n' 2753 2754 def _msgobj(self, filename): 2755 with openfile(filename) as fp: 2756 data = fp.read() 2757 msg = email.message_from_string(data) 2758 return msg, data 2759 2760 def _idempotent(self, msg, text, unixfrom=False): 2761 eq = self.ndiffAssertEqual 2762 s = StringIO() 2763 g = Generator(s, maxheaderlen=0) 2764 g.flatten(msg, unixfrom=unixfrom) 2765 eq(text, s.getvalue()) 2766 2767 def test_parse_text_message(self): 2768 eq = self.assertEqual 2769 msg, text = self._msgobj('msg_01.txt') 2770 eq(msg.get_content_type(), 'text/plain') 2771 eq(msg.get_content_maintype(), 'text') 2772 eq(msg.get_content_subtype(), 'plain') 2773 eq(msg.get_params()[1], ('charset', 'us-ascii')) 2774 eq(msg.get_param('charset'), 'us-ascii') 2775 eq(msg.preamble, None) 2776 eq(msg.epilogue, None) 2777 self._idempotent(msg, text) 2778 2779 def test_parse_untyped_message(self): 2780 eq = self.assertEqual 2781 msg, text = self._msgobj('msg_03.txt') 2782 eq(msg.get_content_type(), 'text/plain') 2783 eq(msg.get_params(), None) 2784 eq(msg.get_param('charset'), None) 2785 self._idempotent(msg, text) 2786 2787 def test_simple_multipart(self): 2788 msg, text = self._msgobj('msg_04.txt') 2789 self._idempotent(msg, text) 2790 2791 def test_MIME_digest(self): 2792 msg, text = self._msgobj('msg_02.txt') 2793 self._idempotent(msg, text) 2794 2795 def test_long_header(self): 2796 msg, text = self._msgobj('msg_27.txt') 2797 self._idempotent(msg, text) 2798 2799 def test_MIME_digest_with_part_headers(self): 2800 msg, text = self._msgobj('msg_28.txt') 2801 self._idempotent(msg, text) 2802 2803 def test_mixed_with_image(self): 2804 msg, text = self._msgobj('msg_06.txt') 2805 self._idempotent(msg, text) 2806 2807 def test_multipart_report(self): 2808 msg, text = self._msgobj('msg_05.txt') 2809 
self._idempotent(msg, text) 2810 2811 def test_dsn(self): 2812 msg, text = self._msgobj('msg_16.txt') 2813 self._idempotent(msg, text) 2814 2815 def test_preamble_epilogue(self): 2816 msg, text = self._msgobj('msg_21.txt') 2817 self._idempotent(msg, text) 2818 2819 def test_multipart_one_part(self): 2820 msg, text = self._msgobj('msg_23.txt') 2821 self._idempotent(msg, text) 2822 2823 def test_multipart_no_parts(self): 2824 msg, text = self._msgobj('msg_24.txt') 2825 self._idempotent(msg, text) 2826 2827 def test_no_start_boundary(self): 2828 msg, text = self._msgobj('msg_31.txt') 2829 self._idempotent(msg, text) 2830 2831 def test_rfc2231_charset(self): 2832 msg, text = self._msgobj('msg_32.txt') 2833 self._idempotent(msg, text) 2834 2835 def test_more_rfc2231_parameters(self): 2836 msg, text = self._msgobj('msg_33.txt') 2837 self._idempotent(msg, text) 2838 2839 def test_text_plain_in_a_multipart_digest(self): 2840 msg, text = self._msgobj('msg_34.txt') 2841 self._idempotent(msg, text) 2842 2843 def test_nested_multipart_mixeds(self): 2844 msg, text = self._msgobj('msg_12a.txt') 2845 self._idempotent(msg, text) 2846 2847 def test_message_external_body_idempotent(self): 2848 msg, text = self._msgobj('msg_36.txt') 2849 self._idempotent(msg, text) 2850 2851 def test_message_delivery_status(self): 2852 msg, text = self._msgobj('msg_43.txt') 2853 self._idempotent(msg, text, unixfrom=True) 2854 2855 def test_message_signed_idempotent(self): 2856 msg, text = self._msgobj('msg_45.txt') 2857 self._idempotent(msg, text) 2858 2859 def test_content_type(self): 2860 eq = self.assertEqual 2861 # Get a message object and reset the seek pointer for other tests 2862 msg, text = self._msgobj('msg_05.txt') 2863 eq(msg.get_content_type(), 'multipart/report') 2864 # Test the Content-Type: parameters 2865 params = {} 2866 for pk, pv in msg.get_params(): 2867 params[pk] = pv 2868 eq(params['report-type'], 'delivery-status') 2869 eq(params['boundary'], 
'D1690A7AC1.996856090/mail.example.com') 2870 eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep) 2871 eq(msg.epilogue, self.linesep) 2872 eq(len(msg.get_payload()), 3) 2873 # Make sure the subparts are what we expect 2874 msg1 = msg.get_payload(0) 2875 eq(msg1.get_content_type(), 'text/plain') 2876 eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep) 2877 msg2 = msg.get_payload(1) 2878 eq(msg2.get_content_type(), 'text/plain') 2879 eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep) 2880 msg3 = msg.get_payload(2) 2881 eq(msg3.get_content_type(), 'message/rfc822') 2882 self.assertIsInstance(msg3, Message) 2883 payload = msg3.get_payload() 2884 self.assertIsInstance(payload, list) 2885 eq(len(payload), 1) 2886 msg4 = payload[0] 2887 self.assertIsInstance(msg4, Message) 2888 eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep) 2889 2890 def test_parser(self): 2891 eq = self.assertEqual 2892 msg, text = self._msgobj('msg_06.txt') 2893 # Check some of the outer headers 2894 eq(msg.get_content_type(), 'message/rfc822') 2895 # Make sure the payload is a list of exactly one sub-Message, and that 2896 # that submessage has a type of text/plain 2897 payload = msg.get_payload() 2898 self.assertIsInstance(payload, list) 2899 eq(len(payload), 1) 2900 msg1 = payload[0] 2901 self.assertIsInstance(msg1, Message) 2902 eq(msg1.get_content_type(), 'text/plain') 2903 self.assertIsInstance(msg1.get_payload(), str) 2904 eq(msg1.get_payload(), self.linesep) 2905 2906 2907 2908# Test various other bits of the package's functionality 2909class TestMiscellaneous(TestEmailBase): 2910 def test_message_from_string(self): 2911 with openfile('msg_01.txt') as fp: 2912 text = fp.read() 2913 msg = email.message_from_string(text) 2914 s = StringIO() 2915 # Don't wrap/continue long headers since we're trying to test 2916 # idempotency. 
2917 g = Generator(s, maxheaderlen=0) 2918 g.flatten(msg) 2919 self.assertEqual(text, s.getvalue()) 2920 2921 def test_message_from_file(self): 2922 with openfile('msg_01.txt') as fp: 2923 text = fp.read() 2924 fp.seek(0) 2925 msg = email.message_from_file(fp) 2926 s = StringIO() 2927 # Don't wrap/continue long headers since we're trying to test 2928 # idempotency. 2929 g = Generator(s, maxheaderlen=0) 2930 g.flatten(msg) 2931 self.assertEqual(text, s.getvalue()) 2932 2933 def test_message_from_string_with_class(self): 2934 with openfile('msg_01.txt') as fp: 2935 text = fp.read() 2936 2937 # Create a subclass 2938 class MyMessage(Message): 2939 pass 2940 2941 msg = email.message_from_string(text, MyMessage) 2942 self.assertIsInstance(msg, MyMessage) 2943 # Try something more complicated 2944 with openfile('msg_02.txt') as fp: 2945 text = fp.read() 2946 msg = email.message_from_string(text, MyMessage) 2947 for subpart in msg.walk(): 2948 self.assertIsInstance(subpart, MyMessage) 2949 2950 def test_message_from_file_with_class(self): 2951 # Create a subclass 2952 class MyMessage(Message): 2953 pass 2954 2955 with openfile('msg_01.txt') as fp: 2956 msg = email.message_from_file(fp, MyMessage) 2957 self.assertIsInstance(msg, MyMessage) 2958 # Try something more complicated 2959 with openfile('msg_02.txt') as fp: 2960 msg = email.message_from_file(fp, MyMessage) 2961 for subpart in msg.walk(): 2962 self.assertIsInstance(subpart, MyMessage) 2963 2964 def test_custom_message_does_not_require_arguments(self): 2965 class MyMessage(Message): 2966 def __init__(self): 2967 super().__init__() 2968 msg = self._str_msg("Subject: test\n\ntest", MyMessage) 2969 self.assertIsInstance(msg, MyMessage) 2970 2971 def test__all__(self): 2972 module = __import__('email') 2973 self.assertEqual(sorted(module.__all__), [ 2974 'base64mime', 'charset', 'encoders', 'errors', 'feedparser', 2975 'generator', 'header', 'iterators', 'message', 2976 'message_from_binary_file', 'message_from_bytes', 
2977 'message_from_file', 'message_from_string', 'mime', 'parser', 2978 'quoprimime', 'utils', 2979 ]) 2980 2981 def test_formatdate(self): 2982 now = time.time() 2983 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6], 2984 time.gmtime(now)[:6]) 2985 2986 def test_formatdate_localtime(self): 2987 now = time.time() 2988 self.assertEqual( 2989 utils.parsedate(utils.formatdate(now, localtime=True))[:6], 2990 time.localtime(now)[:6]) 2991 2992 def test_formatdate_usegmt(self): 2993 now = time.time() 2994 self.assertEqual( 2995 utils.formatdate(now, localtime=False), 2996 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now))) 2997 self.assertEqual( 2998 utils.formatdate(now, localtime=False, usegmt=True), 2999 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now))) 3000 3001 # parsedate and parsedate_tz will become deprecated interfaces someday 3002 def test_parsedate_returns_None_for_invalid_strings(self): 3003 self.assertIsNone(utils.parsedate('')) 3004 self.assertIsNone(utils.parsedate_tz('')) 3005 self.assertIsNone(utils.parsedate(' ')) 3006 self.assertIsNone(utils.parsedate_tz(' ')) 3007 self.assertIsNone(utils.parsedate('0')) 3008 self.assertIsNone(utils.parsedate_tz('0')) 3009 self.assertIsNone(utils.parsedate('A Complete Waste of Time')) 3010 self.assertIsNone(utils.parsedate_tz('A Complete Waste of Time')) 3011 # Not a part of the spec but, but this has historically worked: 3012 self.assertIsNone(utils.parsedate(None)) 3013 self.assertIsNone(utils.parsedate_tz(None)) 3014 3015 def test_parsedate_compact(self): 3016 # The FWS after the comma is optional 3017 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'), 3018 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800')) 3019 3020 def test_parsedate_no_dayofweek(self): 3021 eq = self.assertEqual 3022 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'), 3023 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800)) 3024 3025 def test_parsedate_compact_no_dayofweek(self): 3026 eq = 
self.assertEqual 3027 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'), 3028 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800)) 3029 3030 def test_parsedate_no_space_before_positive_offset(self): 3031 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'), 3032 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800)) 3033 3034 def test_parsedate_no_space_before_negative_offset(self): 3035 # Issue 1155362: we already handled '+' for this case. 3036 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'), 3037 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800)) 3038 3039 3040 def test_parsedate_accepts_time_with_dots(self): 3041 eq = self.assertEqual 3042 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'), 3043 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800)) 3044 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'), 3045 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800)) 3046 3047 def test_parsedate_acceptable_to_time_functions(self): 3048 eq = self.assertEqual 3049 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800') 3050 t = int(time.mktime(timetup)) 3051 eq(time.localtime(t)[:6], timetup[:6]) 3052 eq(int(time.strftime('%Y', timetup)), 2003) 3053 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800') 3054 t = int(time.mktime(timetup[:9])) 3055 eq(time.localtime(t)[:6], timetup[:6]) 3056 eq(int(time.strftime('%Y', timetup[:9])), 2003) 3057 3058 def test_mktime_tz(self): 3059 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0, 3060 -1, -1, -1, 0)), 0) 3061 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0, 3062 -1, -1, -1, 1234)), -1234) 3063 3064 def test_parsedate_y2k(self): 3065 """Test for parsing a date with a two-digit year. 3066 3067 Parsing a date with a two-digit year should return the correct 3068 four-digit year. RFC822 allows two-digit years, but RFC2822 (which 3069 obsoletes RFC822) requires four-digit years. 
3070 3071 """ 3072 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'), 3073 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800')) 3074 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'), 3075 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800')) 3076 3077 def test_parseaddr_empty(self): 3078 self.assertEqual(utils.parseaddr('<>'), ('', '')) 3079 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '') 3080 3081 def test_parseaddr_multiple_domains(self): 3082 self.assertEqual( 3083 utils.parseaddr('a@b@c'), 3084 ('', '') 3085 ) 3086 self.assertEqual( 3087 utils.parseaddr('a@b.c@c'), 3088 ('', '') 3089 ) 3090 self.assertEqual( 3091 utils.parseaddr('a@172.17.0.1@c'), 3092 ('', '') 3093 ) 3094 3095 def test_noquote_dump(self): 3096 self.assertEqual( 3097 utils.formataddr(('A Silly Person', 'person@dom.ain')), 3098 'A Silly Person <person@dom.ain>') 3099 3100 def test_escape_dump(self): 3101 self.assertEqual( 3102 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')), 3103 r'"A (Very) Silly Person" <person@dom.ain>') 3104 self.assertEqual( 3105 utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'), 3106 ('A (Very) Silly Person', 'person@dom.ain')) 3107 a = r'A \(Special\) Person' 3108 b = 'person@dom.ain' 3109 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b)) 3110 3111 def test_escape_backslashes(self): 3112 self.assertEqual( 3113 utils.formataddr((r'Arthur \Backslash\ Foobar', 'person@dom.ain')), 3114 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>') 3115 a = r'Arthur \Backslash\ Foobar' 3116 b = 'person@dom.ain' 3117 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b)) 3118 3119 def test_quotes_unicode_names(self): 3120 # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 
3121 name = "H\u00e4ns W\u00fcrst" 3122 addr = 'person@dom.ain' 3123 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>" 3124 latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>" 3125 self.assertEqual(utils.formataddr((name, addr)), utf8_base64) 3126 self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'), 3127 latin1_quopri) 3128 3129 def test_accepts_any_charset_like_object(self): 3130 # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 3131 name = "H\u00e4ns W\u00fcrst" 3132 addr = 'person@dom.ain' 3133 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>" 3134 foobar = "FOOBAR" 3135 class CharsetMock: 3136 def header_encode(self, string): 3137 return foobar 3138 mock = CharsetMock() 3139 mock_expected = "%s <%s>" % (foobar, addr) 3140 self.assertEqual(utils.formataddr((name, addr), mock), mock_expected) 3141 self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')), 3142 utf8_base64) 3143 3144 def test_invalid_charset_like_object_raises_error(self): 3145 # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 3146 name = "H\u00e4ns W\u00fcrst" 3147 addr = 'person@dom.ain' 3148 # An object without a header_encode method: 3149 bad_charset = object() 3150 self.assertRaises(AttributeError, utils.formataddr, (name, addr), 3151 bad_charset) 3152 3153 def test_unicode_address_raises_error(self): 3154 # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 3155 addr = 'pers\u00f6n@dom.in' 3156 self.assertRaises(UnicodeError, utils.formataddr, (None, addr)) 3157 self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr)) 3158 3159 def test_name_with_dot(self): 3160 x = 'John X. Doe <jxd@example.com>' 3161 y = '"John X. Doe" <jxd@example.com>' 3162 a, b = ('John X. 
Doe', 'jxd@example.com') 3163 self.assertEqual(utils.parseaddr(x), (a, b)) 3164 self.assertEqual(utils.parseaddr(y), (a, b)) 3165 # formataddr() quotes the name if there's a dot in it 3166 self.assertEqual(utils.formataddr((a, b)), y) 3167 3168 def test_parseaddr_preserves_quoted_pairs_in_addresses(self): 3169 # issue 10005. Note that in the third test the second pair of 3170 # backslashes is not actually a quoted pair because it is not inside a 3171 # comment or quoted string: the address being parsed has a quoted 3172 # string containing a quoted backslash, followed by 'example' and two 3173 # backslashes, followed by another quoted string containing a space and 3174 # the word 'example'. parseaddr copies those two backslashes 3175 # literally. Per rfc5322 this is not technically correct since a \ may 3176 # not appear in an address outside of a quoted string. It is probably 3177 # a sensible Postel interpretation, though. 3178 eq = self.assertEqual 3179 eq(utils.parseaddr('""example" example"@example.com'), 3180 ('', '""example" example"@example.com')) 3181 eq(utils.parseaddr('"\\"example\\" example"@example.com'), 3182 ('', '"\\"example\\" example"@example.com')) 3183 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'), 3184 ('', '"\\\\"example\\\\" example"@example.com')) 3185 3186 def test_parseaddr_preserves_spaces_in_local_part(self): 3187 # issue 9286. A normal RFC5322 local part should not contain any 3188 # folding white space, but legacy local parts can (they are a sequence 3189 # of atoms, not dotatoms). On the other hand we strip whitespace from 3190 # before the @ and around dots, on the assumption that the whitespace 3191 # around the punctuation is a mistake in what would otherwise be 3192 # an RFC5322 local part. Leading whitespace is, usual, stripped as well. 
3193 self.assertEqual(('', "merwok wok@xample.com"), 3194 utils.parseaddr("merwok wok@xample.com")) 3195 self.assertEqual(('', "merwok wok@xample.com"), 3196 utils.parseaddr("merwok wok@xample.com")) 3197 self.assertEqual(('', "merwok wok@xample.com"), 3198 utils.parseaddr(" merwok wok @xample.com")) 3199 self.assertEqual(('', 'merwok"wok" wok@xample.com'), 3200 utils.parseaddr('merwok"wok" wok@xample.com')) 3201 self.assertEqual(('', 'merwok.wok.wok@xample.com'), 3202 utils.parseaddr('merwok. wok . wok@xample.com')) 3203 3204 def test_formataddr_does_not_quote_parens_in_quoted_string(self): 3205 addr = ("'foo@example.com' (foo@example.com)", 3206 'foo@example.com') 3207 addrstr = ('"\'foo@example.com\' ' 3208 '(foo@example.com)" <foo@example.com>') 3209 self.assertEqual(utils.parseaddr(addrstr), addr) 3210 self.assertEqual(utils.formataddr(addr), addrstr) 3211 3212 3213 def test_multiline_from_comment(self): 3214 x = """\ 3215Foo 3216\tBar <foo@example.com>""" 3217 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com')) 3218 3219 def test_quote_dump(self): 3220 self.assertEqual( 3221 utils.formataddr(('A Silly; Person', 'person@dom.ain')), 3222 r'"A Silly; Person" <person@dom.ain>') 3223 3224 def test_charset_richcomparisons(self): 3225 eq = self.assertEqual 3226 ne = self.assertNotEqual 3227 cset1 = Charset() 3228 cset2 = Charset() 3229 eq(cset1, 'us-ascii') 3230 eq(cset1, 'US-ASCII') 3231 eq(cset1, 'Us-AsCiI') 3232 eq('us-ascii', cset1) 3233 eq('US-ASCII', cset1) 3234 eq('Us-AsCiI', cset1) 3235 ne(cset1, 'usascii') 3236 ne(cset1, 'USASCII') 3237 ne(cset1, 'UsAsCiI') 3238 ne('usascii', cset1) 3239 ne('USASCII', cset1) 3240 ne('UsAsCiI', cset1) 3241 eq(cset1, cset2) 3242 eq(cset2, cset1) 3243 3244 def test_getaddresses(self): 3245 eq = self.assertEqual 3246 eq(utils.getaddresses(['aperson@dom.ain (Al Person)', 3247 'Bud Person <bperson@dom.ain>']), 3248 [('Al Person', 'aperson@dom.ain'), 3249 ('Bud Person', 'bperson@dom.ain')]) 3250 3251 def 
test_getaddresses_nasty(self): 3252 eq = self.assertEqual 3253 eq(utils.getaddresses(['foo: ;']), [('', '')]) 3254 eq(utils.getaddresses( 3255 ['[]*-- =~$']), 3256 [('', ''), ('', ''), ('', '*--')]) 3257 eq(utils.getaddresses( 3258 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']), 3259 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) 3260 3261 def test_getaddresses_embedded_comment(self): 3262 """Test proper handling of a nested comment""" 3263 eq = self.assertEqual 3264 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>']) 3265 eq(addrs[0][1], 'foo@bar.com') 3266 3267 def test_make_msgid_collisions(self): 3268 # Test make_msgid uniqueness, even with multiple threads 3269 class MsgidsThread(Thread): 3270 def run(self): 3271 # generate msgids for 3 seconds 3272 self.msgids = [] 3273 append = self.msgids.append 3274 make_msgid = utils.make_msgid 3275 clock = time.monotonic 3276 tfin = clock() + 3.0 3277 while clock() < tfin: 3278 append(make_msgid(domain='testdomain-string')) 3279 3280 threads = [MsgidsThread() for i in range(5)] 3281 with start_threads(threads): 3282 pass 3283 all_ids = sum([t.msgids for t in threads], []) 3284 self.assertEqual(len(set(all_ids)), len(all_ids)) 3285 3286 def test_utils_quote_unquote(self): 3287 eq = self.assertEqual 3288 msg = Message() 3289 msg.add_header('content-disposition', 'attachment', 3290 filename='foo\\wacky"name') 3291 eq(msg.get_filename(), 'foo\\wacky"name') 3292 3293 def test_get_body_encoding_with_bogus_charset(self): 3294 charset = Charset('not a charset') 3295 self.assertEqual(charset.get_body_encoding(), 'base64') 3296 3297 def test_get_body_encoding_with_uppercase_charset(self): 3298 eq = self.assertEqual 3299 msg = Message() 3300 msg['Content-Type'] = 'text/plain; charset=UTF-8' 3301 eq(msg['content-type'], 'text/plain; charset=UTF-8') 3302 charsets = msg.get_charsets() 3303 eq(len(charsets), 1) 3304 eq(charsets[0], 'utf-8') 3305 charset = Charset(charsets[0]) 3306 
eq(charset.get_body_encoding(), 'base64') 3307 msg.set_payload(b'hello world', charset=charset) 3308 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n') 3309 eq(msg.get_payload(decode=True), b'hello world') 3310 eq(msg['content-transfer-encoding'], 'base64') 3311 # Try another one 3312 msg = Message() 3313 msg['Content-Type'] = 'text/plain; charset="US-ASCII"' 3314 charsets = msg.get_charsets() 3315 eq(len(charsets), 1) 3316 eq(charsets[0], 'us-ascii') 3317 charset = Charset(charsets[0]) 3318 eq(charset.get_body_encoding(), encoders.encode_7or8bit) 3319 msg.set_payload('hello world', charset=charset) 3320 eq(msg.get_payload(), 'hello world') 3321 eq(msg['content-transfer-encoding'], '7bit') 3322 3323 def test_charsets_case_insensitive(self): 3324 lc = Charset('us-ascii') 3325 uc = Charset('US-ASCII') 3326 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding()) 3327 3328 def test_partial_falls_inside_message_delivery_status(self): 3329 eq = self.ndiffAssertEqual 3330 # The Parser interface provides chunks of data to FeedParser in 8192 3331 # byte gulps. SF bug #1076485 found one of those chunks inside 3332 # message/delivery-status header block, which triggered an 3333 # unreadline() of NeedMoreData. 
3334 msg = self._msgobj('msg_43.txt') 3335 sfp = StringIO() 3336 iterators._structure(msg, sfp) 3337 eq(sfp.getvalue(), """\ 3338multipart/report 3339 text/plain 3340 message/delivery-status 3341 text/plain 3342 text/plain 3343 text/plain 3344 text/plain 3345 text/plain 3346 text/plain 3347 text/plain 3348 text/plain 3349 text/plain 3350 text/plain 3351 text/plain 3352 text/plain 3353 text/plain 3354 text/plain 3355 text/plain 3356 text/plain 3357 text/plain 3358 text/plain 3359 text/plain 3360 text/plain 3361 text/plain 3362 text/plain 3363 text/plain 3364 text/plain 3365 text/plain 3366 text/plain 3367 text/rfc822-headers 3368""") 3369 3370 def test_make_msgid_domain(self): 3371 self.assertEqual( 3372 email.utils.make_msgid(domain='testdomain-string')[-19:], 3373 '@testdomain-string>') 3374 3375 def test_make_msgid_idstring(self): 3376 self.assertEqual( 3377 email.utils.make_msgid(idstring='test-idstring', 3378 domain='testdomain-string')[-33:], 3379 '.test-idstring@testdomain-string>') 3380 3381 def test_make_msgid_default_domain(self): 3382 with patch('socket.getfqdn') as mock_getfqdn: 3383 mock_getfqdn.return_value = domain = 'pythontest.example.com' 3384 self.assertTrue( 3385 email.utils.make_msgid().endswith( 3386 '@' + domain + '>')) 3387 3388 def test_Generator_linend(self): 3389 # Issue 14645. 3390 with openfile('msg_26.txt', newline='\n') as f: 3391 msgtxt = f.read() 3392 msgtxt_nl = msgtxt.replace('\r\n', '\n') 3393 msg = email.message_from_string(msgtxt) 3394 s = StringIO() 3395 g = email.generator.Generator(s) 3396 g.flatten(msg) 3397 self.assertEqual(s.getvalue(), msgtxt_nl) 3398 3399 def test_BytesGenerator_linend(self): 3400 # Issue 14645. 
def test_mime_classes_policy_argument(self):
    """Every MIME convenience class honours the ``policy`` keyword.

    Built without ``policy`` an instance must carry ``compat32``; built
    with ``policy=email.policy.default`` it must carry exactly that object.
    """
    def slurp(name):
        # Read one binary fixture file completely.
        with openfile(name, 'rb') as stream:
            return stream.read()

    audio_bytes = slurp('audiotest.au')
    image_bytes = slurp('PyBanner048.gif')
    # (class, positional constructor arguments) pairs.
    cases = (
        (MIMEApplication, ('',)),
        (MIMEAudio, (audio_bytes,)),
        (MIMEImage, (image_bytes,)),
        (MIMEMessage, (Message(),)),
        (MIMENonMultipart, ('multipart', 'mixed')),
        (MIMEText, ('',)),
    )
    for mime_cls, ctor_args in cases:
        label = mime_cls.__name__
        with self.subTest(cls=label, policy='compat32'):
            built = mime_cls(*ctor_args)
            self.assertIs(built.policy, email.policy.compat32)
        with self.subTest(cls=label, policy='default'):
            built = mime_cls(*ctor_args, policy=email.policy.default)
            self.assertIs(built.policy, email.policy.default)
def test_typed_subpart_iterator(self):
    """typed_subpart_iterator(msg, 'text') yields two subparts for msg_04.txt."""
    msg = self._msgobj('msg_04.txt')
    payloads = [
        part.get_payload()
        for part in iterators.typed_subpart_iterator(msg, 'text')
    ]
    self.assertEqual(len(payloads), 2)
    # Both matching parts are expected to carry the same two-line body.
    self.assertEqual(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")
def parse(self, chunks):
    """Feed each item of *chunks* to a fresh FeedParser and return the closed result."""
    parser = FeedParser()
    feed = parser.feed
    for piece in chunks:
        feed(piece)
    return parser.close()
def test_header_parser(self):
    """HeaderParser parses only the headers of a complex multipart document.

    Although the message declares multipart/mixed, the parsed object must
    not be multipart and its payload must be a single string.
    """
    with openfile('msg_02.txt') as source:
        msg = HeaderParser().parse(source)
    expected_headers = (
        ('from', 'ppp-request@zzz.org'),
        ('to', 'ppp@zzz.org'),
    )
    for field, value in expected_headers:
        self.assertEqual(msg[field], value)
    self.assertEqual(msg.get_content_type(), 'multipart/mixed')
    self.assertFalse(msg.is_multipart())
    self.assertIsInstance(msg.get_payload(), str)
openfile('msg_02.txt', 'rb') as fp: 3623 email.parser.BytesParser().parse(fp) 3624 self.assertFalse(fp.closed) 3625 3626 def test_bytes_parser_on_exception_does_not_close_file(self): 3627 with openfile('msg_15.txt', 'rb') as fp: 3628 bytesParser = email.parser.BytesParser 3629 self.assertRaises(email.errors.StartBoundaryNotFoundDefect, 3630 bytesParser(policy=email.policy.strict).parse, 3631 fp) 3632 self.assertFalse(fp.closed) 3633 3634 def test_parser_does_not_close_file(self): 3635 with openfile('msg_02.txt', 'r') as fp: 3636 email.parser.Parser().parse(fp) 3637 self.assertFalse(fp.closed) 3638 3639 def test_parser_on_exception_does_not_close_file(self): 3640 with openfile('msg_15.txt', 'r') as fp: 3641 parser = email.parser.Parser 3642 self.assertRaises(email.errors.StartBoundaryNotFoundDefect, 3643 parser(policy=email.policy.strict).parse, fp) 3644 self.assertFalse(fp.closed) 3645 3646 def test_whitespace_continuation(self): 3647 eq = self.assertEqual 3648 # This message contains a line after the Subject: header that has only 3649 # whitespace, but it is not empty! 3650 msg = email.message_from_string("""\ 3651From: aperson@dom.ain 3652To: bperson@dom.ain 3653Subject: the next line has a space on it 3654\x20 3655Date: Mon, 8 Apr 2002 15:09:19 -0400 3656Message-ID: spam 3657 3658Here's the message body 3659""") 3660 eq(msg['subject'], 'the next line has a space on it\n ') 3661 eq(msg['message-id'], 'spam') 3662 eq(msg.get_payload(), "Here's the message body\n") 3663 3664 def test_whitespace_continuation_last_header(self): 3665 eq = self.assertEqual 3666 # Like the previous test, but the subject line is the last 3667 # header. 
def test_crlf_separation(self):
    """msg_26.txt splits into two parts, and the text part keeps its CRLFs."""
    # newline='\n' keeps the '\r\n' sequences of the input file intact.
    with openfile('msg_26.txt', newline='\n') as source:
        msg = Parser().parse(source)
    self.assertEqual(len(msg.get_payload()), 2)
    text_part, attachment = msg.get_payload(0), msg.get_payload(1)
    self.assertEqual(text_part.get_content_type(), 'text/plain')
    self.assertEqual(text_part.get_payload(),
                     'Simple email with attachment.\r\n\r\n')
    self.assertEqual(attachment.get_content_type(), 'application/riscos')
eq(part2.get_content_type(), 'message/rfc822') 3729 eq(part2.is_multipart(), 1) 3730 eq(len(part2.get_payload()), 1) 3731 part2a = part2.get_payload(0) 3732 eq(part2a.is_multipart(), 0) 3733 eq(part2a.get_content_type(), 'text/plain') 3734 neq(part2a.get_payload(), 'message 2\n') 3735 3736 def test_three_lines(self): 3737 # A bug report by Andrew McNamara 3738 lines = ['From: Andrew Person <aperson@dom.ain', 3739 'Subject: Test', 3740 'Date: Tue, 20 Aug 2002 16:43:45 +1000'] 3741 msg = email.message_from_string(NL.join(lines)) 3742 self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000') 3743 3744 def test_strip_line_feed_and_carriage_return_in_headers(self): 3745 eq = self.assertEqual 3746 # For [ 1002475 ] email message parser doesn't handle \r\n correctly 3747 value1 = 'text' 3748 value2 = 'more text' 3749 m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % ( 3750 value1, value2) 3751 msg = email.message_from_string(m) 3752 eq(msg.get('Header'), value1) 3753 eq(msg.get('Next-Header'), value2) 3754 3755 def test_rfc2822_header_syntax(self): 3756 eq = self.assertEqual 3757 m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody' 3758 msg = email.message_from_string(m) 3759 eq(len(msg), 3) 3760 eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From']) 3761 eq(msg.get_payload(), 'body') 3762 3763 def test_rfc2822_space_not_allowed_in_header(self): 3764 eq = self.assertEqual 3765 m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody' 3766 msg = email.message_from_string(m) 3767 eq(len(msg.keys()), 0) 3768 3769 def test_rfc2822_one_character_header(self): 3770 eq = self.assertEqual 3771 m = 'A: first header\nB: second header\nCC: third header\n\nbody' 3772 msg = email.message_from_string(m) 3773 headers = msg.keys() 3774 headers.sort() 3775 eq(headers, ['A', 'B', 'CC']) 3776 eq(msg.get_payload(), 'body') 3777 3778 def test_CRLFLF_at_end_of_part(self): 3779 # issue 5610: feedparser should not eat two chars from body part ending 3780 # 
with "\r\n\n". 3781 m = ( 3782 "From: foo@bar.com\n" 3783 "To: baz\n" 3784 "Mime-Version: 1.0\n" 3785 "Content-Type: multipart/mixed; boundary=BOUNDARY\n" 3786 "\n" 3787 "--BOUNDARY\n" 3788 "Content-Type: text/plain\n" 3789 "\n" 3790 "body ending with CRLF newline\r\n" 3791 "\n" 3792 "--BOUNDARY--\n" 3793 ) 3794 msg = email.message_from_string(m) 3795 self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n')) 3796 3797 3798class Test8BitBytesHandling(TestEmailBase): 3799 # In Python3 all input is string, but that doesn't work if the actual input 3800 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we 3801 # decode byte streams using the surrogateescape error handler, and 3802 # reconvert to binary at appropriate places if we detect surrogates. This 3803 # doesn't allow us to transform headers with 8bit bytes (they get munged), 3804 # but it does allow us to parse and preserve them, and to decode body 3805 # parts that use an 8bit CTE. 3806 3807 bodytest_msg = textwrap.dedent("""\ 3808 From: foo@bar.com 3809 To: baz 3810 Mime-Version: 1.0 3811 Content-Type: text/plain; charset={charset} 3812 Content-Transfer-Encoding: {cte} 3813 3814 {bodyline} 3815 """) 3816 3817 def test_known_8bit_CTE(self): 3818 m = self.bodytest_msg.format(charset='utf-8', 3819 cte='8bit', 3820 bodyline='pöstal').encode('utf-8') 3821 msg = email.message_from_bytes(m) 3822 self.assertEqual(msg.get_payload(), "pöstal\n") 3823 self.assertEqual(msg.get_payload(decode=True), 3824 "pöstal\n".encode('utf-8')) 3825 3826 def test_unknown_8bit_CTE(self): 3827 m = self.bodytest_msg.format(charset='notavalidcharset', 3828 cte='8bit', 3829 bodyline='pöstal').encode('utf-8') 3830 msg = email.message_from_bytes(m) 3831 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n") 3832 self.assertEqual(msg.get_payload(decode=True), 3833 "pöstal\n".encode('utf-8')) 3834 3835 def test_8bit_in_quopri_body(self): 3836 # This is non-RFC compliant data...without 'decode' the library code 
3837 # decodes the body using the charset from the headers, and because the 3838 # source byte really is utf-8 this works. This is likely to fail 3839 # against real dirty data (ie: produce mojibake), but the data is 3840 # invalid anyway so it is as good a guess as any. But this means that 3841 # this test just confirms the current behavior; that behavior is not 3842 # necessarily the best possible behavior. With 'decode' it is 3843 # returning the raw bytes, so that test should be of correct behavior, 3844 # or at least produce the same result that email4 did. 3845 m = self.bodytest_msg.format(charset='utf-8', 3846 cte='quoted-printable', 3847 bodyline='p=C3=B6stál').encode('utf-8') 3848 msg = email.message_from_bytes(m) 3849 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n') 3850 self.assertEqual(msg.get_payload(decode=True), 3851 'pöstál\n'.encode('utf-8')) 3852 3853 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self): 3854 # This is similar to the previous test, but proves that if the 8bit 3855 # byte is undecodeable in the specified charset, it gets replaced 3856 # by the unicode 'unknown' character. Again, this may or may not 3857 # be the ideal behavior. Note that if decode=False none of the 3858 # decoders will get involved, so this is the only test we need 3859 # for this behavior. 3860 m = self.bodytest_msg.format(charset='ascii', 3861 cte='quoted-printable', 3862 bodyline='p=C3=B6stál').encode('utf-8') 3863 msg = email.message_from_bytes(m) 3864 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n') 3865 self.assertEqual(msg.get_payload(decode=True), 3866 'pöstál\n'.encode('utf-8')) 3867 3868 # test_defect_handling:test_invalid_chars_in_base64_payload 3869 def test_8bit_in_base64_body(self): 3870 # If we get 8bit bytes in a base64 body, we can just ignore them 3871 # as being outside the base64 alphabet and decode anyway. But 3872 # we register a defect. 
3873 m = self.bodytest_msg.format(charset='utf-8', 3874 cte='base64', 3875 bodyline='cMO2c3RhbAá=').encode('utf-8') 3876 msg = email.message_from_bytes(m) 3877 self.assertEqual(msg.get_payload(decode=True), 3878 'pöstal'.encode('utf-8')) 3879 self.assertIsInstance(msg.defects[0], 3880 errors.InvalidBase64CharactersDefect) 3881 3882 def test_8bit_in_uuencode_body(self): 3883 # Sticking an 8bit byte in a uuencode block makes it undecodable by 3884 # normal means, so the block is returned undecoded, but as bytes. 3885 m = self.bodytest_msg.format(charset='utf-8', 3886 cte='uuencode', 3887 bodyline='<,.V<W1A; á ').encode('utf-8') 3888 msg = email.message_from_bytes(m) 3889 self.assertEqual(msg.get_payload(decode=True), 3890 '<,.V<W1A; á \n'.encode('utf-8')) 3891 3892 3893 headertest_headers = ( 3894 ('From: foo@bar.com', ('From', 'foo@bar.com')), 3895 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')), 3896 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n' 3897 '\tJean de Baddie', 3898 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_' 3899 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n' 3900 ' =?unknown-8bit?q?_Jean_de_Baddie?=')), 3901 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')), 3902 ) 3903 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) + 3904 '\nYes, they are flying.\n').encode('utf-8') 3905 3906 def test_get_8bit_header(self): 3907 msg = email.message_from_bytes(self.headertest_msg) 3908 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz') 3909 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz') 3910 3911 def test_print_8bit_headers(self): 3912 msg = email.message_from_bytes(self.headertest_msg) 3913 self.assertEqual(str(msg), 3914 textwrap.dedent("""\ 3915 From: {} 3916 To: {} 3917 Subject: {} 3918 From: {} 3919 3920 Yes, they are flying. 
3921 """).format(*[expected[1] for (_, expected) in 3922 self.headertest_headers])) 3923 3924 def test_values_with_8bit_headers(self): 3925 msg = email.message_from_bytes(self.headertest_msg) 3926 self.assertListEqual([str(x) for x in msg.values()], 3927 ['foo@bar.com', 3928 'b\uFFFD\uFFFDz', 3929 'Maintenant je vous pr\uFFFD\uFFFDsente mon ' 3930 'coll\uFFFD\uFFFDgue, le pouf ' 3931 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n' 3932 '\tJean de Baddie', 3933 "g\uFFFD\uFFFDst"]) 3934 3935 def test_items_with_8bit_headers(self): 3936 msg = email.message_from_bytes(self.headertest_msg) 3937 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()], 3938 [('From', 'foo@bar.com'), 3939 ('To', 'b\uFFFD\uFFFDz'), 3940 ('Subject', 'Maintenant je vous ' 3941 'pr\uFFFD\uFFFDsente ' 3942 'mon coll\uFFFD\uFFFDgue, le pouf ' 3943 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n' 3944 '\tJean de Baddie'), 3945 ('From', 'g\uFFFD\uFFFDst')]) 3946 3947 def test_get_all_with_8bit_headers(self): 3948 msg = email.message_from_bytes(self.headertest_msg) 3949 self.assertListEqual([str(x) for x in msg.get_all('from')], 3950 ['foo@bar.com', 3951 'g\uFFFD\uFFFDst']) 3952 3953 def test_get_content_type_with_8bit(self): 3954 msg = email.message_from_bytes(textwrap.dedent("""\ 3955 Content-Type: text/pl\xA7in; charset=utf-8 3956 """).encode('latin-1')) 3957 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin") 3958 self.assertEqual(msg.get_content_maintype(), "text") 3959 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin") 3960 3961 # test_headerregistry.TestContentTypeHeader.non_ascii_in_params 3962 def test_get_params_with_8bit(self): 3963 msg = email.message_from_bytes( 3964 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1')) 3965 self.assertEqual(msg.get_params(header='x-header'), 3966 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')]) 3967 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne') 3968 # XXX: someday you might be able to get 'b\xa7r', for now 
you can't. 3969 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None) 3970 3971 # test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value 3972 def test_get_rfc2231_params_with_8bit(self): 3973 msg = email.message_from_bytes(textwrap.dedent("""\ 3974 Content-Type: text/plain; charset=us-ascii; 3975 title*=us-ascii'en'This%20is%20not%20f\xa7n""" 3976 ).encode('latin-1')) 3977 self.assertEqual(msg.get_param('title'), 3978 ('us-ascii', 'en', 'This is not f\uFFFDn')) 3979 3980 def test_set_rfc2231_params_with_8bit(self): 3981 msg = email.message_from_bytes(textwrap.dedent("""\ 3982 Content-Type: text/plain; charset=us-ascii; 3983 title*=us-ascii'en'This%20is%20not%20f\xa7n""" 3984 ).encode('latin-1')) 3985 msg.set_param('title', 'test') 3986 self.assertEqual(msg.get_param('title'), 'test') 3987 3988 def test_del_rfc2231_params_with_8bit(self): 3989 msg = email.message_from_bytes(textwrap.dedent("""\ 3990 Content-Type: text/plain; charset=us-ascii; 3991 title*=us-ascii'en'This%20is%20not%20f\xa7n""" 3992 ).encode('latin-1')) 3993 msg.del_param('title') 3994 self.assertEqual(msg.get_param('title'), None) 3995 self.assertEqual(msg.get_content_maintype(), 'text') 3996 3997 def test_get_payload_with_8bit_cte_header(self): 3998 msg = email.message_from_bytes(textwrap.dedent("""\ 3999 Content-Transfer-Encoding: b\xa7se64 4000 Content-Type: text/plain; charset=latin-1 4001 4002 payload 4003 """).encode('latin-1')) 4004 self.assertEqual(msg.get_payload(), 'payload\n') 4005 self.assertEqual(msg.get_payload(decode=True), b'payload\n') 4006 4007 non_latin_bin_msg = textwrap.dedent("""\ 4008 From: foo@bar.com 4009 To: báz 4010 Subject: Maintenant je vous présente mon collègue, le pouf célèbre 4011 \tJean de Baddie 4012 Mime-Version: 1.0 4013 Content-Type: text/plain; charset="utf-8" 4014 Content-Transfer-Encoding: 8bit 4015 4016 Да, они летят. 
4017 """).encode('utf-8') 4018 4019 def test_bytes_generator(self): 4020 msg = email.message_from_bytes(self.non_latin_bin_msg) 4021 out = BytesIO() 4022 email.generator.BytesGenerator(out).flatten(msg) 4023 self.assertEqual(out.getvalue(), self.non_latin_bin_msg) 4024 4025 def test_bytes_generator_handles_None_body(self): 4026 #Issue 11019 4027 msg = email.message.Message() 4028 out = BytesIO() 4029 email.generator.BytesGenerator(out).flatten(msg) 4030 self.assertEqual(out.getvalue(), b"\n") 4031 4032 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\ 4033 From: foo@bar.com 4034 To: =?unknown-8bit?q?b=C3=A1z?= 4035 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?= 4036 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?= 4037 =?unknown-8bit?q?_Jean_de_Baddie?= 4038 Mime-Version: 1.0 4039 Content-Type: text/plain; charset="utf-8" 4040 Content-Transfer-Encoding: base64 4041 4042 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg== 4043 """) 4044 4045 def test_generator_handles_8bit(self): 4046 msg = email.message_from_bytes(self.non_latin_bin_msg) 4047 out = StringIO() 4048 email.generator.Generator(out).flatten(msg) 4049 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped) 4050 4051 def test_str_generator_should_not_mutate_msg_when_handling_8bit(self): 4052 msg = email.message_from_bytes(self.non_latin_bin_msg) 4053 out = BytesIO() 4054 BytesGenerator(out).flatten(msg) 4055 orig_value = out.getvalue() 4056 Generator(StringIO()).flatten(msg) # Should not mutate msg! 4057 out = BytesIO() 4058 BytesGenerator(out).flatten(msg) 4059 self.assertEqual(out.getvalue(), orig_value) 4060 4061 def test_bytes_generator_with_unix_from(self): 4062 # The unixfrom contains a current date, so we can't check it 4063 # literally. Just make sure the first word is 'From' and the 4064 # rest of the message matches the input. 
def test_message_from_binary_file(self):
    """email.message_from_binary_file() parses an 8-bit message read from disk.

    The raw bytes in ``self.non_latin_bin_msg`` are written to a scratch
    file, parsed back through a binary file object, and the result must
    stringify to ``self.non_latin_bin_msg_as7bit``.
    """
    fn = 'test.msg'
    # Registered before the file is created so the scratch file is removed
    # even if a later step fails.
    self.addCleanup(unlink, fn)
    with open(fn, 'wb') as testfile:
        testfile.write(self.non_latin_bin_msg)
    with open(fn, 'rb') as testfile:
        # Exercise the public helper this test is named after, rather than
        # instantiating BytesParser directly.
        m = email.message_from_binary_file(testfile)
    self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
4107 """) 4108 4109 def test_string_generator_reencodes_to_quopri_when_appropriate(self): 4110 m = email.message_from_bytes(self.latin_bin_msg) 4111 self.assertEqual(str(m), self.latin_bin_msg_as7bit) 4112 4113 def test_decoded_generator_emits_unicode_body(self): 4114 m = email.message_from_bytes(self.latin_bin_msg) 4115 out = StringIO() 4116 email.generator.DecodedGenerator(out).flatten(m) 4117 #DecodedHeader output contains an extra blank line compared 4118 #to the input message. RDM: not sure if this is a bug or not, 4119 #but it is not specific to the 8bit->7bit conversion. 4120 self.assertEqual(out.getvalue(), 4121 self.latin_bin_msg.decode('latin-1')+'\n') 4122 4123 def test_bytes_feedparser(self): 4124 bfp = email.feedparser.BytesFeedParser() 4125 for i in range(0, len(self.latin_bin_msg), 10): 4126 bfp.feed(self.latin_bin_msg[i:i+10]) 4127 m = bfp.close() 4128 self.assertEqual(str(m), self.latin_bin_msg_as7bit) 4129 4130 def test_crlf_flatten(self): 4131 with openfile('msg_26.txt', 'rb') as fp: 4132 text = fp.read() 4133 msg = email.message_from_bytes(text) 4134 s = BytesIO() 4135 g = email.generator.BytesGenerator(s) 4136 g.flatten(msg, linesep='\r\n') 4137 self.assertEqual(s.getvalue(), text) 4138 4139 def test_8bit_multipart(self): 4140 # Issue 11605 4141 source = textwrap.dedent("""\ 4142 Date: Fri, 18 Mar 2011 17:15:43 +0100 4143 To: foo@example.com 4144 From: foodwatch-Newsletter <bar@example.com> 4145 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System 4146 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain> 4147 MIME-Version: 1.0 4148 Content-Type: multipart/alternative; 4149 boundary="b1_76a486bee62b0d200f33dc2ca08220ad" 4150 4151 --b1_76a486bee62b0d200f33dc2ca08220ad 4152 Content-Type: text/plain; charset="utf-8" 4153 Content-Transfer-Encoding: 8bit 4154 4155 Guten Tag, , 4156 4157 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die 4158 Nachrichten aus Japan. 
4159 4160 4161 --b1_76a486bee62b0d200f33dc2ca08220ad 4162 Content-Type: text/html; charset="utf-8" 4163 Content-Transfer-Encoding: 8bit 4164 4165 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" 4166 "http://www.w3.org/TR/html4/loose.dtd"> 4167 <html lang="de"> 4168 <head> 4169 <title>foodwatch - Newsletter</title> 4170 </head> 4171 <body> 4172 <p>mit großer Betroffenheit verfolgen auch wir im foodwatch-Team 4173 die Nachrichten aus Japan.</p> 4174 </body> 4175 </html> 4176 --b1_76a486bee62b0d200f33dc2ca08220ad-- 4177 4178 """).encode('utf-8') 4179 msg = email.message_from_bytes(source) 4180 s = BytesIO() 4181 g = email.generator.BytesGenerator(s) 4182 g.flatten(msg) 4183 self.assertEqual(s.getvalue(), source) 4184 4185 def test_bytes_generator_b_encoding_linesep(self): 4186 # Issue 14062: b encoding was tacking on an extra \n. 4187 m = Message() 4188 # This has enough non-ascii that it should always end up b encoded. 4189 m['Subject'] = Header('žluťoučký kůň') 4190 s = BytesIO() 4191 g = email.generator.BytesGenerator(s) 4192 g.flatten(m, linesep='\r\n') 4193 self.assertEqual( 4194 s.getvalue(), 4195 b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n') 4196 4197 def test_generator_b_encoding_linesep(self): 4198 # Since this broke in ByteGenerator, test Generator for completeness. 4199 m = Message() 4200 # This has enough non-ascii that it should always end up b encoded. 
class BaseTestBytesGeneratorIdempotent:
    """Mixin that round-trips messages through BytesGenerator.

    Users of the mixin must provide ``linesep`` (str), ``blinesep`` (bytes)
    and ``normalize_linesep_regex`` (a compiled bytes pattern), plus the
    usual ``assertEqual`` from a TestCase base.
    """

    # Always show complete diffs when a comparison fails.
    maxDiff = None

    def _msgobj(self, filename):
        """Return ``(message, data)`` for *filename* with line endings normalised."""
        with openfile(filename, 'rb') as stream:
            raw = stream.read()
        # Rewrite line endings to this class's separator before parsing, so
        # the returned bytes are what the flattened output is compared to.
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        """Assert that flattening *msg* reproduces *data* exactly."""
        sink = BytesIO()
        # maxheaderlen=0 is passed so header lines are not re-wrapped.
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, sink.getvalue())
4267 # Test the binary flag 4268 eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n') 4269 # Test the maxlinelen arg 4270 eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\ 4271eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg 4272eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg 4273eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg 4274eHh4eCB4eHh4IA== 4275""") 4276 # Test the eol argument 4277 eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'), 4278 """\ 4279eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r 4280eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r 4281eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r 4282eHh4eCB4eHh4IA==\r 4283""") 4284 4285 def test_header_encode(self): 4286 eq = self.assertEqual 4287 he = base64mime.header_encode 4288 eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=') 4289 eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=') 4290 eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=') 4291 # Test the charset option 4292 eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=') 4293 eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=') 4294 4295 4296 4297class TestQuopri(unittest.TestCase): 4298 def setUp(self): 4299 # Set of characters (as byte integers) that don't need to be encoded 4300 # in headers. 4301 self.hlit = list(chain( 4302 range(ord('a'), ord('z') + 1), 4303 range(ord('A'), ord('Z') + 1), 4304 range(ord('0'), ord('9') + 1), 4305 (c for c in b'!*+-/'))) 4306 # Set of characters (as byte integers) that do need to be encoded in 4307 # headers. 4308 self.hnon = [c for c in range(256) if c not in self.hlit] 4309 assert len(self.hlit) + len(self.hnon) == 256 4310 # Set of characters (as byte integers) that don't need to be encoded 4311 # in bodies. 4312 self.blit = list(range(ord(' '), ord('~') + 1)) 4313 self.blit.append(ord('\t')) 4314 self.blit.remove(ord('=')) 4315 # Set of characters (as byte integers) that do need to be encoded in 4316 # bodies. 
4317 self.bnon = [c for c in range(256) if c not in self.blit] 4318 assert len(self.blit) + len(self.bnon) == 256 4319 4320 def test_quopri_header_check(self): 4321 for c in self.hlit: 4322 self.assertFalse(quoprimime.header_check(c), 4323 'Should not be header quopri encoded: %s' % chr(c)) 4324 for c in self.hnon: 4325 self.assertTrue(quoprimime.header_check(c), 4326 'Should be header quopri encoded: %s' % chr(c)) 4327 4328 def test_quopri_body_check(self): 4329 for c in self.blit: 4330 self.assertFalse(quoprimime.body_check(c), 4331 'Should not be body quopri encoded: %s' % chr(c)) 4332 for c in self.bnon: 4333 self.assertTrue(quoprimime.body_check(c), 4334 'Should be body quopri encoded: %s' % chr(c)) 4335 4336 def test_header_quopri_len(self): 4337 eq = self.assertEqual 4338 eq(quoprimime.header_length(b'hello'), 5) 4339 # RFC 2047 chrome is not included in header_length(). 4340 eq(len(quoprimime.header_encode(b'hello', charset='xxx')), 4341 quoprimime.header_length(b'hello') + 4342 # =?xxx?q?...?= means 10 extra characters 4343 10) 4344 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20) 4345 # RFC 2047 chrome is not included in header_length(). 
4346 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')), 4347 quoprimime.header_length(b'h@e@l@l@o@') + 4348 # =?xxx?q?...?= means 10 extra characters 4349 10) 4350 for c in self.hlit: 4351 eq(quoprimime.header_length(bytes([c])), 1, 4352 'expected length 1 for %r' % chr(c)) 4353 for c in self.hnon: 4354 # Space is special; it's encoded to _ 4355 if c == ord(' '): 4356 continue 4357 eq(quoprimime.header_length(bytes([c])), 3, 4358 'expected length 3 for %r' % chr(c)) 4359 eq(quoprimime.header_length(b' '), 1) 4360 4361 def test_body_quopri_len(self): 4362 eq = self.assertEqual 4363 for c in self.blit: 4364 eq(quoprimime.body_length(bytes([c])), 1) 4365 for c in self.bnon: 4366 eq(quoprimime.body_length(bytes([c])), 3) 4367 4368 def test_quote_unquote_idempotent(self): 4369 for x in range(256): 4370 c = chr(x) 4371 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c) 4372 4373 def _test_header_encode(self, header, expected_encoded_header, charset=None): 4374 if charset is None: 4375 encoded_header = quoprimime.header_encode(header) 4376 else: 4377 encoded_header = quoprimime.header_encode(header, charset) 4378 self.assertEqual(encoded_header, expected_encoded_header) 4379 4380 def test_header_encode_null(self): 4381 self._test_header_encode(b'', '') 4382 4383 def test_header_encode_one_word(self): 4384 self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=') 4385 4386 def test_header_encode_two_lines(self): 4387 self._test_header_encode(b'hello\nworld', 4388 '=?iso-8859-1?q?hello=0Aworld?=') 4389 4390 def test_header_encode_non_ascii(self): 4391 self._test_header_encode(b'hello\xc7there', 4392 '=?iso-8859-1?q?hello=C7there?=') 4393 4394 def test_header_encode_alt_charset(self): 4395 self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=', 4396 charset='iso-8859-2') 4397 4398 def _test_header_decode(self, encoded_header, expected_decoded_header): 4399 decoded_header = quoprimime.header_decode(encoded_header) 4400 
self.assertEqual(decoded_header, expected_decoded_header) 4401 4402 def test_header_decode_null(self): 4403 self._test_header_decode('', '') 4404 4405 def test_header_decode_one_word(self): 4406 self._test_header_decode('hello', 'hello') 4407 4408 def test_header_decode_two_lines(self): 4409 self._test_header_decode('hello=0Aworld', 'hello\nworld') 4410 4411 def test_header_decode_non_ascii(self): 4412 self._test_header_decode('hello=C7there', 'hello\xc7there') 4413 4414 def test_header_decode_re_bug_18380(self): 4415 # Issue 18380: Call re.sub with a positional argument for flags in the wrong position 4416 self.assertEqual(quoprimime.header_decode('=30' * 257), '0' * 257) 4417 4418 def _test_decode(self, encoded, expected_decoded, eol=None): 4419 if eol is None: 4420 decoded = quoprimime.decode(encoded) 4421 else: 4422 decoded = quoprimime.decode(encoded, eol=eol) 4423 self.assertEqual(decoded, expected_decoded) 4424 4425 def test_decode_null_word(self): 4426 self._test_decode('', '') 4427 4428 def test_decode_null_line_null_word(self): 4429 self._test_decode('\r\n', '\n') 4430 4431 def test_decode_one_word(self): 4432 self._test_decode('hello', 'hello') 4433 4434 def test_decode_one_word_eol(self): 4435 self._test_decode('hello', 'hello', eol='X') 4436 4437 def test_decode_one_line(self): 4438 self._test_decode('hello\r\n', 'hello\n') 4439 4440 def test_decode_one_line_lf(self): 4441 self._test_decode('hello\n', 'hello\n') 4442 4443 def test_decode_one_line_cr(self): 4444 self._test_decode('hello\r', 'hello\n') 4445 4446 def test_decode_one_line_nl(self): 4447 self._test_decode('hello\n', 'helloX', eol='X') 4448 4449 def test_decode_one_line_crnl(self): 4450 self._test_decode('hello\r\n', 'helloX', eol='X') 4451 4452 def test_decode_one_line_one_word(self): 4453 self._test_decode('hello\r\nworld', 'hello\nworld') 4454 4455 def test_decode_one_line_one_word_eol(self): 4456 self._test_decode('hello\r\nworld', 'helloXworld', eol='X') 4457 4458 def 
test_decode_two_lines(self): 4459 self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n') 4460 4461 def test_decode_two_lines_eol(self): 4462 self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X') 4463 4464 def test_decode_one_long_line(self): 4465 self._test_decode('Spam' * 250, 'Spam' * 250) 4466 4467 def test_decode_one_space(self): 4468 self._test_decode(' ', '') 4469 4470 def test_decode_multiple_spaces(self): 4471 self._test_decode(' ' * 5, '') 4472 4473 def test_decode_one_line_trailing_spaces(self): 4474 self._test_decode('hello \r\n', 'hello\n') 4475 4476 def test_decode_two_lines_trailing_spaces(self): 4477 self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n') 4478 4479 def test_decode_quoted_word(self): 4480 self._test_decode('=22quoted=20words=22', '"quoted words"') 4481 4482 def test_decode_uppercase_quoting(self): 4483 self._test_decode('ab=CD=EF', 'ab\xcd\xef') 4484 4485 def test_decode_lowercase_quoting(self): 4486 self._test_decode('ab=cd=ef', 'ab\xcd\xef') 4487 4488 def test_decode_soft_line_break(self): 4489 self._test_decode('soft line=\r\nbreak', 'soft linebreak') 4490 4491 def test_decode_false_quoting(self): 4492 self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2') 4493 4494 def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None): 4495 kwargs = {} 4496 if maxlinelen is None: 4497 # Use body_encode's default. 4498 maxlinelen = 76 4499 else: 4500 kwargs['maxlinelen'] = maxlinelen 4501 if eol is None: 4502 # Use body_encode's default. 4503 eol = '\n' 4504 else: 4505 kwargs['eol'] = eol 4506 encoded_body = quoprimime.body_encode(body, **kwargs) 4507 self.assertEqual(encoded_body, expected_encoded_body) 4508 if eol == '\n' or eol == '\r\n': 4509 # We know how to split the result back into lines, so maxlinelen 4510 # can be checked. 
4511 for line in encoded_body.splitlines(): 4512 self.assertLessEqual(len(line), maxlinelen) 4513 4514 def test_encode_null(self): 4515 self._test_encode('', '') 4516 4517 def test_encode_null_lines(self): 4518 self._test_encode('\n\n', '\n\n') 4519 4520 def test_encode_one_line(self): 4521 self._test_encode('hello\n', 'hello\n') 4522 4523 def test_encode_one_line_crlf(self): 4524 self._test_encode('hello\r\n', 'hello\n') 4525 4526 def test_encode_one_line_eol(self): 4527 self._test_encode('hello\n', 'hello\r\n', eol='\r\n') 4528 4529 def test_encode_one_line_eol_after_non_ascii(self): 4530 # issue 20206; see changeset 0cf700464177 for why the encode/decode. 4531 self._test_encode('hello\u03c5\n'.encode('utf-8').decode('latin1'), 4532 'hello=CF=85\r\n', eol='\r\n') 4533 4534 def test_encode_one_space(self): 4535 self._test_encode(' ', '=20') 4536 4537 def test_encode_one_line_one_space(self): 4538 self._test_encode(' \n', '=20\n') 4539 4540# XXX: body_encode() expect strings, but uses ord(char) from these strings 4541# to index into a 256-entry list. For code points above 255, this will fail. 4542# Should there be a check for 8-bit only ord() values in body, or at least 4543# a comment about the expected input? 

    def test_encode_two_lines_one_space(self):
        self._test_encode(' \n \n', '=20\n=20\n')

    # NOTE(review): in the two trailing-spaces tests below the expected value
    # keeps a literal space before '=20' although the input, as written,
    # has a single trailing space.  Runs of spaces inside these literals may
    # have been collapsed -- confirm against the upstream test file.
    def test_encode_one_word_trailing_spaces(self):
        self._test_encode('hello ', 'hello =20')

    def test_encode_one_line_trailing_spaces(self):
        self._test_encode('hello \n', 'hello =20\n')

    def test_encode_one_word_trailing_tab(self):
        self._test_encode('hello \t', 'hello =09')

    def test_encode_one_line_trailing_tab(self):
        self._test_encode('hello \t\n', 'hello =09\n')

    def test_encode_trailing_space_before_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)

    def test_encode_trailing_space_at_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)

    def test_encode_trailing_space_beyond_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)

    def test_encode_whitespace_lines(self):
        self._test_encode(' \n' * 5, '=20\n' * 5)

    def test_encode_quoted_equals(self):
        self._test_encode('a = b', 'a =3D b')

    def test_encode_one_long_string(self):
        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)

    def test_encode_one_long_line(self):
        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')

    def test_encode_one_very_long_line(self):
        self._test_encode('x' * 200 + '\n',
                2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')

    def test_encode_shortest_maxlinelen(self):
        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)

    def test_encode_maxlinelen_too_small(self):
        self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # Test the binary flag
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # Test the maxlinelen arg
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
x xxxx xxxx xxxx xxxx=20""")
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
x xxxx xxxx xxxx xxxx=20""")
        # An embedded empty line must survive encoding unchanged.
        eq(quoprimime.body_encode("""\
one line

two line"""), """\
one line

two line""")



# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # Remove the 'fake' charset that test_body_encode registers, if it
        # is present, so it does not stay in the module-level CHARSETS table.
        from email import charset as CharsetModule
        try:
            del CharsetModule.CHARSETS['fake']
        except KeyError:
            pass

    def test_codec_encodeable(self):
        eq = self.assertEqual
        # Make sure us-ascii = no Unicode conversion
        c = Charset('us-ascii')
        eq(c.header_encode('Hello World!'), 'Hello World!')
        # Test 8-bit idempotency with us-ascii
        s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        self.assertRaises(UnicodeError, c.header_encode, s)
        c = Charset('utf-8')
        eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        eq = self.assertEqual
        # Try a charset with QP body encoding
        c = Charset('iso-8859-1')
        eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        c = Charset('utf-8')
        eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world'))
        # Try a charset with None body encoding
        c = Charset('us-ascii')
        eq('hello world', c.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##        try:
##            eq('\x1b$B5FCO;~IW\x1b(B',
##               c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##            eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##               c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##        except LookupError:
##            # We probably don't have the Japanese codecs installed
##            pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        c = Charset('fake')
        eq('hello world', c.body_encode('hello world'))

    def test_unicode_charset_name(self):
        charset = Charset('us-ascii')
        self.assertEqual(str(charset), 'us-ascii')
        self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')



# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    def test_simple(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append(' Goodbye World!')
        # NOTE(review): the appended chunk starts with a space, yet the
        # expected value below is identical to the one in
        # test_simple_surprise.  Runs of spaces inside this literal may have
        # been collapsed -- confirm against the upstream test file.
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_simple_surprise(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
                   maxlinelen=76)
        # Every folded line (split on the newline + space continuation) must
        # respect the 76-character limit.
        for l in h.encode(splitchars=' ').split('\n '):
            self.assertLessEqual(len(l), 76)

    def test_multilingual(self):
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        # Build one header from three chunks, each in its own charset.
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        # Decoding must give back exactly the three original chunks.
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        # str(h) is compared against the UTF-8 bytes of the whole text.
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # With header_name given, the expected first line is shorter.
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # With a large explicit maxlinelen the header is not folded at all.
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        eq = self.ndiffAssertEqual
        # maxlinelen=20: the expected output carries two or three payload
        # characters per encoded word.
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        # Round trip: decoding the folded header gives back the input text.
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        h.append('xxxx ' * 20)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        eq = self.ndiffAssertEqual
        # Same shape as test_quopri_splittable, but the expected output for
        # koi8-r uses base64 ('b') encoded words.
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        eq = self.assertEqual
        # The first value is expected as a 'q' encoded word, the second as
        # a 'b' encoded word.
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        # Without an errors argument the undecodable byte is rejected...
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        # ...while errors='replace' matches bytes.decode(..., 'replace').
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_escaped_8bit_header(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        e = x.decode('ascii', 'surrogateescape')
        h = Header(e, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_header_handles_binary_unknown8bit(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_make_header_handles_binary_unknown8bit(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        h2 = email.header.make_header(email.header.decode_header(h))
        self.assertEqual(str(h2),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])

    def test_modify_returned_list_does_not_change_header(self):
        h = Header('test')
        chunks = email.header.decode_header(h)
        # Mutating the returned list must leave the Header itself untouched.
        chunks.append(('ascii', 'test2'))
        self.assertEqual(str(h), 'test')

    def test_encoded_adjacent_nonencoded(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_keeper(self):
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        # The two adjacent koi8-r encoded words come back as one chunk; the
        # plain text before and after them keeps its whitespace.
        eq(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)

    def test_shift_jis_charset(self):
        # A header created with charset 'shift_jis' is expected to be
        # emitted as an iso-2022-jp encoded word.
        h = Header('文', charset='shift_jis')
        self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')

    def test_flatten_header_with_no_value(self):
        # Issue 11401 (regression from email 4.x)  Note that the space after
        # the header doesn't reflect the input, but this is also the way
        # email 4.x behaved.  At some point it would be nice to fix that.
        msg = email.message_from_string("EmptyHeader:")
        self.assertEqual(str(msg), "EmptyHeader: \n\n")

    def test_encode_preserves_leading_ws_on_value(self):
        msg = Message()
        # NOTE(review): the value starts with a space, yet the expected
        # output shows only one space after the colon.  Runs of spaces inside
        # these literals may have been collapsed -- confirm against the
        # upstream test file.
        msg['SomeHeader'] = ' value with leading ws'
        self.assertEqual(str(msg), "SomeHeader: value with leading ws\n\n")

    def test_whitespace_header(self):
        self.assertEqual(Header(' ').encode(), ' ')



# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_get_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_29.txt')
        # The parameter comes back as a (charset, language, value) 3-tuple.
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        eq(msg.get_param('title', unquote=False),
           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii')
        # Without a language argument the language slot is the empty string.
        eq(msg.get_param('title'),
           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_set_param_requote(self):
        msg = Message()
        msg.set_param('title', 'foo')
        self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
        msg.set_param('title', 'bar', requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title=bar')
        # tspecial is still quoted.
        msg.set_param('title', "(bar)bell", requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')

    def test_del_param(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
            charset='us-ascii', language='en')
        # Only 'foo' is removed; the 'title' parameter must remain.
        msg.del_param('foo', header='Content-Type')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset
    # I changed the charset name, though, because the one in the file isn't
    # a legal charset name.  Should add a test for an illegal charset.
    def test_rfc2231_get_content_charset(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_32.txt')
        eq(msg.get_content_charset(), 'us-ascii')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes
    def test_rfc2231_parse_rfc_quoting(self):
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # The message must also serialize back to exactly the input text.
        self.assertEqual(m, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    def test_rfc2231_parse_extra_quoting(self):
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # The message must also serialize back to exactly the input text.
        self.assertEqual(m, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset
    # but new test uses *0* because otherwise lang/charset is not valid.
    # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values
    def test_rfc2231_no_language_or_charset(self):
        m = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(m)
        param = msg.get_param('NAME')
        # The two NAME*N segments are joined into a plain string rather than
        # a (charset, language, value) tuple.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(
            param,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset
    def test_rfc2231_no_language_or_charset_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # Duplicate of previous test?
    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded,
    # but the test below is wrong (the first part should be decoded).
5205 def test_rfc2231_partly_encoded(self): 5206 m = '''\ 5207Content-Disposition: inline; 5208\tfilename*0="''This%20is%20even%20more%20"; 5209\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5210\tfilename*2="is it not.pdf" 5211 5212''' 5213 msg = email.message_from_string(m) 5214 self.assertEqual( 5215 msg.get_filename(), 5216 'This%20is%20even%20more%20***fun*** is it not.pdf') 5217 5218 def test_rfc2231_partly_nonencoded(self): 5219 m = '''\ 5220Content-Disposition: inline; 5221\tfilename*0="This%20is%20even%20more%20"; 5222\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20"; 5223\tfilename*2="is it not.pdf" 5224 5225''' 5226 msg = email.message_from_string(m) 5227 self.assertEqual( 5228 msg.get_filename(), 5229 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf') 5230 5231 def test_rfc2231_no_language_or_charset_in_boundary(self): 5232 m = '''\ 5233Content-Type: multipart/alternative; 5234\tboundary*0*="''This%20is%20even%20more%20"; 5235\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5236\tboundary*2="is it not.pdf" 5237 5238''' 5239 msg = email.message_from_string(m) 5240 self.assertEqual(msg.get_boundary(), 5241 'This is even more ***fun*** is it not.pdf') 5242 5243 def test_rfc2231_no_language_or_charset_in_charset(self): 5244 # This is a nonsensical charset value, but tests the code anyway 5245 m = '''\ 5246Content-Type: text/plain; 5247\tcharset*0*="This%20is%20even%20more%20"; 5248\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5249\tcharset*2="is it not.pdf" 5250 5251''' 5252 msg = email.message_from_string(m) 5253 self.assertEqual(msg.get_content_charset(), 5254 'this is even more ***fun*** is it not.pdf') 5255 5256 # test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii 5257 def test_rfc2231_bad_encoding_in_filename(self): 5258 m = '''\ 5259Content-Disposition: inline; 5260\tfilename*0*="bogus'xx'This%20is%20even%20more%20"; 5261\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5262\tfilename*2="is it not.pdf" 5263 5264''' 5265 msg = 
email.message_from_string(m) 5266 self.assertEqual(msg.get_filename(), 5267 'This is even more ***fun*** is it not.pdf') 5268 5269 def test_rfc2231_bad_encoding_in_charset(self): 5270 m = """\ 5271Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D 5272 5273""" 5274 msg = email.message_from_string(m) 5275 # This should return None because non-ascii characters in the charset 5276 # are not allowed. 5277 self.assertEqual(msg.get_content_charset(), None) 5278 5279 def test_rfc2231_bad_character_in_charset(self): 5280 m = """\ 5281Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D 5282 5283""" 5284 msg = email.message_from_string(m) 5285 # This should return None because non-ascii characters in the charset 5286 # are not allowed. 5287 self.assertEqual(msg.get_content_charset(), None) 5288 5289 def test_rfc2231_bad_character_in_filename(self): 5290 m = '''\ 5291Content-Disposition: inline; 5292\tfilename*0*="ascii'xx'This%20is%20even%20more%20"; 5293\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5294\tfilename*2*="is it not.pdf%E2" 5295 5296''' 5297 msg = email.message_from_string(m) 5298 self.assertEqual(msg.get_filename(), 5299 'This is even more ***fun*** is it not.pdf\ufffd') 5300 5301 def test_rfc2231_unknown_encoding(self): 5302 m = """\ 5303Content-Transfer-Encoding: 8bit 5304Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt 5305 5306""" 5307 msg = email.message_from_string(m) 5308 self.assertEqual(msg.get_filename(), 'myfile.txt') 5309 5310 def test_rfc2231_single_tick_in_filename_extended(self): 5311 eq = self.assertEqual 5312 m = """\ 5313Content-Type: application/x-foo; 5314\tname*0*=\"Frank's\"; name*1*=\" Document\" 5315 5316""" 5317 msg = email.message_from_string(m) 5318 charset, language, s = msg.get_param('name') 5319 eq(charset, None) 5320 eq(language, None) 5321 eq(s, "Frank's Document") 5322 5323 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes 5324 def test_rfc2231_single_tick_in_filename(self): 
5325 m = """\ 5326Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\" 5327 5328""" 5329 msg = email.message_from_string(m) 5330 param = msg.get_param('name') 5331 self.assertNotIsInstance(param, tuple) 5332 self.assertEqual(param, "Frank's Document") 5333 5334 def test_rfc2231_missing_tick(self): 5335 m = '''\ 5336Content-Disposition: inline; 5337\tfilename*0*="'This%20is%20broken"; 5338''' 5339 msg = email.message_from_string(m) 5340 self.assertEqual( 5341 msg.get_filename(), 5342 "'This is broken") 5343 5344 def test_rfc2231_missing_tick_with_encoded_non_ascii(self): 5345 m = '''\ 5346Content-Disposition: inline; 5347\tfilename*0*="'This%20is%E2broken"; 5348''' 5349 msg = email.message_from_string(m) 5350 self.assertEqual( 5351 msg.get_filename(), 5352 "'This is\ufffdbroken") 5353 5354 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang 5355 def test_rfc2231_tick_attack_extended(self): 5356 eq = self.assertEqual 5357 m = """\ 5358Content-Type: application/x-foo; 5359\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\" 5360 5361""" 5362 msg = email.message_from_string(m) 5363 charset, language, s = msg.get_param('name') 5364 eq(charset, 'us-ascii') 5365 eq(language, 'en-us') 5366 eq(s, "Frank's Document") 5367 5368 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value 5369 def test_rfc2231_tick_attack(self): 5370 m = """\ 5371Content-Type: application/x-foo; 5372\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\" 5373 5374""" 5375 msg = email.message_from_string(m) 5376 param = msg.get_param('name') 5377 self.assertNotIsInstance(param, tuple) 5378 self.assertEqual(param, "us-ascii'en-us'Frank's Document") 5379 5380 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes 5381 def test_rfc2231_no_extended_values(self): 5382 eq = self.assertEqual 5383 m = """\ 5384Content-Type: application/x-foo; name=\"Frank's Document\" 5385 5386""" 
5387 msg = email.message_from_string(m) 5388 eq(msg.get_param('name'), "Frank's Document") 5389 5390 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments 5391 def test_rfc2231_encoded_then_unencoded_segments(self): 5392 eq = self.assertEqual 5393 m = """\ 5394Content-Type: application/x-foo; 5395\tname*0*=\"us-ascii'en-us'My\"; 5396\tname*1=\" Document\"; 5397\tname*2*=\" For You\" 5398 5399""" 5400 msg = email.message_from_string(m) 5401 charset, language, s = msg.get_param('name') 5402 eq(charset, 'us-ascii') 5403 eq(language, 'en-us') 5404 eq(s, 'My Document For You') 5405 5406 # test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments 5407 # test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments 5408 def test_rfc2231_unencoded_then_encoded_segments(self): 5409 eq = self.assertEqual 5410 m = """\ 5411Content-Type: application/x-foo; 5412\tname*0=\"us-ascii'en-us'My\"; 5413\tname*1*=\" Document\"; 5414\tname*2*=\" For You\" 5415 5416""" 5417 msg = email.message_from_string(m) 5418 charset, language, s = msg.get_param('name') 5419 eq(charset, 'us-ascii') 5420 eq(language, 'en-us') 5421 eq(s, 'My Document For You') 5422 5423 def test_should_not_hang_on_invalid_ew_messages(self): 5424 messages = ["""From: user@host.com 5425To: user@host.com 5426Bad-Header: 5427 =?us-ascii?Q?LCSwrV11+IB0rSbSker+M9vWR7wEDSuGqmHD89Gt=ea0nJFSaiz4vX3XMJPT4vrE?= 5428 =?us-ascii?Q?xGUZeOnp0o22pLBB7CYLH74Js=wOlK6Tfru2U47qR?= 5429 =?us-ascii?Q?72OfyEY2p2=2FrA9xNFyvH+fBTCmazxwzF8nGkK6D?= 5430 5431Hello! 
5432""", """From: ����� �������� <xxx@xxx> 5433To: "xxx" <xxx@xxx> 5434Subject: ��� ���������� ����� ����� � ��������� �� ���� 5435MIME-Version: 1.0 5436Content-Type: text/plain; charset="windows-1251"; 5437Content-Transfer-Encoding: 8bit 5438 5439�� ����� � ���� ������ ��� �������� 5440"""] 5441 for m in messages: 5442 with self.subTest(m=m): 5443 msg = email.message_from_string(m) 5444 5445 5446# Tests to ensure that signed parts of an email are completely preserved, as 5447# required by RFC1847 section 2.1. Note that these are incomplete, because the 5448# email package does not currently always preserve the body. See issue 1670765. 5449class TestSigned(TestEmailBase): 5450 5451 def _msg_and_obj(self, filename): 5452 with openfile(filename) as fp: 5453 original = fp.read() 5454 msg = email.message_from_string(original) 5455 return original, msg 5456 5457 def _signed_parts_eq(self, original, result): 5458 # Extract the first mime part of each message 5459 import re 5460 repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M) 5461 inpart = repart.search(original).group(2) 5462 outpart = repart.search(result).group(2) 5463 self.assertEqual(outpart, inpart) 5464 5465 def test_long_headers_as_string(self): 5466 original, msg = self._msg_and_obj('msg_45.txt') 5467 result = msg.as_string() 5468 self._signed_parts_eq(original, result) 5469 5470 def test_long_headers_as_string_maxheaderlen(self): 5471 original, msg = self._msg_and_obj('msg_45.txt') 5472 result = msg.as_string(maxheaderlen=60) 5473 self._signed_parts_eq(original, result) 5474 5475 def test_long_headers_flatten(self): 5476 original, msg = self._msg_and_obj('msg_45.txt') 5477 fp = StringIO() 5478 Generator(fp).flatten(msg) 5479 result = fp.getvalue() 5480 self._signed_parts_eq(original, result) 5481 5482 5483 5484if __name__ == '__main__': 5485 unittest.main() 5486