1# Copyright (C) 2001-2010 Python Software Foundation 2# Contact: email-sig@python.org 3# email package unit tests 4 5import re 6import time 7import base64 8import unittest 9import textwrap 10 11from io import StringIO, BytesIO 12from itertools import chain 13from random import choice 14from threading import Thread 15from unittest.mock import patch 16 17import email 18import email.policy 19 20from email.charset import Charset 21from email.header import Header, decode_header, make_header 22from email.parser import Parser, HeaderParser 23from email.generator import Generator, DecodedGenerator, BytesGenerator 24from email.message import Message 25from email.mime.application import MIMEApplication 26from email.mime.audio import MIMEAudio 27from email.mime.text import MIMEText 28from email.mime.image import MIMEImage 29from email.mime.base import MIMEBase 30from email.mime.message import MIMEMessage 31from email.mime.multipart import MIMEMultipart 32from email.mime.nonmultipart import MIMENonMultipart 33from email import utils 34from email import errors 35from email import encoders 36from email import iterators 37from email import base64mime 38from email import quoprimime 39 40from test.support import unlink, start_threads 41from test.test_email import openfile, TestEmailBase 42 43# These imports are documented to work, but we are testing them using a 44# different path, so we import them here just to make sure they are importable. 
from email.parser import FeedParser, BytesFeedParser

NL = '\n'
EMPTYSTRING = ''
SPACE = ' '


# Exercises the public API of the Message class.
class TestMessageAPI(TestEmailBase):
    def test_get_all(self):
        # A repeated header comes back as a list; an absent header yields
        # the caller-supplied default.
        parsed = self._msgobj('msg_20.txt')
        cc_values = parsed.get_all('cc')
        self.assertEqual(cc_values,
                         ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        self.assertEqual(parsed.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        message = Message()
        self.assertEqual(message.get_charset(), None)
        latin1 = Charset('iso-8859-1')
        message.set_charset(latin1)
        # Setting a charset on an empty message fills in the MIME headers.
        self.assertEqual(message['mime-version'], '1.0')
        self.assertEqual(message.get_content_type(), 'text/plain')
        self.assertEqual(message['content-type'],
                         'text/plain; charset="iso-8859-1"')
        self.assertEqual(message.get_param('charset'), 'iso-8859-1')
        self.assertEqual(message['content-transfer-encoding'],
                         'quoted-printable')
        self.assertEqual(message.get_charset().input_charset, 'iso-8859-1')
        # Clearing the charset drops the parameter again.
        message.set_charset(None)
        self.assertEqual(message.get_charset(), None)
        self.assertEqual(message['content-type'], 'text/plain')
        # MIME headers that are already present are kept by set_charset();
        # only the charset parameter is added.
        message = Message()
        preset_headers = (
            ('MIME-Version', '2.0'),
            ('Content-Type', 'text/x-weird'),
            ('Content-Transfer-Encoding', 'quinted-puntable'),
        )
        for header_name, header_value in preset_headers:
            message[header_name] = header_value
        message.set_charset(latin1)
        self.assertEqual(message['mime-version'], '2.0')
        self.assertEqual(message['content-type'],
                         'text/x-weird; charset="iso-8859-1"')
        self.assertEqual(message['content-transfer-encoding'],
                         'quinted-puntable')

    def test_set_charset_from_string(self):
        # A plain charset name is accepted in place of a Charset instance.
        message = Message()
        message.set_charset('us-ascii')
        self.assertEqual(message.get_charset().input_charset, 'us-ascii')
        self.assertEqual(message['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        # The charset handed to set_payload() is the one get_charset()
        # reports afterwards.
        latin1 = Charset('iso-8859-1')
        message = Message()
        message.set_payload('This is a string payload', latin1)
        self.assertEqual(message.get_charset().input_charset, 'iso-8859-1')

    def test_set_payload_with_8bit_data_and_charset(self):
        # Raw utf-8 bytes are stored base64-encoded and decode back intact.
        raw = b'\xd0\x90\xd0\x91\xd0\x92'
        message = Message()
        message.set_payload(raw, Charset('utf-8'))
        self.assertEqual(message['content-transfer-encoding'], 'base64')
        self.assertEqual(message.get_payload(decode=True), raw)
        self.assertEqual(message.get_payload(), '0JDQkdCS\n')

    def test_set_payload_with_non_ascii_and_charset_body_encoding_none(self):
        raw = b'\xd0\x90\xd0\x91\xd0\x92'
        utf8 = Charset('utf-8')
        # Turn off base64 body encoding; the text payload is then 8bit.
        utf8.body_encoding = None
        message = Message()
        message.set_payload(raw.decode('utf-8'), utf8)
        self.assertEqual(message['content-transfer-encoding'], '8bit')
        self.assertEqual(message.get_payload(decode=True), raw)

    def test_set_payload_with_8bit_data_and_charset_body_encoding_none(self):
        raw = b'\xd0\x90\xd0\x91\xd0\x92'
        utf8 = Charset('utf-8')
        # Turn off base64 body encoding; the bytes payload is then 8bit.
        utf8.body_encoding = None
        message = Message()
        message.set_payload(raw, utf8)
        self.assertEqual(message['content-transfer-encoding'], '8bit')
        self.assertEqual(message.get_payload(decode=True), raw)

    def test_set_payload_to_list(self):
        # An empty list is a valid payload and is returned unchanged.
        message = Message()
        message.set_payload([])
        self.assertEqual(message.get_payload(), [])

    def test_attach_when_payload_is_string(self):
        # attach() must refuse to add a part once the payload is a string,
        # even though the Content-Type claims multipart.
        container = Message()
        container['Content-Type'] = 'multipart/mixed'
        container.set_payload('string payload')
        attachment = MIMEMessage(Message())
        with self.assertRaisesRegex(TypeError, "[Aa]ttach.*non-multipart"):
            container.attach(attachment)

    def test_get_charsets(self):
        # Each row: fixture file, positional arguments for get_charsets(),
        # and the list it is expected to return.
        cases = (
            ('msg_08.txt', (),
             [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r']),
            ('msg_09.txt', ('dingbat',),
             ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
              'koi8-r']),
            ('msg_12.txt', (),
             [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
              'iso-8859-3', 'us-ascii', 'koi8-r']),
        )
        for fixture, arguments, expected in cases:
            parsed = self._msgobj(fixture)
            found = parsed.get_charsets(*arguments)
            self.assertEqual(found, expected)

def test_get_filename(self): 157 eq = self.assertEqual 158 159 msg = self._msgobj('msg_04.txt') 160 filenames = [p.get_filename() for p in msg.get_payload()] 161 eq(filenames, ['msg.txt', 'msg.txt']) 162 163 msg = self._msgobj('msg_07.txt') 164 subpart = msg.get_payload(1) 165 eq(subpart.get_filename(), 'dingusfish.gif') 166 167 def test_get_filename_with_name_parameter(self): 168 eq = self.assertEqual 169 170 msg = self._msgobj('msg_44.txt') 171 filenames = [p.get_filename() for p in msg.get_payload()] 172 eq(filenames, ['msg.txt', 'msg.txt']) 173 174 def test_get_boundary(self): 175 eq = self.assertEqual 176 msg = self._msgobj('msg_07.txt') 177 # No quotes! 178 eq(msg.get_boundary(), 'BOUNDARY') 179 180 def test_set_boundary(self): 181 eq = self.assertEqual 182 # This one has no existing boundary parameter, but the Content-Type: 183 # header appears fifth. 184 msg = self._msgobj('msg_01.txt') 185 msg.set_boundary('BOUNDARY') 186 header, value = msg.items()[4] 187 eq(header.lower(), 'content-type') 188 eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"') 189 # This one has a Content-Type: header, with a boundary, stuck in the 190 # middle of its headers. Make sure the order is preserved; it should 191 # be fifth. 192 msg = self._msgobj('msg_04.txt') 193 msg.set_boundary('BOUNDARY') 194 header, value = msg.items()[4] 195 eq(header.lower(), 'content-type') 196 eq(value, 'multipart/mixed; boundary="BOUNDARY"') 197 # And this one has no Content-Type: header at all. 198 msg = self._msgobj('msg_03.txt') 199 self.assertRaises(errors.HeaderParseError, 200 msg.set_boundary, 'BOUNDARY') 201 202 def test_make_boundary(self): 203 msg = MIMEMultipart('form-data') 204 # Note that when the boundary gets created is an implementation 205 # detail and might change. 
206 self.assertEqual(msg.items()[0][1], 'multipart/form-data') 207 # Trigger creation of boundary 208 msg.as_string() 209 self.assertEqual(msg.items()[0][1][:33], 210 'multipart/form-data; boundary="==') 211 # XXX: there ought to be tests of the uniqueness of the boundary, too. 212 213 def test_message_rfc822_only(self): 214 # Issue 7970: message/rfc822 not in multipart parsed by 215 # HeaderParser caused an exception when flattened. 216 with openfile('msg_46.txt') as fp: 217 msgdata = fp.read() 218 parser = HeaderParser() 219 msg = parser.parsestr(msgdata) 220 out = StringIO() 221 gen = Generator(out, True, 0) 222 gen.flatten(msg, False) 223 self.assertEqual(out.getvalue(), msgdata) 224 225 def test_byte_message_rfc822_only(self): 226 # Make sure new bytes header parser also passes this. 227 with openfile('msg_46.txt') as fp: 228 msgdata = fp.read().encode('ascii') 229 parser = email.parser.BytesHeaderParser() 230 msg = parser.parsebytes(msgdata) 231 out = BytesIO() 232 gen = email.generator.BytesGenerator(out) 233 gen.flatten(msg) 234 self.assertEqual(out.getvalue(), msgdata) 235 236 def test_get_decoded_payload(self): 237 eq = self.assertEqual 238 msg = self._msgobj('msg_10.txt') 239 # The outer message is a multipart 240 eq(msg.get_payload(decode=True), None) 241 # Subpart 1 is 7bit encoded 242 eq(msg.get_payload(0).get_payload(decode=True), 243 b'This is a 7bit encoded message.\n') 244 # Subpart 2 is quopri 245 eq(msg.get_payload(1).get_payload(decode=True), 246 b'\xa1This is a Quoted Printable encoded message!\n') 247 # Subpart 3 is base64 248 eq(msg.get_payload(2).get_payload(decode=True), 249 b'This is a Base64 encoded message.') 250 # Subpart 4 is base64 with a trailing newline, which 251 # used to be stripped (issue 7143). 252 eq(msg.get_payload(3).get_payload(decode=True), 253 b'This is a Base64 encoded message.\n') 254 # Subpart 5 has no Content-Transfer-Encoding: header. 
255 eq(msg.get_payload(4).get_payload(decode=True), 256 b'This has no Content-Transfer-Encoding: header.\n') 257 258 def test_get_decoded_uu_payload(self): 259 eq = self.assertEqual 260 msg = Message() 261 msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n') 262 for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): 263 msg['content-transfer-encoding'] = cte 264 eq(msg.get_payload(decode=True), b'hello world') 265 # Now try some bogus data 266 msg.set_payload('foo') 267 eq(msg.get_payload(decode=True), b'foo') 268 269 def test_get_payload_n_raises_on_non_multipart(self): 270 msg = Message() 271 self.assertRaises(TypeError, msg.get_payload, 1) 272 273 def test_decoded_generator(self): 274 eq = self.assertEqual 275 msg = self._msgobj('msg_07.txt') 276 with openfile('msg_17.txt') as fp: 277 text = fp.read() 278 s = StringIO() 279 g = DecodedGenerator(s) 280 g.flatten(msg) 281 eq(s.getvalue(), text) 282 283 def test__contains__(self): 284 msg = Message() 285 msg['From'] = 'Me' 286 msg['to'] = 'You' 287 # Check for case insensitivity 288 self.assertIn('from', msg) 289 self.assertIn('From', msg) 290 self.assertIn('FROM', msg) 291 self.assertIn('to', msg) 292 self.assertIn('To', msg) 293 self.assertIn('TO', msg) 294 295 def test_as_string(self): 296 msg = self._msgobj('msg_01.txt') 297 with openfile('msg_01.txt') as fp: 298 text = fp.read() 299 self.assertEqual(text, str(msg)) 300 fullrepr = msg.as_string(unixfrom=True) 301 lines = fullrepr.split('\n') 302 self.assertTrue(lines[0].startswith('From ')) 303 self.assertEqual(text, NL.join(lines[1:])) 304 305 def test_as_string_policy(self): 306 msg = self._msgobj('msg_01.txt') 307 newpolicy = msg.policy.clone(linesep='\r\n') 308 fullrepr = msg.as_string(policy=newpolicy) 309 s = StringIO() 310 g = Generator(s, policy=newpolicy) 311 g.flatten(msg) 312 self.assertEqual(fullrepr, s.getvalue()) 313 314 def test_nonascii_as_string_without_cte(self): 315 m = textwrap.dedent("""\ 316 MIME-Version: 1.0 317 Content-type: 
text/plain; charset="iso-8859-1" 318 319 Test if non-ascii messages with no Content-Transfer-Encoding set 320 can be as_string'd: 321 Föö bär 322 """) 323 source = m.encode('iso-8859-1') 324 expected = textwrap.dedent("""\ 325 MIME-Version: 1.0 326 Content-type: text/plain; charset="iso-8859-1" 327 Content-Transfer-Encoding: quoted-printable 328 329 Test if non-ascii messages with no Content-Transfer-Encoding set 330 can be as_string'd: 331 F=F6=F6 b=E4r 332 """) 333 msg = email.message_from_bytes(source) 334 self.assertEqual(msg.as_string(), expected) 335 336 def test_nonascii_as_string_without_content_type_and_cte(self): 337 m = textwrap.dedent("""\ 338 MIME-Version: 1.0 339 340 Test if non-ascii messages with no Content-Type nor 341 Content-Transfer-Encoding set can be as_string'd: 342 Föö bär 343 """) 344 source = m.encode('iso-8859-1') 345 expected = source.decode('ascii', 'replace') 346 msg = email.message_from_bytes(source) 347 self.assertEqual(msg.as_string(), expected) 348 349 def test_as_bytes(self): 350 msg = self._msgobj('msg_01.txt') 351 with openfile('msg_01.txt') as fp: 352 data = fp.read().encode('ascii') 353 self.assertEqual(data, bytes(msg)) 354 fullrepr = msg.as_bytes(unixfrom=True) 355 lines = fullrepr.split(b'\n') 356 self.assertTrue(lines[0].startswith(b'From ')) 357 self.assertEqual(data, b'\n'.join(lines[1:])) 358 359 def test_as_bytes_policy(self): 360 msg = self._msgobj('msg_01.txt') 361 newpolicy = msg.policy.clone(linesep='\r\n') 362 fullrepr = msg.as_bytes(policy=newpolicy) 363 s = BytesIO() 364 g = BytesGenerator(s,policy=newpolicy) 365 g.flatten(msg) 366 self.assertEqual(fullrepr, s.getvalue()) 367 368 # test_headerregistry.TestContentTypeHeader.bad_params 369 def test_bad_param(self): 370 msg = email.message_from_string("Content-Type: blarg; baz; boo\n") 371 self.assertEqual(msg.get_param('baz'), '') 372 373 def test_missing_filename(self): 374 msg = email.message_from_string("From: foo\n") 375 self.assertEqual(msg.get_filename(), 
None) 376 377 def test_bogus_filename(self): 378 msg = email.message_from_string( 379 "Content-Disposition: blarg; filename\n") 380 self.assertEqual(msg.get_filename(), '') 381 382 def test_missing_boundary(self): 383 msg = email.message_from_string("From: foo\n") 384 self.assertEqual(msg.get_boundary(), None) 385 386 def test_get_params(self): 387 eq = self.assertEqual 388 msg = email.message_from_string( 389 'X-Header: foo=one; bar=two; baz=three\n') 390 eq(msg.get_params(header='x-header'), 391 [('foo', 'one'), ('bar', 'two'), ('baz', 'three')]) 392 msg = email.message_from_string( 393 'X-Header: foo; bar=one; baz=two\n') 394 eq(msg.get_params(header='x-header'), 395 [('foo', ''), ('bar', 'one'), ('baz', 'two')]) 396 eq(msg.get_params(), None) 397 msg = email.message_from_string( 398 'X-Header: foo; bar="one"; baz=two\n') 399 eq(msg.get_params(header='x-header'), 400 [('foo', ''), ('bar', 'one'), ('baz', 'two')]) 401 402 # test_headerregistry.TestContentTypeHeader.spaces_around_param_equals 403 def test_get_param_liberal(self): 404 msg = Message() 405 msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"' 406 self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG') 407 408 def test_get_param(self): 409 eq = self.assertEqual 410 msg = email.message_from_string( 411 "X-Header: foo=one; bar=two; baz=three\n") 412 eq(msg.get_param('bar', header='x-header'), 'two') 413 eq(msg.get_param('quuz', header='x-header'), None) 414 eq(msg.get_param('quuz'), None) 415 msg = email.message_from_string( 416 'X-Header: foo; bar="one"; baz=two\n') 417 eq(msg.get_param('foo', header='x-header'), '') 418 eq(msg.get_param('bar', header='x-header'), 'one') 419 eq(msg.get_param('baz', header='x-header'), 'two') 420 # XXX: We are not RFC-2045 compliant! We cannot parse: 421 # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"' 422 # msg.get_param("weird") 423 # yet. 
424 425 # test_headerregistry.TestContentTypeHeader.spaces_around_semis 426 def test_get_param_funky_continuation_lines(self): 427 msg = self._msgobj('msg_22.txt') 428 self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG') 429 430 # test_headerregistry.TestContentTypeHeader.semis_inside_quotes 431 def test_get_param_with_semis_in_quotes(self): 432 msg = email.message_from_string( 433 'Content-Type: image/pjpeg; name="Jim&&Jill"\n') 434 self.assertEqual(msg.get_param('name'), 'Jim&&Jill') 435 self.assertEqual(msg.get_param('name', unquote=False), 436 '"Jim&&Jill"') 437 438 # test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value 439 def test_get_param_with_quotes(self): 440 msg = email.message_from_string( 441 'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"') 442 self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz') 443 msg = email.message_from_string( 444 "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"") 445 self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz') 446 447 def test_field_containment(self): 448 msg = email.message_from_string('Header: exists') 449 self.assertIn('header', msg) 450 self.assertIn('Header', msg) 451 self.assertIn('HEADER', msg) 452 self.assertNotIn('headerx', msg) 453 454 def test_set_param(self): 455 eq = self.assertEqual 456 msg = Message() 457 msg.set_param('charset', 'iso-2022-jp') 458 eq(msg.get_param('charset'), 'iso-2022-jp') 459 msg.set_param('importance', 'high value') 460 eq(msg.get_param('importance'), 'high value') 461 eq(msg.get_param('importance', unquote=False), '"high value"') 462 eq(msg.get_params(), [('text/plain', ''), 463 ('charset', 'iso-2022-jp'), 464 ('importance', 'high value')]) 465 eq(msg.get_params(unquote=False), [('text/plain', ''), 466 ('charset', '"iso-2022-jp"'), 467 ('importance', '"high value"')]) 468 msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy') 469 eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx') 470 471 def 
test_del_param(self): 472 eq = self.assertEqual 473 msg = self._msgobj('msg_05.txt') 474 eq(msg.get_params(), 475 [('multipart/report', ''), ('report-type', 'delivery-status'), 476 ('boundary', 'D1690A7AC1.996856090/mail.example.com')]) 477 old_val = msg.get_param("report-type") 478 msg.del_param("report-type") 479 eq(msg.get_params(), 480 [('multipart/report', ''), 481 ('boundary', 'D1690A7AC1.996856090/mail.example.com')]) 482 msg.set_param("report-type", old_val) 483 eq(msg.get_params(), 484 [('multipart/report', ''), 485 ('boundary', 'D1690A7AC1.996856090/mail.example.com'), 486 ('report-type', old_val)]) 487 488 def test_del_param_on_other_header(self): 489 msg = Message() 490 msg.add_header('Content-Disposition', 'attachment', filename='bud.gif') 491 msg.del_param('filename', 'content-disposition') 492 self.assertEqual(msg['content-disposition'], 'attachment') 493 494 def test_del_param_on_nonexistent_header(self): 495 msg = Message() 496 # Deleting param on empty msg should not raise exception. 
497 msg.del_param('filename', 'content-disposition') 498 499 def test_del_nonexistent_param(self): 500 msg = Message() 501 msg.add_header('Content-Type', 'text/plain', charset='utf-8') 502 existing_header = msg['Content-Type'] 503 msg.del_param('foobar', header='Content-Type') 504 self.assertEqual(msg['Content-Type'], existing_header) 505 506 def test_set_type(self): 507 eq = self.assertEqual 508 msg = Message() 509 self.assertRaises(ValueError, msg.set_type, 'text') 510 msg.set_type('text/plain') 511 eq(msg['content-type'], 'text/plain') 512 msg.set_param('charset', 'us-ascii') 513 eq(msg['content-type'], 'text/plain; charset="us-ascii"') 514 msg.set_type('text/html') 515 eq(msg['content-type'], 'text/html; charset="us-ascii"') 516 517 def test_set_type_on_other_header(self): 518 msg = Message() 519 msg['X-Content-Type'] = 'text/plain' 520 msg.set_type('application/octet-stream', 'X-Content-Type') 521 self.assertEqual(msg['x-content-type'], 'application/octet-stream') 522 523 def test_get_content_type_missing(self): 524 msg = Message() 525 self.assertEqual(msg.get_content_type(), 'text/plain') 526 527 def test_get_content_type_missing_with_default_type(self): 528 msg = Message() 529 msg.set_default_type('message/rfc822') 530 self.assertEqual(msg.get_content_type(), 'message/rfc822') 531 532 def test_get_content_type_from_message_implicit(self): 533 msg = self._msgobj('msg_30.txt') 534 self.assertEqual(msg.get_payload(0).get_content_type(), 535 'message/rfc822') 536 537 def test_get_content_type_from_message_explicit(self): 538 msg = self._msgobj('msg_28.txt') 539 self.assertEqual(msg.get_payload(0).get_content_type(), 540 'message/rfc822') 541 542 def test_get_content_type_from_message_text_plain_implicit(self): 543 msg = self._msgobj('msg_03.txt') 544 self.assertEqual(msg.get_content_type(), 'text/plain') 545 546 def test_get_content_type_from_message_text_plain_explicit(self): 547 msg = self._msgobj('msg_01.txt') 548 self.assertEqual(msg.get_content_type(), 
'text/plain') 549 550 def test_get_content_maintype_missing(self): 551 msg = Message() 552 self.assertEqual(msg.get_content_maintype(), 'text') 553 554 def test_get_content_maintype_missing_with_default_type(self): 555 msg = Message() 556 msg.set_default_type('message/rfc822') 557 self.assertEqual(msg.get_content_maintype(), 'message') 558 559 def test_get_content_maintype_from_message_implicit(self): 560 msg = self._msgobj('msg_30.txt') 561 self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message') 562 563 def test_get_content_maintype_from_message_explicit(self): 564 msg = self._msgobj('msg_28.txt') 565 self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message') 566 567 def test_get_content_maintype_from_message_text_plain_implicit(self): 568 msg = self._msgobj('msg_03.txt') 569 self.assertEqual(msg.get_content_maintype(), 'text') 570 571 def test_get_content_maintype_from_message_text_plain_explicit(self): 572 msg = self._msgobj('msg_01.txt') 573 self.assertEqual(msg.get_content_maintype(), 'text') 574 575 def test_get_content_subtype_missing(self): 576 msg = Message() 577 self.assertEqual(msg.get_content_subtype(), 'plain') 578 579 def test_get_content_subtype_missing_with_default_type(self): 580 msg = Message() 581 msg.set_default_type('message/rfc822') 582 self.assertEqual(msg.get_content_subtype(), 'rfc822') 583 584 def test_get_content_subtype_from_message_implicit(self): 585 msg = self._msgobj('msg_30.txt') 586 self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822') 587 588 def test_get_content_subtype_from_message_explicit(self): 589 msg = self._msgobj('msg_28.txt') 590 self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822') 591 592 def test_get_content_subtype_from_message_text_plain_implicit(self): 593 msg = self._msgobj('msg_03.txt') 594 self.assertEqual(msg.get_content_subtype(), 'plain') 595 596 def test_get_content_subtype_from_message_text_plain_explicit(self): 597 msg = self._msgobj('msg_01.txt') 
598 self.assertEqual(msg.get_content_subtype(), 'plain') 599 600 def test_get_content_maintype_error(self): 601 msg = Message() 602 msg['Content-Type'] = 'no-slash-in-this-string' 603 self.assertEqual(msg.get_content_maintype(), 'text') 604 605 def test_get_content_subtype_error(self): 606 msg = Message() 607 msg['Content-Type'] = 'no-slash-in-this-string' 608 self.assertEqual(msg.get_content_subtype(), 'plain') 609 610 def test_replace_header(self): 611 eq = self.assertEqual 612 msg = Message() 613 msg.add_header('First', 'One') 614 msg.add_header('Second', 'Two') 615 msg.add_header('Third', 'Three') 616 eq(msg.keys(), ['First', 'Second', 'Third']) 617 eq(msg.values(), ['One', 'Two', 'Three']) 618 msg.replace_header('Second', 'Twenty') 619 eq(msg.keys(), ['First', 'Second', 'Third']) 620 eq(msg.values(), ['One', 'Twenty', 'Three']) 621 msg.add_header('First', 'Eleven') 622 msg.replace_header('First', 'One Hundred') 623 eq(msg.keys(), ['First', 'Second', 'Third', 'First']) 624 eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven']) 625 self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing') 626 627 def test_get_content_disposition(self): 628 msg = Message() 629 self.assertIsNone(msg.get_content_disposition()) 630 msg.add_header('Content-Disposition', 'attachment', 631 filename='random.avi') 632 self.assertEqual(msg.get_content_disposition(), 'attachment') 633 msg.replace_header('Content-Disposition', 'inline') 634 self.assertEqual(msg.get_content_disposition(), 'inline') 635 msg.replace_header('Content-Disposition', 'InlinE') 636 self.assertEqual(msg.get_content_disposition(), 'inline') 637 638 # test_defect_handling:test_invalid_chars_in_base64_payload 639 def test_broken_base64_payload(self): 640 x = 'AwDp0P7//y6LwKEAcPa/6Q=9' 641 msg = Message() 642 msg['content-type'] = 'audio/x-midi' 643 msg['content-transfer-encoding'] = 'base64' 644 msg.set_payload(x) 645 self.assertEqual(msg.get_payload(decode=True), 646 
(b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0' 647 b'\xa1\x00p\xf6\xbf\xe9\x0f')) 648 self.assertIsInstance(msg.defects[0], 649 errors.InvalidBase64CharactersDefect) 650 651 def test_broken_unicode_payload(self): 652 # This test improves coverage but is not a compliance test. 653 # The behavior in this situation is currently undefined by the API. 654 x = 'this is a br\xf6ken thing to do' 655 msg = Message() 656 msg['content-type'] = 'text/plain' 657 msg['content-transfer-encoding'] = '8bit' 658 msg.set_payload(x) 659 self.assertEqual(msg.get_payload(decode=True), 660 bytes(x, 'raw-unicode-escape')) 661 662 def test_questionable_bytes_payload(self): 663 # This test improves coverage but is not a compliance test, 664 # since it involves poking inside the black box. 665 x = 'this is a quéstionable thing to do'.encode('utf-8') 666 msg = Message() 667 msg['content-type'] = 'text/plain; charset="utf-8"' 668 msg['content-transfer-encoding'] = '8bit' 669 msg._payload = x 670 self.assertEqual(msg.get_payload(decode=True), x) 671 672 # Issue 1078919 673 def test_ascii_add_header(self): 674 msg = Message() 675 msg.add_header('Content-Disposition', 'attachment', 676 filename='bud.gif') 677 self.assertEqual('attachment; filename="bud.gif"', 678 msg['Content-Disposition']) 679 680 def test_noascii_add_header(self): 681 msg = Message() 682 msg.add_header('Content-Disposition', 'attachment', 683 filename="Fußballer.ppt") 684 self.assertEqual( 685 'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt', 686 msg['Content-Disposition']) 687 688 def test_nonascii_add_header_via_triple(self): 689 msg = Message() 690 msg.add_header('Content-Disposition', 'attachment', 691 filename=('iso-8859-1', '', 'Fußballer.ppt')) 692 self.assertEqual( 693 'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt', 694 msg['Content-Disposition']) 695 696 def test_ascii_add_header_with_tspecial(self): 697 msg = Message() 698 msg.add_header('Content-Disposition', 'attachment', 699 filename="windows [filename].ppt") 
700 self.assertEqual( 701 'attachment; filename="windows [filename].ppt"', 702 msg['Content-Disposition']) 703 704 def test_nonascii_add_header_with_tspecial(self): 705 msg = Message() 706 msg.add_header('Content-Disposition', 'attachment', 707 filename="Fußballer [filename].ppt") 708 self.assertEqual( 709 "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt", 710 msg['Content-Disposition']) 711 712 def test_binary_quopri_payload(self): 713 for charset in ('latin-1', 'ascii'): 714 msg = Message() 715 msg['content-type'] = 'text/plain; charset=%s' % charset 716 msg['content-transfer-encoding'] = 'quoted-printable' 717 msg.set_payload(b'foo=e6=96=87bar') 718 self.assertEqual( 719 msg.get_payload(decode=True), 720 b'foo\xe6\x96\x87bar', 721 'get_payload returns wrong result with charset %s.' % charset) 722 723 def test_binary_base64_payload(self): 724 for charset in ('latin-1', 'ascii'): 725 msg = Message() 726 msg['content-type'] = 'text/plain; charset=%s' % charset 727 msg['content-transfer-encoding'] = 'base64' 728 msg.set_payload(b'Zm9v5paHYmFy') 729 self.assertEqual( 730 msg.get_payload(decode=True), 731 b'foo\xe6\x96\x87bar', 732 'get_payload returns wrong result with charset %s.' 
% charset) 733 734 def test_binary_uuencode_payload(self): 735 for charset in ('latin-1', 'ascii'): 736 for encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): 737 msg = Message() 738 msg['content-type'] = 'text/plain; charset=%s' % charset 739 msg['content-transfer-encoding'] = encoding 740 msg.set_payload(b"begin 666 -\n)9F]OYI:'8F%R\n \nend\n") 741 self.assertEqual( 742 msg.get_payload(decode=True), 743 b'foo\xe6\x96\x87bar', 744 str(('get_payload returns wrong result ', 745 'with charset {0} and encoding {1}.')).\ 746 format(charset, encoding)) 747 748 def test_add_header_with_name_only_param(self): 749 msg = Message() 750 msg.add_header('Content-Disposition', 'inline', foo_bar=None) 751 self.assertEqual("inline; foo-bar", msg['Content-Disposition']) 752 753 def test_add_header_with_no_value(self): 754 msg = Message() 755 msg.add_header('X-Status', None) 756 self.assertEqual('', msg['X-Status']) 757 758 # Issue 5871: reject an attempt to embed a header inside a header value 759 # (header injection attack). 
    def test_embedded_header_via_Header_rejected(self):
        # A Header value that smuggles in a newline followed by a
        # header-like line must make serialization fail.
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embedded_header_via_string_rejected(self):
        # Same check as above with a plain string value.
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_unicode_header_defaults_to_utf8_encoding(self):
        # Issue 14291
        # The body stays us-ascii/7bit; only the non-ASCII Subject is
        # emitted as a utf-8 encoded word.
        m = MIMEText('abc\n')
        m['Subject'] = 'É test'
        self.assertEqual(str(m),textwrap.dedent("""\
            Content-Type: text/plain; charset="us-ascii"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 7bit
            Subject: =?utf-8?q?=C3=89_test?=

            abc
            """))

    def test_unicode_body_defaults_to_utf8_encoding(self):
        # Issue 14291
        # A non-ASCII body with no explicit charset is sent as utf-8,
        # base64 encoded.
        m = MIMEText('É testabc\n')
        self.assertEqual(str(m),textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: base64

            w4kgdGVzdGFiYwo=
            """))


# Test the email.encoders module
class TestEncoders(unittest.TestCase):

    def test_EncodersEncode_base64(self):
        # Wrap a binary fixture in MIMEImage and inspect the encoded payload.
        with openfile('PyBanner048.gif', 'rb') as fp:
            bindata = fp.read()
        mimed = email.mime.image.MIMEImage(bindata)
        base64ed = mimed.get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        lines = base64ed.split('\n')
        self.assertLessEqual(max([ len(x) for x in lines ]), 76)

    def test_encode_empty_payload(self):
        # Setting us-ascii on a message with no payload yields a 7bit CTE.
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        eq = self.assertEqual
        # 7bit data and the default us-ascii _charset
        msg = MIMEText('hello world')
        eq(msg['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        msg = MIMEText('hello \xf8 world')
        eq(msg['content-transfer-encoding'], 'base64')
        # And now with a different charset
        msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        eq = self.assertEqual
        msg = MIMEText('文\n', _charset='euc-jp')
        eq(msg['content-transfer-encoding'], '7bit')
        # The expected output declares iso-2022-jp, not the euc-jp that was
        # passed in.
        eq(msg.as_string(), textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/plain; charset="iso-2022-jp"
            Content-Transfer-Encoding: 7bit

            \x1b$BJ8\x1b(B
            """))

    def test_qp_encode_latin1(self):
        # Latin-1 text is expected to be quoted-printable encoded.
        msg = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1')
        self.assertEqual(str(msg), textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-1"
            Content-Transfer-Encoding: quoted-printable

            =E1=F6
            """))

    def test_qp_encode_non_latin1(self):
        # Issue 16948
        # U+017C is expected to be encoded as =BF under iso-8859-2.
        msg = MIMEText('\u017c\n', 'text', 'ISO-8859-2')
        self.assertEqual(str(msg), textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-2"
            Content-Transfer-Encoding: quoted-printable

            =BF
            """))


# Test long header wrapping
class TestLongHeaders(TestEmailBase):

    # No limit on the length of the diffs unittest prints on failure.
    maxDiff = None

    def test_split_long_continuation(self):
        # A parsed header with an over-long continuation line must be
        # written back by Generator exactly as it was read.
        eq = self.ndiffAssertEqual
        msg = email.message_from_string("""\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
""")
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), """\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
""")

    def test_another_long_almost_unsplittable_header(self):
        # The same over-long text, this time through Header.encode(): once
        # with tab continuation whitespace, once with the tabs replaced by
        # spaces.  Both are expected to come back unchanged.
        eq = self.ndiffAssertEqual
        hstr = """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text"""
        h = Header(hstr, continuation_ws='\t')
        eq(h.encode(), """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text""")
        h = Header(hstr.replace('\t', ' '))
        eq(h.encode(), """\
bug demonstration
 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
 more text""")

    def test_long_nonstring(self):
        # Builds one Subject header from three chunks in three different
        # charsets and checks how it is folded.
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
                  b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
                  b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
                  b'bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g, header_name='Subject')
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        msg = Message()
        msg['Subject'] = h
        sfp = StringIO()
        # Note: g is rebound here; until now it named the iso-8859-1 Charset.
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), """\
Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
 =?utf-8?b?44CC?=

""")
        # Encoding the Header directly with an explicit 76-column limit.
        eq(h.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
956 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""") 957 958 def test_long_header_encode(self): 959 eq = self.ndiffAssertEqual 960 h = Header('wasnipoop; giraffes="very-long-necked-animals"; ' 961 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', 962 header_name='X-Foobar-Spoink-Defrobnit') 963 eq(h.encode(), '''\ 964wasnipoop; giraffes="very-long-necked-animals"; 965 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') 966 967 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self): 968 eq = self.ndiffAssertEqual 969 h = Header('wasnipoop; giraffes="very-long-necked-animals"; ' 970 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', 971 header_name='X-Foobar-Spoink-Defrobnit', 972 continuation_ws='\t') 973 eq(h.encode(), '''\ 974wasnipoop; giraffes="very-long-necked-animals"; 975 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') 976 977 def test_long_header_encode_with_tab_continuation(self): 978 eq = self.ndiffAssertEqual 979 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t' 980 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', 981 header_name='X-Foobar-Spoink-Defrobnit', 982 continuation_ws='\t') 983 eq(h.encode(), '''\ 984wasnipoop; giraffes="very-long-necked-animals"; 985\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') 986 987 def test_header_encode_with_different_output_charset(self): 988 h = Header('文', 'euc-jp') 989 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=") 990 991 def test_long_header_encode_with_different_output_charset(self): 992 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4' 993 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4' 994 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4' 995 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp') 996 res = """\ 997=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?= 998 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?=""" 999 
self.assertEqual(h.encode(), res) 1000 1001 def test_header_splitter(self): 1002 eq = self.ndiffAssertEqual 1003 msg = MIMEText('') 1004 # It'd be great if we could use add_header() here, but that doesn't 1005 # guarantee an order of the parameters. 1006 msg['X-Foobar-Spoink-Defrobnit'] = ( 1007 'wasnipoop; giraffes="very-long-necked-animals"; ' 1008 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"') 1009 sfp = StringIO() 1010 g = Generator(sfp) 1011 g.flatten(msg) 1012 eq(sfp.getvalue(), '''\ 1013Content-Type: text/plain; charset="us-ascii" 1014MIME-Version: 1.0 1015Content-Transfer-Encoding: 7bit 1016X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals"; 1017 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey" 1018 1019''') 1020 1021 def test_no_semis_header_splitter(self): 1022 eq = self.ndiffAssertEqual 1023 msg = Message() 1024 msg['From'] = 'test@dom.ain' 1025 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10)) 1026 msg.set_payload('Test') 1027 sfp = StringIO() 1028 g = Generator(sfp) 1029 g.flatten(msg) 1030 eq(sfp.getvalue(), """\ 1031From: test@dom.ain 1032References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain> 1033 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain> 1034 1035Test""") 1036 1037 def test_last_split_chunk_does_not_fit(self): 1038 eq = self.ndiffAssertEqual 1039 h = Header('Subject: the first part of this is short, but_the_second' 1040 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' 1041 '_all_by_itself') 1042 eq(h.encode(), """\ 1043Subject: the first part of this is short, 1044 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") 1045 1046 def test_splittable_leading_char_followed_by_overlong_unsplittable(self): 1047 eq = self.ndiffAssertEqual 1048 h = Header(', but_the_second' 1049 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' 1050 '_all_by_itself') 1051 eq(h.encode(), """\ 
1052, 1053 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") 1054 1055 def test_multiple_splittable_leading_char_followed_by_overlong_unsplittable(self): 1056 eq = self.ndiffAssertEqual 1057 h = Header(', , but_the_second' 1058 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' 1059 '_all_by_itself') 1060 eq(h.encode(), """\ 1061, , 1062 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") 1063 1064 def test_trailing_splittable_on_overlong_unsplittable(self): 1065 eq = self.ndiffAssertEqual 1066 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1067 'be_on_a_line_all_by_itself;') 1068 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_" 1069 "be_on_a_line_all_by_itself;") 1070 1071 def test_trailing_splittable_on_overlong_unsplittable_with_leading_splittable(self): 1072 eq = self.ndiffAssertEqual 1073 h = Header('; ' 1074 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1075 'be_on_a_line_all_by_itself; ') 1076 eq(h.encode(), """\ 1077; 1078 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """) 1079 1080 def test_long_header_with_multiple_sequential_split_chars(self): 1081 eq = self.ndiffAssertEqual 1082 h = Header('This is a long line that has two whitespaces in a row. ' 1083 'This used to cause truncation of the header when folded') 1084 eq(h.encode(), """\ 1085This is a long line that has two whitespaces in a row. 
This used to cause 1086 truncation of the header when folded""") 1087 1088 def test_splitter_split_on_punctuation_only_if_fws_with_header(self): 1089 eq = self.ndiffAssertEqual 1090 h = Header('thisverylongheaderhas;semicolons;and,commas,but' 1091 'they;arenotlegal;fold,points') 1092 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;" 1093 "arenotlegal;fold,points") 1094 1095 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self): 1096 eq = self.ndiffAssertEqual 1097 h = Header('this is a test where we need to have more than one line ' 1098 'before; our final line that is just too big to fit;; ' 1099 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1100 'be_on_a_line_all_by_itself;') 1101 eq(h.encode(), """\ 1102this is a test where we need to have more than one line before; 1103 our final line that is just too big to fit;; 1104 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""") 1105 1106 def test_overlong_last_part_followed_by_split_point(self): 1107 eq = self.ndiffAssertEqual 1108 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1109 'be_on_a_line_all_by_itself ') 1110 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_" 1111 "should_be_on_a_line_all_by_itself ") 1112 1113 def test_multiline_with_overlong_parts_separated_by_two_split_points(self): 1114 eq = self.ndiffAssertEqual 1115 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_' 1116 'before_our_final_line_; ; ' 1117 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1118 'be_on_a_line_all_by_itself; ') 1119 eq(h.encode(), """\ 1120this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_; 1121 ; 1122 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """) 1123 1124 def test_multiline_with_overlong_last_part_followed_by_split_point(self): 1125 eq = self.ndiffAssertEqual 1126 h = Header('this is a 
test where we need to have more than one line ' 1127 'before our final line; ; ' 1128 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1129 'be_on_a_line_all_by_itself; ') 1130 eq(h.encode(), """\ 1131this is a test where we need to have more than one line before our final line; 1132 ; 1133 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """) 1134 1135 def test_long_header_with_whitespace_runs(self): 1136 eq = self.ndiffAssertEqual 1137 msg = Message() 1138 msg['From'] = 'test@dom.ain' 1139 msg['References'] = SPACE.join(['<foo@dom.ain> '] * 10) 1140 msg.set_payload('Test') 1141 sfp = StringIO() 1142 g = Generator(sfp) 1143 g.flatten(msg) 1144 eq(sfp.getvalue(), """\ 1145From: test@dom.ain 1146References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> 1147 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> 1148 <foo@dom.ain> <foo@dom.ain>\x20\x20 1149 1150Test""") 1151 1152 def test_long_run_with_semi_header_splitter(self): 1153 eq = self.ndiffAssertEqual 1154 msg = Message() 1155 msg['From'] = 'test@dom.ain' 1156 msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc' 1157 msg.set_payload('Test') 1158 sfp = StringIO() 1159 g = Generator(sfp) 1160 g.flatten(msg) 1161 eq(sfp.getvalue(), """\ 1162From: test@dom.ain 1163References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> 1164 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> 1165 <foo@dom.ain>; abc 1166 1167Test""") 1168 1169 def test_splitter_split_on_punctuation_only_if_fws(self): 1170 eq = self.ndiffAssertEqual 1171 msg = Message() 1172 msg['From'] = 'test@dom.ain' 1173 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but' 1174 'they;arenotlegal;fold,points') 1175 msg.set_payload('Test') 1176 sfp = StringIO() 1177 g = Generator(sfp) 1178 g.flatten(msg) 1179 # XXX the space after the header should not be there. 
1180 eq(sfp.getvalue(), """\ 1181From: test@dom.ain 1182References:\x20 1183 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points 1184 1185Test""") 1186 1187 def test_no_split_long_header(self): 1188 eq = self.ndiffAssertEqual 1189 hstr = 'References: ' + 'x' * 80 1190 h = Header(hstr) 1191 # These come on two lines because Headers are really field value 1192 # classes and don't really know about their field names. 1193 eq(h.encode(), """\ 1194References: 1195 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""") 1196 h = Header('x' * 80) 1197 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx') 1198 1199 def test_splitting_multiple_long_lines(self): 1200 eq = self.ndiffAssertEqual 1201 hstr = """\ 1202from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1203\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1204\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1205""" 1206 h = Header(hstr, continuation_ws='\t') 1207 eq(h.encode(), """\ 1208from babylon.socal-raves.org (localhost [127.0.0.1]); 1209 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; 1210 for <mailman-admin@babylon.socal-raves.org>; 1211 Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1212\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); 1213 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; 1214 for <mailman-admin@babylon.socal-raves.org>; 1215 Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1216\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); 
1217 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; 1218 for <mailman-admin@babylon.socal-raves.org>; 1219 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""") 1220 1221 def test_splitting_first_line_only_is_long(self): 1222 eq = self.ndiffAssertEqual 1223 hstr = """\ 1224from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca) 1225\tby kronos.mems-exchange.org with esmtp (Exim 4.05) 1226\tid 17k4h5-00034i-00 1227\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""" 1228 h = Header(hstr, maxlinelen=78, header_name='Received', 1229 continuation_ws='\t') 1230 eq(h.encode(), """\ 1231from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] 1232 helo=cthulhu.gerg.ca) 1233\tby kronos.mems-exchange.org with esmtp (Exim 4.05) 1234\tid 17k4h5-00034i-00 1235\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""") 1236 1237 def test_long_8bit_header(self): 1238 eq = self.ndiffAssertEqual 1239 msg = Message() 1240 h = Header('Britische Regierung gibt', 'iso-8859-1', 1241 header_name='Subject') 1242 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte') 1243 eq(h.encode(maxlinelen=76), """\ 1244=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?= 1245 =?iso-8859-1?q?hore-Windkraftprojekte?=""") 1246 msg['Subject'] = h 1247 eq(msg.as_string(maxheaderlen=76), """\ 1248Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?= 1249 =?iso-8859-1?q?hore-Windkraftprojekte?= 1250 1251""") 1252 eq(msg.as_string(maxheaderlen=0), """\ 1253Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?= 1254 1255""") 1256 1257 def test_long_8bit_header_no_charset(self): 1258 eq = self.ndiffAssertEqual 1259 msg = Message() 1260 header_string = ('Britische Regierung gibt gr\xfcnes Licht ' 1261 'f\xfcr Offshore-Windkraftprojekte ' 1262 '<a-very-long-address@example.com>') 1263 msg['Reply-To'] = header_string 1264 eq(msg.as_string(maxheaderlen=78), """\ 
1265Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?= 1266 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?= 1267 1268""") 1269 msg = Message() 1270 msg['Reply-To'] = Header(header_string, 1271 header_name='Reply-To') 1272 eq(msg.as_string(maxheaderlen=78), """\ 1273Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?= 1274 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?= 1275 1276""") 1277 1278 def test_long_to_header(self): 1279 eq = self.ndiffAssertEqual 1280 to = ('"Someone Test #A" <someone@eecs.umich.edu>,' 1281 '<someone@eecs.umich.edu>, ' 1282 '"Someone Test #B" <someone@umich.edu>, ' 1283 '"Someone Test #C" <someone@eecs.umich.edu>, ' 1284 '"Someone Test #D" <someone@eecs.umich.edu>') 1285 msg = Message() 1286 msg['To'] = to 1287 eq(msg.as_string(maxheaderlen=78), '''\ 1288To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>, 1289 "Someone Test #B" <someone@umich.edu>, 1290 "Someone Test #C" <someone@eecs.umich.edu>, 1291 "Someone Test #D" <someone@eecs.umich.edu> 1292 1293''') 1294 1295 def test_long_line_after_append(self): 1296 eq = self.ndiffAssertEqual 1297 s = 'This is an example of string which has almost the limit of header length.' 1298 h = Header(s) 1299 h.append('Add another line.') 1300 eq(h.encode(maxlinelen=76), """\ 1301This is an example of string which has almost the limit of header length. 1302 Add another line.""") 1303 1304 def test_shorter_line_with_append(self): 1305 eq = self.ndiffAssertEqual 1306 s = 'This is a shorter line.' 1307 h = Header(s) 1308 h.append('Add another sentence. (Surprise?)') 1309 eq(h.encode(), 1310 'This is a shorter line. Add another sentence. 
(Surprise?)') 1311 1312 def test_long_field_name(self): 1313 eq = self.ndiffAssertEqual 1314 fn = 'X-Very-Very-Very-Long-Header-Name' 1315 gs = ('Die Mieter treten hier ein werden mit einem Foerderband ' 1316 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen ' 1317 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen ' 1318 'bef\xf6rdert. ') 1319 h = Header(gs, 'iso-8859-1', header_name=fn) 1320 # BAW: this seems broken because the first line is too long 1321 eq(h.encode(maxlinelen=76), """\ 1322=?iso-8859-1?q?Die_Mieter_treten_hier_e?= 1323 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?= 1324 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?= 1325 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""") 1326 1327 def test_long_received_header(self): 1328 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) ' 1329 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; ' 1330 'Wed, 05 Mar 2003 18:10:18 -0700') 1331 msg = Message() 1332 msg['Received-1'] = Header(h, continuation_ws='\t') 1333 msg['Received-2'] = h 1334 # This should be splitting on spaces not semicolons. 1335 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\ 1336Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by 1337 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; 1338 Wed, 05 Mar 2003 18:10:18 -0700 1339Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by 1340 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; 1341 Wed, 05 Mar 2003 18:10:18 -0700 1342 1343""") 1344 1345 def test_string_headerinst_eq(self): 1346 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.' 1347 'tu-muenchen.de> (David Bremner\'s message of ' 1348 '"Thu, 6 Mar 2003 13:58:21 +0100")') 1349 msg = Message() 1350 msg['Received-1'] = Header(h, header_name='Received-1', 1351 continuation_ws='\t') 1352 msg['Received-2'] = h 1353 # XXX The space after the ':' should not be there. 
1354 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\ 1355Received-1:\x20 1356 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David 1357 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\") 1358Received-2:\x20 1359 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David 1360 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\") 1361 1362""") 1363 1364 def test_long_unbreakable_lines_with_continuation(self): 1365 eq = self.ndiffAssertEqual 1366 msg = Message() 1367 t = """\ 1368iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1369 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp""" 1370 msg['Face-1'] = t 1371 msg['Face-2'] = Header(t, header_name='Face-2') 1372 msg['Face-3'] = ' ' + t 1373 # XXX This splitting is all wrong. It the first value line should be 1374 # snug against the field name or the space after the header not there. 1375 eq(msg.as_string(maxheaderlen=78), """\ 1376Face-1:\x20 1377 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1378 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp 1379Face-2:\x20 1380 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1381 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp 1382Face-3:\x20 1383 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1384 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp 1385 1386""") 1387 1388 def test_another_long_multiline_header(self): 1389 eq = self.ndiffAssertEqual 1390 m = ('Received: from siimage.com ' 1391 '([172.25.1.3]) by zima.siliconimage.com with ' 1392 'Microsoft SMTPSVC(5.0.2195.4905); ' 1393 'Wed, 16 Oct 2002 07:41:11 -0700') 1394 msg = email.message_from_string(m) 1395 eq(msg.as_string(maxheaderlen=78), '''\ 1396Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with 1397 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 
# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # A one-header message whose body starts with a "From " line.
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def test_mangled_from(self):
        # With mangle_from_ on, the body's leading "From " gains a '>'.
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(self.msg)
        self.assertEqual(out.getvalue(), """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        # With mangle_from_ off, the body is written through unchanged.
        out = StringIO()
        Generator(out, mangle_from_=False).flatten(self.msg)
        self.assertEqual(out.getvalue(), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")

    def test_mangle_from_in_preamble_and_epilog(self):
        out = StringIO()
        writer = Generator(out, mangle_from_=True)
        parsed = email.message_from_string(textwrap.dedent("""\
            From: foo@bar.com
            Mime-Version: 1.0
            Content-Type: multipart/mixed; boundary=XXX

            From somewhere unknown

            --XXX
            Content-Type: text/plain

            foo

            --XXX--

            From somewhere unknowable
            """))
        writer.flatten(parsed)
        # Exactly two output lines (the "From " lines before the first and
        # after the last boundary) should have been prefixed with '>'.
        mangled = sum(1 for line in out.getvalue().split('\n')
                      if line.startswith('>From '))
        self.assertEqual(mangled, 2)

    def test_mangled_from_with_bad_bytes(self):
        header_bytes = textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 8bit
            From: aaa@bbb.org

            """).encode('utf-8')
        parsed = email.message_from_bytes(header_bytes + b'From R\xc3\xb6lli\n')
        sink = BytesIO()
        BytesGenerator(sink, mangle_from_=True).flatten(parsed)
        self.assertEqual(sink.getvalue(),
                         header_bytes + b'>From R\xc3\xb6lli\n')

    def test_multipart_with_bad_bytes_in_cte(self):
        # bpo30835
        # No assertion: the test only requires that parsing does not raise.
        raw = textwrap.dedent("""\
            From: aperson@example.com
            Content-Type: multipart/mixed; boundary="1"
            Content-Transfer-Encoding: \xc8
        """).encode('utf-8')
        email.message_from_bytes(raw)
self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')), 1527 self._audiodata) 1528 1529 def test_checkSetMinor(self): 1530 au = MIMEAudio(self._audiodata, 'fish') 1531 self.assertEqual(au.get_content_type(), 'audio/fish') 1532 1533 def test_add_header(self): 1534 eq = self.assertEqual 1535 self._au.add_header('Content-Disposition', 'attachment', 1536 filename='audiotest.au') 1537 eq(self._au['content-disposition'], 1538 'attachment; filename="audiotest.au"') 1539 eq(self._au.get_params(header='content-disposition'), 1540 [('attachment', ''), ('filename', 'audiotest.au')]) 1541 eq(self._au.get_param('filename', header='content-disposition'), 1542 'audiotest.au') 1543 missing = [] 1544 eq(self._au.get_param('attachment', header='content-disposition'), '') 1545 self.assertIs(self._au.get_param('foo', failobj=missing, 1546 header='content-disposition'), missing) 1547 # Try some missing stuff 1548 self.assertIs(self._au.get_param('foobar', missing), missing) 1549 self.assertIs(self._au.get_param('attachment', missing, 1550 header='foobar'), missing) 1551 1552 1553 1554# Test the basic MIMEImage class 1555class TestMIMEImage(unittest.TestCase): 1556 def setUp(self): 1557 with openfile('PyBanner048.gif', 'rb') as fp: 1558 self._imgdata = fp.read() 1559 self._im = MIMEImage(self._imgdata) 1560 1561 def test_guess_minor_type(self): 1562 self.assertEqual(self._im.get_content_type(), 'image/gif') 1563 1564 def test_encoding(self): 1565 payload = self._im.get_payload() 1566 self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')), 1567 self._imgdata) 1568 1569 def test_checkSetMinor(self): 1570 im = MIMEImage(self._imgdata, 'fish') 1571 self.assertEqual(im.get_content_type(), 'image/fish') 1572 1573 def test_add_header(self): 1574 eq = self.assertEqual 1575 self._im.add_header('Content-Disposition', 'attachment', 1576 filename='dingusfish.gif') 1577 eq(self._im['content-disposition'], 1578 'attachment; filename="dingusfish.gif"') 1579 
# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def _roundtrip(self, msg):
        # Flatten msg to bytes with BytesGenerator and parse those bytes
        # back into a new message object, which is returned.
        wire = BytesIO()
        BytesGenerator(wire).flatten(msg)
        return email.message_from_bytes(wire.getvalue())

    def test_headers(self):
        eq = self.assertEqual
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        eq(msg.get_content_type(), 'application/octet-stream')
        eq(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        eq = self.assertEqual
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        eq(msg.get_payload().strip(), '+vv8/f7/')
        eq(msg.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_7or8bit(self):
        # Issue 17171.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_7or8bit)
        # Treated as a string, this will be invalid code points.
        undecoded = '\uFFFD' * len(bytesdata)
        self.assertEqual(msg.get_payload(), undecoded)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], '8bit')
        msg2 = self._roundtrip(msg)
        # Re-check the original after flattening (kept from the existing
        # test) and also check the reparsed copy, which the earlier version
        # of this test never inspected in undecoded form.
        self.assertEqual(msg.get_payload(), undecoded)
        self.assertEqual(msg2.get_payload(), undecoded)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], '8bit')

    def test_binary_body_with_encode_noop(self):
        # Issue 16564: This does not produce an RFC valid message, since to be
        # valid it should have a CTE of binary.  But the below works in
        # Python2, and is documented as working this way.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        # Treated as a string, this will be invalid code points.
        undecoded = '\uFFFD' * len(bytesdata)
        self.assertEqual(msg.get_payload(), undecoded)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        # Original re-checked after flattening; reparsed copy checked too.
        self.assertEqual(msg.get_payload(), undecoded)
        self.assertEqual(msg2.get_payload(), undecoded)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_unicode_linend_encode_noop(self):
        # Issue 19003: This is a variation on #16564.
        bytesdata = b'\x0b\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_quopri(self):
        # Issue 14360.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff '
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_quopri)
        encoded = '=FA=FB=FC=FD=FE=FF=20'
        self.assertEqual(msg.get_payload(), encoded)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], 'quoted-printable')
        msg2 = self._roundtrip(msg)
        # Original re-checked after flattening; reparsed copy checked too.
        self.assertEqual(msg.get_payload(), encoded)
        self.assertEqual(msg2.get_payload(), encoded)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], 'quoted-printable')

    def test_binary_body_with_encode_base64(self):
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_base64)
        encoded = '+vv8/f7/\n'
        self.assertEqual(msg.get_payload(), encoded)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        # Original re-checked after flattening; reparsed copy checked too.
        self.assertEqual(msg.get_payload(), encoded)
        self.assertEqual(msg2.get_payload(), encoded)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
class TestMIMEText(unittest.TestCase):
    # Checks the content type, charset parameter and payload that MIMEText
    # produces for plain ASCII and for UTF-8 input.

    def setUp(self):
        # A default-constructed text part shared by the simple tests.
        self._msg = MIMEText('hello there')

    def test_types(self):
        part = self._msg
        self.assertEqual(part.get_content_type(), 'text/plain')
        self.assertEqual(part.get_param('charset'), 'us-ascii')
        # An unknown parameter, or an unknown header, hands back the
        # caller's fail object itself.
        sentinel = []
        self.assertIs(part.get_param('foobar', sentinel), sentinel)
        found = part.get_param('charset', sentinel, header='foobar')
        self.assertIs(found, sentinel)

    def test_payload(self):
        part = self._msg
        self.assertEqual(part.get_payload(), 'hello there')
        self.assertFalse(part.is_multipart())

    def test_charset(self):
        by_name = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(by_name.get_charset().input_charset, 'us-ascii')
        self.assertEqual(by_name['content-type'],
                         'text/plain; charset="us-ascii"')
        # Also accept a Charset instance
        utf8 = Charset('utf-8')
        utf8.body_encoding = None
        by_instance = MIMEText('hello there', _charset=utf8)
        self.assertEqual(by_instance.get_charset().input_charset, 'utf-8')
        self.assertEqual(by_instance['content-type'],
                         'text/plain; charset="utf-8"')
        self.assertEqual(by_instance.get_payload(), 'hello there')

    def test_7bit_input(self):
        part = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(part.get_charset().input_charset, 'us-ascii')
        self.assertEqual(part['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        part = MIMEText('hello there')
        # The charset object is compared directly against the charset name.
        self.assertEqual(part.get_charset(), 'us-ascii')
        self.assertEqual(part['content-type'],
                         'text/plain; charset="us-ascii"')
        self.assertIn('hello there', part.as_string())

    def test_utf8_input(self):
        cyrillic = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        part = MIMEText(cyrillic, _charset='utf-8')
        self.assertEqual(part.get_charset().output_charset, 'utf-8')
        self.assertEqual(part['content-type'], 'text/plain; charset="utf-8"')
        self.assertEqual(part.get_payload(decode=True),
                         cyrillic.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        cyrillic = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, cyrillic)
class TestMultipart(TestEmailBase):
    # Tests for multipart/* containers: building them by hand, generating
    # them with the various preamble/epilogue settings, and parsing
    # awkward boundary layouts from the msg_NN.txt fixtures.

    def setUp(self):
        # Build a two-part multipart/mixed message (text intro + GIF image)
        # that test_hierarchy inspects.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        # Let the library format the local date.  The previous hand-rolled
        # "' %s%04d' % (sign, tzsecs / 36)" produced strings such as
        # "+-200" east of UTC, mis-rendered zones that are not a whole
        # number of hours, and depended on the process locale for the day
        # and month names.
        container['Date'] = utils.formatdate(now, localtime=True)
        self._msg = container
        self._im = image
        self._txt = intro

    def test_hierarchy(self):
        # convenience
        eq = self.assertEqual
        raises = self.assertRaises
        # tests
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        raises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        # The attached objects themselves are returned, not copies.
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        # Parsing and regenerating must reproduce the text exactly.
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--
''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        # The empty preamble shows up as an extra blank line before the
        # first boundary.
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        # The newline epilogue is emitted after the closing boundary.
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): nesting depth reconstructed at four spaces per
        # level -- confirm against the msg_38.txt fixture.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): nesting depth reconstructed at four spaces per
        # level -- confirm against the msg_39.txt fixture.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        # A boundary parameter on a non-multipart type leaves the body as
        # plain text, so the message regenerates unchanged.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        # The closing boundary is the very last thing in the input.
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')

    def test_mimebase_default_policy(self):
        m = MIMEBase('multipart', 'mixed')
        self.assertIs(m.policy, email.policy.compat32)

    def test_mimebase_custom_policy(self):
        m = MIMEBase('multipart', 'mixed', policy=email.policy.default)
        self.assertIs(m.policy, email.policy.default)
class TestNonConformant(TestEmailBase):
    # Parsing of badly formatted messages: what the parser recovers and
    # which defects it records.  Tests marked "test_defect_handling" carry
    # the original marker naming that module.

    def test_parse_missing_minor_type(self):
        parsed = self._msgobj('msg_14.txt')
        self.assertEqual(parsed.get_content_type(), 'text/plain')
        self.assertEqual(parsed.get_content_maintype(), 'text')
        self.assertEqual(parsed.get_content_subtype(), 'plain')

    # test_defect_handling
    def test_same_boundary_inner_outer(self):
        outer = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        first_part = outer.get_payload(0)
        self.assertTrue(hasattr(first_part, 'defects'))
        self.assertEqual(len(first_part.defects), 1)
        self.assertIsInstance(first_part.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_multipart_no_boundary(self):
        parsed = self._msgobj('msg_25.txt')
        # Without a boundary the body stays a single string.
        self.assertIsInstance(parsed.get_payload(), str)
        self.assertEqual(len(parsed.defects), 2)
        first, second = parsed.defects[0], parsed.defects[1]
        self.assertIsInstance(first, errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(second,
                              errors.MultipartInvariantViolationDefect)

    # Template message; the {} placeholder takes extra header text that is
    # appended after the folded Content-Type header.
    multipart_msg = textwrap.dedent("""\
        Date: Wed, 14 Nov 2007 12:56:23 GMT
        From: foo@bar.invalid
        To: foo@bar.invalid
        Subject: Content-Transfer-Encoding: base64 and multipart
        MIME-Version: 1.0
        Content-Type: multipart/mixed;
            boundary="===============3344438784458119861=="{}

        --===============3344438784458119861==
        Content-Type: text/plain

        Test message

        --===============3344438784458119861==
        Content-Type: application/octet-stream
        Content-Transfer-Encoding: base64

        YWJj

        --===============3344438784458119861==--
        """)

    # test_defect_handling
    def test_multipart_invalid_cte(self):
        source = self.multipart_msg.format(
            "\nContent-Transfer-Encoding: base64")
        parsed = self._str_msg(source)
        self.assertEqual(len(parsed.defects), 1)
        self.assertIsInstance(
            parsed.defects[0],
            errors.InvalidMultipartContentTransferEncodingDefect)

    # test_defect_handling
    def test_multipart_no_cte_no_defect(self):
        source = self.multipart_msg.format('')
        parsed = self._str_msg(source)
        self.assertEqual(len(parsed.defects), 0)

    # test_defect_handling
    def test_multipart_valid_cte_no_defect(self):
        for cte in ('7bit', '8bit', 'BINary'):
            extra_header = "\nContent-Transfer-Encoding: {}".format(cte)
            parsed = self._str_msg(self.multipart_msg.format(extra_header))
            self.assertEqual(len(parsed.defects), 0)

    # test_headerregistry.TestContentTypeHeader invalid_1 and invalid_2.
    def test_invalid_content_type(self):
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        out = StringIO()
        Generator(out).flatten(msg)
        self.ndiffAssertEqual(out.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        parsed = self._msgobj('msg_31.txt')
        # The whole body, boundaries included, is kept as the payload.
        self.ndiffAssertEqual(parsed.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        parsed = self._msgobj('msg_35.txt')
        self.ndiffAssertEqual(parsed.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    # test_defect_handling
    def test_lying_multipart(self):
        parsed = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(parsed, 'defects'))
        self.assertEqual(len(parsed.defects), 2)
        first, second = parsed.defects[0], parsed.defects[1]
        self.assertIsInstance(first, errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(second,
                              errors.MultipartInvariantViolationDefect)

    # test_defect_handling
    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        enclosed = outer.get_payload(1)
        bad = enclosed.get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_first_line_is_continuation_header(self):
        source = ' Line 1\nSubject: test\n\nbody'
        parsed = email.message_from_string(source)
        # The stray continuation line is dropped and recorded as a defect.
        self.assertEqual(parsed.keys(), ['Subject'])
        self.assertEqual(parsed.get_payload(), 'body')
        self.assertEqual(len(parsed.defects), 1)
        self.assertDefectsEqual(parsed.defects,
                                [errors.FirstHeaderLineIsContinuationDefect])
        self.assertEqual(parsed.defects[0].line, ' Line 1\n')

    # test_defect_handling
    def test_missing_header_body_separator(self):
        # Our heuristic if we see a line that doesn't look like a header (no
        # leading whitespace but no ':') is to assume that the blank line that
        # separates the header from the body is missing, and to stop parsing
        # headers and start parsing the body.
        parsed = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n')
        self.assertEqual(parsed.keys(), ['Subject'])
        self.assertEqual(parsed.get_payload(),
                         'not a header\nTo: abc\n\nb\n')
        self.assertDefectsEqual(parsed.defects,
                                [errors.MissingHeaderBodySeparatorDefect])
class TestRFC2047(TestEmailBase):
    # RFC 2047 encoded-word handling in decode_header() / make_header().
    # The test_rfc2047_rfc2047_N tests follow the examples listed at the
    # end of RFC 2047.

    def test_rfc2047_multiline(self):
        eq = self.assertEqual
        s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        dh = decode_header(s)
        eq(dh, [
            (b'Re: ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b' baz foo bar ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(dh)
        eq(str(header),
           'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        # Re-encoding has to split the second encoded word to respect
        # the maximum line length.
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_keeper_unicode(self):
        eq = self.assertEqual
        s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        dh = decode_header(s)
        eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
                (b' Pirard <pirard@dom.ain>', None)])
        header = str(make_header(dh))
        eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_keeper_unicode_2(self):
        eq = self.assertEqual
        s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        dh = decode_header(s)
        eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'),
                (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')])
        hu = str(make_header(dh))
        eq(hu, 'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words glued directly to surrounding text are still found.
        s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
                              (b'rg', None), (b'\xe5', 'iso-8859-1'),
                              (b'sbord', None)])

    def test_rfc2047_with_whitespace(self):
        s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'),
                              (b' rg ', None), (b'\xe5', 'iso-8859-1'),
                              (b' sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        s = '=?iso-8859-1?B?%s?='
        data = [                                # only test complete bytes
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi')
          ]
        for q, a in data:
            # subTest makes a failure report name the offending input.
            with self.subTest(encoded=q):
                dh = decode_header(s % q)
                self.assertEqual(dh, [(a, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        s = '=?iso-8859-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(s),
                        [(b'andr\xe9=zz', 'iso-8859-1')])

    def test_rfc2047_rfc2047_1(self):
        # 1st testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_2(self):
        # 2nd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= b)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)])

    def test_rfc2047_rfc2047_3(self):
        # 3rd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_4(self):
        # 4th testcase at end of rfc2047: the encoded words are separated
        # by TWO spaces.  With a single space this test would merely
        # duplicate the 3rd testcase.
        s = '(=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5a(self):
        # 5th testcase at end of rfc2047 newline is \r\n
        s = '(=?ISO-8859-1?Q?a?=\r\n    =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5b(self):
        # 5th testcase at end of rfc2047 newline is \n
        s = '(=?ISO-8859-1?Q?a?=\n    =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_6(self):
        # 6th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_7(self):
        # 7th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'),
             (b')', None)])
        self.assertEqual(make_header(decode_header(s)).encode(), s.lower())
        self.assertEqual(str(make_header(decode_header(s))), '(a b)')

    def test_multiline_header(self):
        s = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller@xxx.com>'
        self.assertEqual(decode_header(s),
            [(b'"M\xfcller T"', 'windows-1252'),
             (b'<T.Mueller@xxx.com>', None)])
        # Encoding the decoded header gives the input with the fold removed.
        self.assertEqual(make_header(decode_header(s)).encode(),
                         ''.join(s.splitlines()))
        self.assertEqual(str(make_header(decode_header(s))),
                         '"Müller T" <T.Mueller@xxx.com>')
def test_bad_multipart(self):
    # Attaching a second message to a MIMEMessage container must raise
    # MultipartConversionError.
    msg1 = Message()
    msg1['Subject'] = 'subpart 1'
    msg2 = Message()
    msg2['Subject'] = 'subpart 2'
    r = MIMEMessage(msg1)
    self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

def test_generate(self):
    # Flatten a MIMEMessage wrapping a simple message and compare the
    # generated text with the expected serialization.
    # First craft the message to be encapsulated
    m = Message()
    m['Subject'] = 'An enclosed message'
    m.set_payload('Here is the body of the message.\n')
    r = MIMEMessage(m)
    r['Subject'] = 'The enclosing message'
    s = StringIO()
    g = Generator(s)
    g.flatten(r)
    self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

def test_parse_message_rfc822(self):
    # A parsed message/rfc822 has a one-element list payload holding the
    # enclosed Message.
    eq = self.assertEqual
    msg = self._msgobj('msg_11.txt')
    eq(msg.get_content_type(), 'message/rfc822')
    payload = msg.get_payload()
    self.assertIsInstance(payload, list)
    eq(len(payload), 1)
    submsg = payload[0]
    self.assertIsInstance(submsg, Message)
    eq(submsg['subject'], 'An enclosed message')
    eq(submsg.get_payload(), 'Here is the body of the message.\n')

def test_dsn(self):
    # Walk the three subparts of a parsed multipart/report message and
    # check the type and content of each.
    eq = self.assertEqual
    # msg 16 is a Delivery Status Notification, see RFC 1894
    msg = self._msgobj('msg_16.txt')
    eq(msg.get_content_type(), 'multipart/report')
    self.assertTrue(msg.is_multipart())
    eq(len(msg.get_payload()), 3)
    # Subpart 1 is a text/plain, human readable section
    subpart = msg.get_payload(0)
    eq(subpart.get_content_type(), 'text/plain')
    # NOTE(review): the reviewed copy of this file had collapsed the leading
    # whitespace of the indented field lines below; a two-space indent is
    # assumed here -- confirm against the data file msg_16.txt.
    eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <henryi@oxy.edu>
  To: SoCal Raves <scr@socal-raves.org>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: jangel1@cougar.noc.ucla.edu
  Reason: recipient reached disk quota

""")
    # Subpart 2 contains the machine parsable DSN information.  It
    # consists of two blocks of headers, represented by two nested Message
    # objects.
    subpart = msg.get_payload(1)
    eq(subpart.get_content_type(), 'message/delivery-status')
    eq(len(subpart.get_payload()), 2)
    # message/delivery-status should treat each block as a bunch of
    # headers, i.e. a bunch of Message objects.
    dsn1 = subpart.get_payload(0)
    self.assertIsInstance(dsn1, Message)
    eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
    eq(dsn1.get_param('dns', header='reporting-mta'), '')
    # Try a missing one <wink>
    eq(dsn1.get_param('nsd', header='reporting-mta'), None)
    dsn2 = subpart.get_payload(1)
    self.assertIsInstance(dsn2, Message)
    eq(dsn2['action'], 'failed')
    eq(dsn2.get_params(header='original-recipient'),
       [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
    eq(dsn2.get_param('rfc822', header='final-recipient'), '')
    # Subpart 3 is the original message
    subpart = msg.get_payload(2)
    eq(subpart.get_content_type(), 'message/rfc822')
    payload = subpart.get_payload()
    self.assertIsInstance(payload, list)
    eq(len(payload), 1)
    subsubpart = payload[0]
    self.assertIsInstance(subsubpart, Message)
    eq(subsubpart.get_content_type(), 'text/plain')
    eq(subsubpart['message-id'],
       '<002001c144a6$8752e060$56104586@oxy.edu>')

def test_epilogue(self):
    # Build a two-part multipart message with a preamble and an epilogue
    # and check that flattening it reproduces msg_21.txt exactly.
    eq = self.ndiffAssertEqual
    with openfile('msg_21.txt') as fp:
        text = fp.read()
    msg = Message()
    msg['From'] = 'aperson@dom.ain'
    msg['To'] = 'bperson@dom.ain'
    msg['Subject'] = 'Test'
    msg.preamble = 'MIME message'
    msg.epilogue = 'End of MIME message\n'
    msg1 = MIMEText('One')
    msg2 = MIMEText('Two')
    msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
    msg.attach(msg1)
    msg.attach(msg2)
    sfp = StringIO()
    g = Generator(sfp)
    g.flatten(msg)
    eq(sfp.getvalue(), text)

def test_no_nl_preamble(self):
    # Same construction as test_epilogue, but with an empty epilogue and a
    # preamble that has no trailing newline; compare as_string() output.
    eq = self.ndiffAssertEqual
    msg = Message()
    msg['From'] = 'aperson@dom.ain'
    msg['To'] = 'bperson@dom.ain'
    msg['Subject'] = 'Test'
    msg.preamble = 'MIME message'
    msg.epilogue = ''
    msg1 = MIMEText('One')
    msg2 = MIMEText('Two')
    msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
    msg.attach(msg1)
    msg.attach(msg2)
    eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

def test_default_type(self):
    # For the parsed msg_30.txt, both containers report message/rfc822 and
    # their first subparts report text/plain, for the default type as well
    # as the content type.
    eq = self.assertEqual
    with openfile('msg_30.txt') as fp:
        msg = email.message_from_file(fp)
    container1 = msg.get_payload(0)
    eq(container1.get_default_type(), 'message/rfc822')
    eq(container1.get_content_type(), 'message/rfc822')
    container2 = msg.get_payload(1)
    eq(container2.get_default_type(), 'message/rfc822')
    eq(container2.get_content_type(), 'message/rfc822')
    container1a = container1.get_payload(0)
    eq(container1a.get_default_type(), 'text/plain')
    eq(container1a.get_content_type(), 'text/plain')
    container2a = container2.get_payload(0)
    eq(container2a.get_default_type(), 'text/plain')
    eq(container2a.get_content_type(), 'text/plain')
def test_default_type_with_explicit_container_type(self):
    # Same checks as test_default_type, run against msg_28.txt.
    eq = self.assertEqual
    with openfile('msg_28.txt') as fp:
        msg = email.message_from_file(fp)
    container1 = msg.get_payload(0)
    eq(container1.get_default_type(), 'message/rfc822')
    eq(container1.get_content_type(), 'message/rfc822')
    container2 = msg.get_payload(1)
    eq(container2.get_default_type(), 'message/rfc822')
    eq(container2.get_content_type(), 'message/rfc822')
    container1a = container1.get_payload(0)
    eq(container1a.get_default_type(), 'text/plain')
    eq(container1a.get_content_type(), 'text/plain')
    container2a = container2.get_payload(0)
    eq(container2a.get_default_type(), 'text/plain')
    eq(container2a.get_content_type(), 'text/plain')

def test_default_type_non_parsed(self):
    # Build a multipart/digest by hand and check the subparts' content and
    # default types, and the serialized text, both with and without the
    # subparts' own Content-Type / MIME-Version headers.
    eq = self.assertEqual
    neq = self.ndiffAssertEqual
    # Set up container
    container = MIMEMultipart('digest', 'BOUNDARY')
    container.epilogue = ''
    # Set up subparts
    subpart1a = MIMEText('message 1\n')
    subpart2a = MIMEText('message 2\n')
    subpart1 = MIMEMessage(subpart1a)
    subpart2 = MIMEMessage(subpart2a)
    container.attach(subpart1)
    container.attach(subpart2)
    eq(subpart1.get_content_type(), 'message/rfc822')
    eq(subpart1.get_default_type(), 'message/rfc822')
    eq(subpart2.get_content_type(), 'message/rfc822')
    eq(subpart2.get_default_type(), 'message/rfc822')
    neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
    # With the explicit headers removed, the types must still be reported
    # as message/rfc822 (asserted below).
    del subpart1['content-type']
    del subpart1['mime-version']
    del subpart2['content-type']
    del subpart2['mime-version']
    eq(subpart1.get_content_type(), 'message/rfc822')
    eq(subpart1.get_default_type(), 'message/rfc822')
    eq(subpart2.get_content_type(), 'message/rfc822')
    eq(subpart2.get_default_type(), 'message/rfc822')
    neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

def test_mime_attachments_in_constructor(self):
    # Subparts passed through _subparts end up as the payload, in order.
    eq = self.assertEqual
    text1 = MIMEText('')
    text2 = MIMEText('')
    msg = MIMEMultipart(_subparts=(text1, text2))
    eq(len(msg.get_payload()), 2)
    eq(msg.get_payload(0), text1)
    eq(msg.get_payload(1), text2)

def test_default_multipart_constructor(self):
    # A MIMEMultipart built with no arguments reports is_multipart().
    msg = MIMEMultipart()
    self.assertTrue(msg.is_multipart())

def test_multipart_default_policy(self):
    # Without an explicit policy, assigning 'To' twice keeps both values.
    msg = MIMEMultipart()
    msg['To'] = 'a@b.com'
    msg['To'] = 'c@d.com'
    self.assertEqual(msg.get_all('to'), ['a@b.com', 'c@d.com'])

def test_multipart_custom_policy(self):
    # With email.policy.default, a second 'To' assignment raises ValueError.
    msg = MIMEMultipart(policy=email.policy.default)
    msg['To'] = 'a@b.com'
    with self.assertRaises(ValueError) as cm:
        msg['To'] = 'c@d.com'
    self.assertEqual(str(cm.exception),
                     'There may be at most 1 To headers in a message')

# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
# contain a changed date.
class TestIdempotent(TestEmailBase):

    linesep = '\n'

    def _msgobj(self, filename):
        # Return (parsed message, raw text) for the named data file.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg without header wrapping (maxheaderlen=0) and require
        # the output to equal the original text.
        eq = self.ndiffAssertEqual
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        eq(text, s.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        eq(msg.preamble, None)
        eq(msg.epilogue, None)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_params(), None)
        eq(msg.get_param('charset'), None)
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_message_delivery_status(self):
        msg, text = self._msgobj('msg_43.txt')
        self._idempotent(msg, text, unixfrom=True)

    def test_message_signed_idempotent(self):
        msg, text = self._msgobj('msg_45.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        eq = self.assertEqual
        # Get a message object together with its source text
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)



# Test various other bits of the package's functionality
class TestMiscellaneous(TestEmailBase):
    def test_message_from_string(self):
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        # Don't wrap/continue long headers since we're trying to test
        # idempotency.
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg)
        self.assertEqual(text, s.getvalue())

    def test_message_from_file(self):
        with openfile('msg_01.txt') as fp:
            text = fp.read()
            # Rewind so the parser reads the same file from the start.
            fp.seek(0)
            msg = email.message_from_file(fp)
            s = StringIO()
            # Don't wrap/continue long headers since we're trying to test
            # idempotency.
            g = Generator(s, maxheaderlen=0)
            g.flatten(msg)
            self.assertEqual(text, s.getvalue())

    def test_message_from_string_with_class(self):
        with openfile('msg_01.txt') as fp:
            text = fp.read()

        # Create a subclass
        class MyMessage(Message):
            pass

        msg = email.message_from_string(text, MyMessage)
        self.assertIsInstance(msg, MyMessage)
        # Try something more complicated
        with openfile('msg_02.txt') as fp:
            text = fp.read()
        msg = email.message_from_string(text, MyMessage)
        for subpart in msg.walk():
            self.assertIsInstance(subpart, MyMessage)

    def test_message_from_file_with_class(self):
        # Create a subclass
        class MyMessage(Message):
            pass

        with openfile('msg_01.txt') as fp:
            msg = email.message_from_file(fp, MyMessage)
        self.assertIsInstance(msg, MyMessage)
        # Try something more complicated
        with openfile('msg_02.txt') as fp:
            msg = email.message_from_file(fp, MyMessage)
        for subpart in msg.walk():
            self.assertIsInstance(subpart, MyMessage)

    def test_custom_message_does_not_require_arguments(self):
        # A Message subclass whose __init__ takes no arguments must still be
        # usable as the message class.
        class MyMessage(Message):
            def __init__(self):
                super().__init__()
        msg = self._str_msg("Subject: test\n\ntest", MyMessage)
        self.assertIsInstance(msg, MyMessage)

    def test__all__(self):
        module = __import__('email')
        self.assertEqual(sorted(module.__all__), [
            'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
            'generator', 'header', 'iterators', 'message',
            'message_from_binary_file', 'message_from_bytes',
            'message_from_file', 'message_from_string', 'mime', 'parser',
            'quoprimime', 'utils',
            ])

    def test_formatdate(self):
        now = time.time()
        self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
                         time.gmtime(now)[:6])

    def test_formatdate_localtime(self):
        now = time.time()
        self.assertEqual(
            utils.parsedate(utils.formatdate(now, localtime=True))[:6],
            time.localtime(now)[:6])

    def test_formatdate_usegmt(self):
        now = time.time()
        self.assertEqual(
            utils.formatdate(now, localtime=False),
            time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
        self.assertEqual(
            utils.formatdate(now, localtime=False, usegmt=True),
            time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))

    # parsedate and parsedate_tz will become deprecated interfaces someday
    def test_parsedate_returns_None_for_invalid_strings(self):
        self.assertIsNone(utils.parsedate(''))
        self.assertIsNone(utils.parsedate_tz(''))
        self.assertIsNone(utils.parsedate(' '))
        self.assertIsNone(utils.parsedate_tz(' '))
        self.assertIsNone(utils.parsedate('0'))
        self.assertIsNone(utils.parsedate_tz('0'))
        self.assertIsNone(utils.parsedate('A Complete Waste of Time'))
        self.assertIsNone(utils.parsedate_tz('A Complete Waste of Time'))
        self.assertIsNone(utils.parsedate_tz('Wed, 3 Apr 2002 12.34.56.78+0800'))
        # Not a part of the spec, but this has historically worked:
        self.assertIsNone(utils.parsedate(None))
        self.assertIsNone(utils.parsedate_tz(None))

    def test_parsedate_compact(self):
        # The FWS after the comma is optional
        self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
                         utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))

    def test_parsedate_no_dayofweek(self):
        eq = self.assertEqual
        eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
           (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
3025 3026 def test_parsedate_compact_no_dayofweek(self): 3027 eq = self.assertEqual 3028 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'), 3029 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800)) 3030 3031 def test_parsedate_no_space_before_positive_offset(self): 3032 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'), 3033 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800)) 3034 3035 def test_parsedate_no_space_before_negative_offset(self): 3036 # Issue 1155362: we already handled '+' for this case. 3037 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'), 3038 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800)) 3039 3040 3041 def test_parsedate_accepts_time_with_dots(self): 3042 eq = self.assertEqual 3043 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'), 3044 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800)) 3045 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'), 3046 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800)) 3047 3048 def test_parsedate_acceptable_to_time_functions(self): 3049 eq = self.assertEqual 3050 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800') 3051 t = int(time.mktime(timetup)) 3052 eq(time.localtime(t)[:6], timetup[:6]) 3053 eq(int(time.strftime('%Y', timetup)), 2003) 3054 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800') 3055 t = int(time.mktime(timetup[:9])) 3056 eq(time.localtime(t)[:6], timetup[:6]) 3057 eq(int(time.strftime('%Y', timetup[:9])), 2003) 3058 3059 def test_mktime_tz(self): 3060 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0, 3061 -1, -1, -1, 0)), 0) 3062 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0, 3063 -1, -1, -1, 1234)), -1234) 3064 3065 def test_parsedate_y2k(self): 3066 """Test for parsing a date with a two-digit year. 3067 3068 Parsing a date with a two-digit year should return the correct 3069 four-digit year. RFC822 allows two-digit years, but RFC2822 (which 3070 obsoletes RFC822) requires four-digit years. 
3071 3072 """ 3073 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'), 3074 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800')) 3075 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'), 3076 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800')) 3077 3078 def test_parseaddr_empty(self): 3079 self.assertEqual(utils.parseaddr('<>'), ('', '')) 3080 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '') 3081 3082 def test_parseaddr_multiple_domains(self): 3083 self.assertEqual( 3084 utils.parseaddr('a@b@c'), 3085 ('', '') 3086 ) 3087 self.assertEqual( 3088 utils.parseaddr('a@b.c@c'), 3089 ('', '') 3090 ) 3091 self.assertEqual( 3092 utils.parseaddr('a@172.17.0.1@c'), 3093 ('', '') 3094 ) 3095 3096 def test_noquote_dump(self): 3097 self.assertEqual( 3098 utils.formataddr(('A Silly Person', 'person@dom.ain')), 3099 'A Silly Person <person@dom.ain>') 3100 3101 def test_escape_dump(self): 3102 self.assertEqual( 3103 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')), 3104 r'"A (Very) Silly Person" <person@dom.ain>') 3105 self.assertEqual( 3106 utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'), 3107 ('A (Very) Silly Person', 'person@dom.ain')) 3108 a = r'A \(Special\) Person' 3109 b = 'person@dom.ain' 3110 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b)) 3111 3112 def test_escape_backslashes(self): 3113 self.assertEqual( 3114 utils.formataddr((r'Arthur \Backslash\ Foobar', 'person@dom.ain')), 3115 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>') 3116 a = r'Arthur \Backslash\ Foobar' 3117 b = 'person@dom.ain' 3118 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b)) 3119 3120 def test_quotes_unicode_names(self): 3121 # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 
3122 name = "H\u00e4ns W\u00fcrst" 3123 addr = 'person@dom.ain' 3124 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>" 3125 latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>" 3126 self.assertEqual(utils.formataddr((name, addr)), utf8_base64) 3127 self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'), 3128 latin1_quopri) 3129 3130 def test_accepts_any_charset_like_object(self): 3131 # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 3132 name = "H\u00e4ns W\u00fcrst" 3133 addr = 'person@dom.ain' 3134 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>" 3135 foobar = "FOOBAR" 3136 class CharsetMock: 3137 def header_encode(self, string): 3138 return foobar 3139 mock = CharsetMock() 3140 mock_expected = "%s <%s>" % (foobar, addr) 3141 self.assertEqual(utils.formataddr((name, addr), mock), mock_expected) 3142 self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')), 3143 utf8_base64) 3144 3145 def test_invalid_charset_like_object_raises_error(self): 3146 # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 3147 name = "H\u00e4ns W\u00fcrst" 3148 addr = 'person@dom.ain' 3149 # An object without a header_encode method: 3150 bad_charset = object() 3151 self.assertRaises(AttributeError, utils.formataddr, (name, addr), 3152 bad_charset) 3153 3154 def test_unicode_address_raises_error(self): 3155 # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 3156 addr = 'pers\u00f6n@dom.in' 3157 self.assertRaises(UnicodeError, utils.formataddr, (None, addr)) 3158 self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr)) 3159 3160 def test_name_with_dot(self): 3161 x = 'John X. Doe <jxd@example.com>' 3162 y = '"John X. Doe" <jxd@example.com>' 3163 a, b = ('John X. 
Doe', 'jxd@example.com') 3164 self.assertEqual(utils.parseaddr(x), (a, b)) 3165 self.assertEqual(utils.parseaddr(y), (a, b)) 3166 # formataddr() quotes the name if there's a dot in it 3167 self.assertEqual(utils.formataddr((a, b)), y) 3168 3169 def test_parseaddr_preserves_quoted_pairs_in_addresses(self): 3170 # issue 10005. Note that in the third test the second pair of 3171 # backslashes is not actually a quoted pair because it is not inside a 3172 # comment or quoted string: the address being parsed has a quoted 3173 # string containing a quoted backslash, followed by 'example' and two 3174 # backslashes, followed by another quoted string containing a space and 3175 # the word 'example'. parseaddr copies those two backslashes 3176 # literally. Per rfc5322 this is not technically correct since a \ may 3177 # not appear in an address outside of a quoted string. It is probably 3178 # a sensible Postel interpretation, though. 3179 eq = self.assertEqual 3180 eq(utils.parseaddr('""example" example"@example.com'), 3181 ('', '""example" example"@example.com')) 3182 eq(utils.parseaddr('"\\"example\\" example"@example.com'), 3183 ('', '"\\"example\\" example"@example.com')) 3184 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'), 3185 ('', '"\\\\"example\\\\" example"@example.com')) 3186 3187 def test_parseaddr_preserves_spaces_in_local_part(self): 3188 # issue 9286. A normal RFC5322 local part should not contain any 3189 # folding white space, but legacy local parts can (they are a sequence 3190 # of atoms, not dotatoms). On the other hand we strip whitespace from 3191 # before the @ and around dots, on the assumption that the whitespace 3192 # around the punctuation is a mistake in what would otherwise be 3193 # an RFC5322 local part. Leading whitespace is, usual, stripped as well. 
3194 self.assertEqual(('', "merwok wok@xample.com"), 3195 utils.parseaddr("merwok wok@xample.com")) 3196 self.assertEqual(('', "merwok wok@xample.com"), 3197 utils.parseaddr("merwok wok@xample.com")) 3198 self.assertEqual(('', "merwok wok@xample.com"), 3199 utils.parseaddr(" merwok wok @xample.com")) 3200 self.assertEqual(('', 'merwok"wok" wok@xample.com'), 3201 utils.parseaddr('merwok"wok" wok@xample.com')) 3202 self.assertEqual(('', 'merwok.wok.wok@xample.com'), 3203 utils.parseaddr('merwok. wok . wok@xample.com')) 3204 3205 def test_formataddr_does_not_quote_parens_in_quoted_string(self): 3206 addr = ("'foo@example.com' (foo@example.com)", 3207 'foo@example.com') 3208 addrstr = ('"\'foo@example.com\' ' 3209 '(foo@example.com)" <foo@example.com>') 3210 self.assertEqual(utils.parseaddr(addrstr), addr) 3211 self.assertEqual(utils.formataddr(addr), addrstr) 3212 3213 3214 def test_multiline_from_comment(self): 3215 x = """\ 3216Foo 3217\tBar <foo@example.com>""" 3218 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com')) 3219 3220 def test_quote_dump(self): 3221 self.assertEqual( 3222 utils.formataddr(('A Silly; Person', 'person@dom.ain')), 3223 r'"A Silly; Person" <person@dom.ain>') 3224 3225 def test_charset_richcomparisons(self): 3226 eq = self.assertEqual 3227 ne = self.assertNotEqual 3228 cset1 = Charset() 3229 cset2 = Charset() 3230 eq(cset1, 'us-ascii') 3231 eq(cset1, 'US-ASCII') 3232 eq(cset1, 'Us-AsCiI') 3233 eq('us-ascii', cset1) 3234 eq('US-ASCII', cset1) 3235 eq('Us-AsCiI', cset1) 3236 ne(cset1, 'usascii') 3237 ne(cset1, 'USASCII') 3238 ne(cset1, 'UsAsCiI') 3239 ne('usascii', cset1) 3240 ne('USASCII', cset1) 3241 ne('UsAsCiI', cset1) 3242 eq(cset1, cset2) 3243 eq(cset2, cset1) 3244 3245 def test_getaddresses(self): 3246 eq = self.assertEqual 3247 eq(utils.getaddresses(['aperson@dom.ain (Al Person)', 3248 'Bud Person <bperson@dom.ain>']), 3249 [('Al Person', 'aperson@dom.ain'), 3250 ('Bud Person', 'bperson@dom.ain')]) 3251 3252 def 
test_getaddresses_nasty(self): 3253 eq = self.assertEqual 3254 eq(utils.getaddresses(['foo: ;']), [('', '')]) 3255 eq(utils.getaddresses( 3256 ['[]*-- =~$']), 3257 [('', ''), ('', ''), ('', '*--')]) 3258 eq(utils.getaddresses( 3259 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']), 3260 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) 3261 3262 def test_getaddresses_embedded_comment(self): 3263 """Test proper handling of a nested comment""" 3264 eq = self.assertEqual 3265 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>']) 3266 eq(addrs[0][1], 'foo@bar.com') 3267 3268 def test_getaddresses_header_obj(self): 3269 """Test the handling of a Header object.""" 3270 addrs = utils.getaddresses([Header('Al Person <aperson@dom.ain>')]) 3271 self.assertEqual(addrs[0][1], 'aperson@dom.ain') 3272 3273 def test_make_msgid_collisions(self): 3274 # Test make_msgid uniqueness, even with multiple threads 3275 class MsgidsThread(Thread): 3276 def run(self): 3277 # generate msgids for 3 seconds 3278 self.msgids = [] 3279 append = self.msgids.append 3280 make_msgid = utils.make_msgid 3281 clock = time.monotonic 3282 tfin = clock() + 3.0 3283 while clock() < tfin: 3284 append(make_msgid(domain='testdomain-string')) 3285 3286 threads = [MsgidsThread() for i in range(5)] 3287 with start_threads(threads): 3288 pass 3289 all_ids = sum([t.msgids for t in threads], []) 3290 self.assertEqual(len(set(all_ids)), len(all_ids)) 3291 3292 def test_utils_quote_unquote(self): 3293 eq = self.assertEqual 3294 msg = Message() 3295 msg.add_header('content-disposition', 'attachment', 3296 filename='foo\\wacky"name') 3297 eq(msg.get_filename(), 'foo\\wacky"name') 3298 3299 def test_get_body_encoding_with_bogus_charset(self): 3300 charset = Charset('not a charset') 3301 self.assertEqual(charset.get_body_encoding(), 'base64') 3302 3303 def test_get_body_encoding_with_uppercase_charset(self): 3304 eq = self.assertEqual 3305 msg = Message() 3306 msg['Content-Type'] = 'text/plain; 
charset=UTF-8' 3307 eq(msg['content-type'], 'text/plain; charset=UTF-8') 3308 charsets = msg.get_charsets() 3309 eq(len(charsets), 1) 3310 eq(charsets[0], 'utf-8') 3311 charset = Charset(charsets[0]) 3312 eq(charset.get_body_encoding(), 'base64') 3313 msg.set_payload(b'hello world', charset=charset) 3314 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n') 3315 eq(msg.get_payload(decode=True), b'hello world') 3316 eq(msg['content-transfer-encoding'], 'base64') 3317 # Try another one 3318 msg = Message() 3319 msg['Content-Type'] = 'text/plain; charset="US-ASCII"' 3320 charsets = msg.get_charsets() 3321 eq(len(charsets), 1) 3322 eq(charsets[0], 'us-ascii') 3323 charset = Charset(charsets[0]) 3324 eq(charset.get_body_encoding(), encoders.encode_7or8bit) 3325 msg.set_payload('hello world', charset=charset) 3326 eq(msg.get_payload(), 'hello world') 3327 eq(msg['content-transfer-encoding'], '7bit') 3328 3329 def test_charsets_case_insensitive(self): 3330 lc = Charset('us-ascii') 3331 uc = Charset('US-ASCII') 3332 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding()) 3333 3334 def test_partial_falls_inside_message_delivery_status(self): 3335 eq = self.ndiffAssertEqual 3336 # The Parser interface provides chunks of data to FeedParser in 8192 3337 # byte gulps. SF bug #1076485 found one of those chunks inside 3338 # message/delivery-status header block, which triggered an 3339 # unreadline() of NeedMoreData. 
3340 msg = self._msgobj('msg_43.txt') 3341 sfp = StringIO() 3342 iterators._structure(msg, sfp) 3343 eq(sfp.getvalue(), """\ 3344multipart/report 3345 text/plain 3346 message/delivery-status 3347 text/plain 3348 text/plain 3349 text/plain 3350 text/plain 3351 text/plain 3352 text/plain 3353 text/plain 3354 text/plain 3355 text/plain 3356 text/plain 3357 text/plain 3358 text/plain 3359 text/plain 3360 text/plain 3361 text/plain 3362 text/plain 3363 text/plain 3364 text/plain 3365 text/plain 3366 text/plain 3367 text/plain 3368 text/plain 3369 text/plain 3370 text/plain 3371 text/plain 3372 text/plain 3373 text/rfc822-headers 3374""") 3375 3376 def test_make_msgid_domain(self): 3377 self.assertEqual( 3378 email.utils.make_msgid(domain='testdomain-string')[-19:], 3379 '@testdomain-string>') 3380 3381 def test_make_msgid_idstring(self): 3382 self.assertEqual( 3383 email.utils.make_msgid(idstring='test-idstring', 3384 domain='testdomain-string')[-33:], 3385 '.test-idstring@testdomain-string>') 3386 3387 def test_make_msgid_default_domain(self): 3388 with patch('socket.getfqdn') as mock_getfqdn: 3389 mock_getfqdn.return_value = domain = 'pythontest.example.com' 3390 self.assertTrue( 3391 email.utils.make_msgid().endswith( 3392 '@' + domain + '>')) 3393 3394 def test_Generator_linend(self): 3395 # Issue 14645. 3396 with openfile('msg_26.txt', newline='\n') as f: 3397 msgtxt = f.read() 3398 msgtxt_nl = msgtxt.replace('\r\n', '\n') 3399 msg = email.message_from_string(msgtxt) 3400 s = StringIO() 3401 g = email.generator.Generator(s) 3402 g.flatten(msg) 3403 self.assertEqual(s.getvalue(), msgtxt_nl) 3404 3405 def test_BytesGenerator_linend(self): 3406 # Issue 14645. 
def test_mime_classes_policy_argument(self):
    # Each MIME convenience class must use the compat32 policy when none
    # is given and must keep an explicitly passed policy.
    def read_bytes(name):
        with openfile(name, 'rb') as stream:
            return stream.read()

    audio_bytes = read_bytes('audiotest.au')
    image_bytes = read_bytes('PyBanner048.gif')
    # (class under test, positional constructor arguments)
    samples = (
        (MIMEApplication, ('',)),
        (MIMEAudio, (audio_bytes,)),
        (MIMEImage, (image_bytes,)),
        (MIMEMessage, (Message(),)),
        (MIMENonMultipart, ('multipart', 'mixed')),
        (MIMEText, ('',)),
    )
    for mime_cls, args in samples:
        label = mime_cls.__name__
        with self.subTest(cls=label, policy='compat32'):
            built = mime_cls(*args)
            self.assertIs(built.policy, email.policy.compat32)
        with self.subTest(cls=label, policy='default'):
            built = mime_cls(*args, policy=email.policy.default)
            self.assertIs(built.policy, email.policy.default)
def test_typed_subpart_iterator(self):
    # Walking msg_04.txt for main type 'text' must produce exactly two
    # subparts whose payloads, joined together, give the text below.
    msg = self._msgobj('msg_04.txt')
    payloads = [part.get_payload()
                for part in iterators.typed_subpart_iterator(msg, 'text')]
    self.assertEqual(len(payloads), 2)
    self.assertEqual(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")
class TestFeedParsers(TestEmailBase):
    # Drives FeedParser directly, with the input split into pieces chosen
    # by each test.

    def parse(self, chunks):
        # Feed every element of *chunks* to a fresh FeedParser, in order,
        # and return whatever close() produces.
        parser = FeedParser()
        for piece in chunks:
            parser.feed(piece)
        return parser.close()

    def test_empty_header_name_handled(self):
        # Issue 19996
        # NOTE: a plain str is handed to parse(), so the loop there feeds
        # the text one character at a time.
        parsed = self.parse("First: val\n: bad\nSecond: val")
        for name in ('First', 'Second'):
            self.assertEqual(parsed[name], 'val')

    def test_newlines(self):
        # (chunks fed to the parser, header names expected afterwards)
        key_cases = (
            (['a:\nb:\rc:\r\nd:\n'], ['a', 'b', 'c', 'd']),
            (['a:\nb:\rc:\r\nd:'], ['a', 'b', 'c', 'd']),
            (['a:\rb', 'c:\n'], ['a', 'bc']),
            (['a:\r', 'b:\n'], ['a', 'b']),
            (['a:\r', '\nb:\n'], ['a', 'b']),
        )
        for chunks, names in key_cases:
            self.assertEqual(self.parse(chunks).keys(), names)

        # Only CR and LF should break header fields
        item_cases = (
            (['a:\x85b:\u2028c:\n'], [('a', '\x85b:\u2028c:')]),
            (['a:\r', 'b:\x85', 'c:\n'], [('a', ''), ('b', '\x85c:')]),
        )
        for chunks, pairs in item_cases:
            self.assertEqual(self.parse(chunks).items(), pairs)

    def test_long_lines(self):
        # Expected peak memory use on 32-bit platform: 6*N*M bytes.
        M, N = 1000, 20000
        run = 'x' * M
        # (header block ending in an empty line, chunk repeated N times)
        body_cases = (
            ('a:b\n\n', run),
            ('a:b\r\r', run),
            ('a:b\r\r', run + '\x85'),
        )
        for header_block, chunk in body_cases:
            parsed = self.parse([header_block] + [chunk] * N)
            self.assertEqual(parsed.items(), [('a', 'b')])
            self.assertEqual(parsed.get_payload(), chunk * N)
        # Same amount of data, but this time it all lands in a header value.
        parsed = self.parse(['a:\r', 'b: '] + [run] * N)
        self.assertEqual(parsed.items(), [('a', ''), ('b', run * N)])
openfile('msg_02.txt', 'rb') as fp: 3629 email.parser.BytesParser().parse(fp) 3630 self.assertFalse(fp.closed) 3631 3632 def test_bytes_parser_on_exception_does_not_close_file(self): 3633 with openfile('msg_15.txt', 'rb') as fp: 3634 bytesParser = email.parser.BytesParser 3635 self.assertRaises(email.errors.StartBoundaryNotFoundDefect, 3636 bytesParser(policy=email.policy.strict).parse, 3637 fp) 3638 self.assertFalse(fp.closed) 3639 3640 def test_parser_does_not_close_file(self): 3641 with openfile('msg_02.txt', 'r') as fp: 3642 email.parser.Parser().parse(fp) 3643 self.assertFalse(fp.closed) 3644 3645 def test_parser_on_exception_does_not_close_file(self): 3646 with openfile('msg_15.txt', 'r') as fp: 3647 parser = email.parser.Parser 3648 self.assertRaises(email.errors.StartBoundaryNotFoundDefect, 3649 parser(policy=email.policy.strict).parse, fp) 3650 self.assertFalse(fp.closed) 3651 3652 def test_whitespace_continuation(self): 3653 eq = self.assertEqual 3654 # This message contains a line after the Subject: header that has only 3655 # whitespace, but it is not empty! 3656 msg = email.message_from_string("""\ 3657From: aperson@dom.ain 3658To: bperson@dom.ain 3659Subject: the next line has a space on it 3660\x20 3661Date: Mon, 8 Apr 2002 15:09:19 -0400 3662Message-ID: spam 3663 3664Here's the message body 3665""") 3666 eq(msg['subject'], 'the next line has a space on it\n ') 3667 eq(msg['message-id'], 'spam') 3668 eq(msg.get_payload(), "Here's the message body\n") 3669 3670 def test_whitespace_continuation_last_header(self): 3671 eq = self.assertEqual 3672 # Like the previous test, but the subject line is the last 3673 # header. 
def test_crlf_separation(self):
    # newline='\n' keeps the CRLF line endings of msg_26.txt intact, so
    # the parser sees them exactly as stored.
    with openfile('msg_26.txt', newline='\n') as stream:
        parsed = Parser().parse(stream)
    self.assertEqual(len(parsed.get_payload()), 2)

    text_part = parsed.get_payload(0)
    self.assertEqual(text_part.get_content_type(), 'text/plain')
    self.assertEqual(text_part.get_payload(),
                     'Simple email with attachment.\r\n\r\n')

    attachment = parsed.get_payload(1)
    self.assertEqual(attachment.get_content_type(), 'application/riscos')
eq(part2.get_content_type(), 'message/rfc822') 3735 eq(part2.is_multipart(), 1) 3736 eq(len(part2.get_payload()), 1) 3737 part2a = part2.get_payload(0) 3738 eq(part2a.is_multipart(), 0) 3739 eq(part2a.get_content_type(), 'text/plain') 3740 neq(part2a.get_payload(), 'message 2\n') 3741 3742 def test_three_lines(self): 3743 # A bug report by Andrew McNamara 3744 lines = ['From: Andrew Person <aperson@dom.ain', 3745 'Subject: Test', 3746 'Date: Tue, 20 Aug 2002 16:43:45 +1000'] 3747 msg = email.message_from_string(NL.join(lines)) 3748 self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000') 3749 3750 def test_strip_line_feed_and_carriage_return_in_headers(self): 3751 eq = self.assertEqual 3752 # For [ 1002475 ] email message parser doesn't handle \r\n correctly 3753 value1 = 'text' 3754 value2 = 'more text' 3755 m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % ( 3756 value1, value2) 3757 msg = email.message_from_string(m) 3758 eq(msg.get('Header'), value1) 3759 eq(msg.get('Next-Header'), value2) 3760 3761 def test_rfc2822_header_syntax(self): 3762 eq = self.assertEqual 3763 m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody' 3764 msg = email.message_from_string(m) 3765 eq(len(msg), 3) 3766 eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From']) 3767 eq(msg.get_payload(), 'body') 3768 3769 def test_rfc2822_space_not_allowed_in_header(self): 3770 eq = self.assertEqual 3771 m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody' 3772 msg = email.message_from_string(m) 3773 eq(len(msg.keys()), 0) 3774 3775 def test_rfc2822_one_character_header(self): 3776 eq = self.assertEqual 3777 m = 'A: first header\nB: second header\nCC: third header\n\nbody' 3778 msg = email.message_from_string(m) 3779 headers = msg.keys() 3780 headers.sort() 3781 eq(headers, ['A', 'B', 'CC']) 3782 eq(msg.get_payload(), 'body') 3783 3784 def test_CRLFLF_at_end_of_part(self): 3785 # issue 5610: feedparser should not eat two chars from body part ending 3786 # 
with "\r\n\n". 3787 m = ( 3788 "From: foo@bar.com\n" 3789 "To: baz\n" 3790 "Mime-Version: 1.0\n" 3791 "Content-Type: multipart/mixed; boundary=BOUNDARY\n" 3792 "\n" 3793 "--BOUNDARY\n" 3794 "Content-Type: text/plain\n" 3795 "\n" 3796 "body ending with CRLF newline\r\n" 3797 "\n" 3798 "--BOUNDARY--\n" 3799 ) 3800 msg = email.message_from_string(m) 3801 self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n')) 3802 3803 3804class Test8BitBytesHandling(TestEmailBase): 3805 # In Python3 all input is string, but that doesn't work if the actual input 3806 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we 3807 # decode byte streams using the surrogateescape error handler, and 3808 # reconvert to binary at appropriate places if we detect surrogates. This 3809 # doesn't allow us to transform headers with 8bit bytes (they get munged), 3810 # but it does allow us to parse and preserve them, and to decode body 3811 # parts that use an 8bit CTE. 3812 3813 bodytest_msg = textwrap.dedent("""\ 3814 From: foo@bar.com 3815 To: baz 3816 Mime-Version: 1.0 3817 Content-Type: text/plain; charset={charset} 3818 Content-Transfer-Encoding: {cte} 3819 3820 {bodyline} 3821 """) 3822 3823 def test_known_8bit_CTE(self): 3824 m = self.bodytest_msg.format(charset='utf-8', 3825 cte='8bit', 3826 bodyline='pöstal').encode('utf-8') 3827 msg = email.message_from_bytes(m) 3828 self.assertEqual(msg.get_payload(), "pöstal\n") 3829 self.assertEqual(msg.get_payload(decode=True), 3830 "pöstal\n".encode('utf-8')) 3831 3832 def test_unknown_8bit_CTE(self): 3833 m = self.bodytest_msg.format(charset='notavalidcharset', 3834 cte='8bit', 3835 bodyline='pöstal').encode('utf-8') 3836 msg = email.message_from_bytes(m) 3837 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n") 3838 self.assertEqual(msg.get_payload(decode=True), 3839 "pöstal\n".encode('utf-8')) 3840 3841 def test_8bit_in_quopri_body(self): 3842 # This is non-RFC compliant data...without 'decode' the library code 
3843 # decodes the body using the charset from the headers, and because the 3844 # source byte really is utf-8 this works. This is likely to fail 3845 # against real dirty data (ie: produce mojibake), but the data is 3846 # invalid anyway so it is as good a guess as any. But this means that 3847 # this test just confirms the current behavior; that behavior is not 3848 # necessarily the best possible behavior. With 'decode' it is 3849 # returning the raw bytes, so that test should be of correct behavior, 3850 # or at least produce the same result that email4 did. 3851 m = self.bodytest_msg.format(charset='utf-8', 3852 cte='quoted-printable', 3853 bodyline='p=C3=B6stál').encode('utf-8') 3854 msg = email.message_from_bytes(m) 3855 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n') 3856 self.assertEqual(msg.get_payload(decode=True), 3857 'pöstál\n'.encode('utf-8')) 3858 3859 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self): 3860 # This is similar to the previous test, but proves that if the 8bit 3861 # byte is undecodeable in the specified charset, it gets replaced 3862 # by the unicode 'unknown' character. Again, this may or may not 3863 # be the ideal behavior. Note that if decode=False none of the 3864 # decoders will get involved, so this is the only test we need 3865 # for this behavior. 3866 m = self.bodytest_msg.format(charset='ascii', 3867 cte='quoted-printable', 3868 bodyline='p=C3=B6stál').encode('utf-8') 3869 msg = email.message_from_bytes(m) 3870 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n') 3871 self.assertEqual(msg.get_payload(decode=True), 3872 'pöstál\n'.encode('utf-8')) 3873 3874 # test_defect_handling:test_invalid_chars_in_base64_payload 3875 def test_8bit_in_base64_body(self): 3876 # If we get 8bit bytes in a base64 body, we can just ignore them 3877 # as being outside the base64 alphabet and decode anyway. But 3878 # we register a defect. 
3879 m = self.bodytest_msg.format(charset='utf-8', 3880 cte='base64', 3881 bodyline='cMO2c3RhbAá=').encode('utf-8') 3882 msg = email.message_from_bytes(m) 3883 self.assertEqual(msg.get_payload(decode=True), 3884 'pöstal'.encode('utf-8')) 3885 self.assertIsInstance(msg.defects[0], 3886 errors.InvalidBase64CharactersDefect) 3887 3888 def test_8bit_in_uuencode_body(self): 3889 # Sticking an 8bit byte in a uuencode block makes it undecodable by 3890 # normal means, so the block is returned undecoded, but as bytes. 3891 m = self.bodytest_msg.format(charset='utf-8', 3892 cte='uuencode', 3893 bodyline='<,.V<W1A; á ').encode('utf-8') 3894 msg = email.message_from_bytes(m) 3895 self.assertEqual(msg.get_payload(decode=True), 3896 '<,.V<W1A; á \n'.encode('utf-8')) 3897 3898 3899 headertest_headers = ( 3900 ('From: foo@bar.com', ('From', 'foo@bar.com')), 3901 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')), 3902 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n' 3903 '\tJean de Baddie', 3904 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_' 3905 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n' 3906 ' =?unknown-8bit?q?_Jean_de_Baddie?=')), 3907 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')), 3908 ) 3909 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) + 3910 '\nYes, they are flying.\n').encode('utf-8') 3911 3912 def test_get_8bit_header(self): 3913 msg = email.message_from_bytes(self.headertest_msg) 3914 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz') 3915 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz') 3916 3917 def test_print_8bit_headers(self): 3918 msg = email.message_from_bytes(self.headertest_msg) 3919 self.assertEqual(str(msg), 3920 textwrap.dedent("""\ 3921 From: {} 3922 To: {} 3923 Subject: {} 3924 From: {} 3925 3926 Yes, they are flying. 
3927 """).format(*[expected[1] for (_, expected) in 3928 self.headertest_headers])) 3929 3930 def test_values_with_8bit_headers(self): 3931 msg = email.message_from_bytes(self.headertest_msg) 3932 self.assertListEqual([str(x) for x in msg.values()], 3933 ['foo@bar.com', 3934 'b\uFFFD\uFFFDz', 3935 'Maintenant je vous pr\uFFFD\uFFFDsente mon ' 3936 'coll\uFFFD\uFFFDgue, le pouf ' 3937 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n' 3938 '\tJean de Baddie', 3939 "g\uFFFD\uFFFDst"]) 3940 3941 def test_items_with_8bit_headers(self): 3942 msg = email.message_from_bytes(self.headertest_msg) 3943 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()], 3944 [('From', 'foo@bar.com'), 3945 ('To', 'b\uFFFD\uFFFDz'), 3946 ('Subject', 'Maintenant je vous ' 3947 'pr\uFFFD\uFFFDsente ' 3948 'mon coll\uFFFD\uFFFDgue, le pouf ' 3949 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n' 3950 '\tJean de Baddie'), 3951 ('From', 'g\uFFFD\uFFFDst')]) 3952 3953 def test_get_all_with_8bit_headers(self): 3954 msg = email.message_from_bytes(self.headertest_msg) 3955 self.assertListEqual([str(x) for x in msg.get_all('from')], 3956 ['foo@bar.com', 3957 'g\uFFFD\uFFFDst']) 3958 3959 def test_get_content_type_with_8bit(self): 3960 msg = email.message_from_bytes(textwrap.dedent("""\ 3961 Content-Type: text/pl\xA7in; charset=utf-8 3962 """).encode('latin-1')) 3963 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin") 3964 self.assertEqual(msg.get_content_maintype(), "text") 3965 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin") 3966 3967 # test_headerregistry.TestContentTypeHeader.non_ascii_in_params 3968 def test_get_params_with_8bit(self): 3969 msg = email.message_from_bytes( 3970 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1')) 3971 self.assertEqual(msg.get_params(header='x-header'), 3972 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')]) 3973 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne') 3974 # XXX: someday you might be able to get 'b\xa7r', for now 
you can't. 3975 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None) 3976 3977 # test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value 3978 def test_get_rfc2231_params_with_8bit(self): 3979 msg = email.message_from_bytes(textwrap.dedent("""\ 3980 Content-Type: text/plain; charset=us-ascii; 3981 title*=us-ascii'en'This%20is%20not%20f\xa7n""" 3982 ).encode('latin-1')) 3983 self.assertEqual(msg.get_param('title'), 3984 ('us-ascii', 'en', 'This is not f\uFFFDn')) 3985 3986 def test_set_rfc2231_params_with_8bit(self): 3987 msg = email.message_from_bytes(textwrap.dedent("""\ 3988 Content-Type: text/plain; charset=us-ascii; 3989 title*=us-ascii'en'This%20is%20not%20f\xa7n""" 3990 ).encode('latin-1')) 3991 msg.set_param('title', 'test') 3992 self.assertEqual(msg.get_param('title'), 'test') 3993 3994 def test_del_rfc2231_params_with_8bit(self): 3995 msg = email.message_from_bytes(textwrap.dedent("""\ 3996 Content-Type: text/plain; charset=us-ascii; 3997 title*=us-ascii'en'This%20is%20not%20f\xa7n""" 3998 ).encode('latin-1')) 3999 msg.del_param('title') 4000 self.assertEqual(msg.get_param('title'), None) 4001 self.assertEqual(msg.get_content_maintype(), 'text') 4002 4003 def test_get_payload_with_8bit_cte_header(self): 4004 msg = email.message_from_bytes(textwrap.dedent("""\ 4005 Content-Transfer-Encoding: b\xa7se64 4006 Content-Type: text/plain; charset=latin-1 4007 4008 payload 4009 """).encode('latin-1')) 4010 self.assertEqual(msg.get_payload(), 'payload\n') 4011 self.assertEqual(msg.get_payload(decode=True), b'payload\n') 4012 4013 non_latin_bin_msg = textwrap.dedent("""\ 4014 From: foo@bar.com 4015 To: báz 4016 Subject: Maintenant je vous présente mon collègue, le pouf célèbre 4017 \tJean de Baddie 4018 Mime-Version: 1.0 4019 Content-Type: text/plain; charset="utf-8" 4020 Content-Transfer-Encoding: 8bit 4021 4022 Да, они летят. 
4023 """).encode('utf-8') 4024 4025 def test_bytes_generator(self): 4026 msg = email.message_from_bytes(self.non_latin_bin_msg) 4027 out = BytesIO() 4028 email.generator.BytesGenerator(out).flatten(msg) 4029 self.assertEqual(out.getvalue(), self.non_latin_bin_msg) 4030 4031 def test_bytes_generator_handles_None_body(self): 4032 #Issue 11019 4033 msg = email.message.Message() 4034 out = BytesIO() 4035 email.generator.BytesGenerator(out).flatten(msg) 4036 self.assertEqual(out.getvalue(), b"\n") 4037 4038 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\ 4039 From: foo@bar.com 4040 To: =?unknown-8bit?q?b=C3=A1z?= 4041 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?= 4042 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?= 4043 =?unknown-8bit?q?_Jean_de_Baddie?= 4044 Mime-Version: 1.0 4045 Content-Type: text/plain; charset="utf-8" 4046 Content-Transfer-Encoding: base64 4047 4048 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg== 4049 """) 4050 4051 def test_generator_handles_8bit(self): 4052 msg = email.message_from_bytes(self.non_latin_bin_msg) 4053 out = StringIO() 4054 email.generator.Generator(out).flatten(msg) 4055 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped) 4056 4057 def test_str_generator_should_not_mutate_msg_when_handling_8bit(self): 4058 msg = email.message_from_bytes(self.non_latin_bin_msg) 4059 out = BytesIO() 4060 BytesGenerator(out).flatten(msg) 4061 orig_value = out.getvalue() 4062 Generator(StringIO()).flatten(msg) # Should not mutate msg! 4063 out = BytesIO() 4064 BytesGenerator(out).flatten(msg) 4065 self.assertEqual(out.getvalue(), orig_value) 4066 4067 def test_bytes_generator_with_unix_from(self): 4068 # The unixfrom contains a current date, so we can't check it 4069 # literally. Just make sure the first word is 'From' and the 4070 # rest of the message matches the input. 
def test_message_from_binary_file(self):
    # Write the 8bit message to a file, parse it back through a file
    # object opened in binary mode, and compare the 7bit rendering.
    path = 'test.msg'
    # Registered before the file is created so the cleanup also runs when
    # one of the steps below fails.
    self.addCleanup(unlink, path)
    with open(path, 'wb') as sink:
        sink.write(self.non_latin_bin_msg)
    with open(path, 'rb') as source:
        parsed = email.parser.BytesParser().parse(source)
    rendered = str(parsed)
    self.assertEqual(rendered, self.non_latin_bin_msg_as7bit)
4113 """) 4114 4115 def test_string_generator_reencodes_to_quopri_when_appropriate(self): 4116 m = email.message_from_bytes(self.latin_bin_msg) 4117 self.assertEqual(str(m), self.latin_bin_msg_as7bit) 4118 4119 def test_decoded_generator_emits_unicode_body(self): 4120 m = email.message_from_bytes(self.latin_bin_msg) 4121 out = StringIO() 4122 email.generator.DecodedGenerator(out).flatten(m) 4123 #DecodedHeader output contains an extra blank line compared 4124 #to the input message. RDM: not sure if this is a bug or not, 4125 #but it is not specific to the 8bit->7bit conversion. 4126 self.assertEqual(out.getvalue(), 4127 self.latin_bin_msg.decode('latin-1')+'\n') 4128 4129 def test_bytes_feedparser(self): 4130 bfp = email.feedparser.BytesFeedParser() 4131 for i in range(0, len(self.latin_bin_msg), 10): 4132 bfp.feed(self.latin_bin_msg[i:i+10]) 4133 m = bfp.close() 4134 self.assertEqual(str(m), self.latin_bin_msg_as7bit) 4135 4136 def test_crlf_flatten(self): 4137 with openfile('msg_26.txt', 'rb') as fp: 4138 text = fp.read() 4139 msg = email.message_from_bytes(text) 4140 s = BytesIO() 4141 g = email.generator.BytesGenerator(s) 4142 g.flatten(msg, linesep='\r\n') 4143 self.assertEqual(s.getvalue(), text) 4144 4145 def test_8bit_multipart(self): 4146 # Issue 11605 4147 source = textwrap.dedent("""\ 4148 Date: Fri, 18 Mar 2011 17:15:43 +0100 4149 To: foo@example.com 4150 From: foodwatch-Newsletter <bar@example.com> 4151 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System 4152 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain> 4153 MIME-Version: 1.0 4154 Content-Type: multipart/alternative; 4155 boundary="b1_76a486bee62b0d200f33dc2ca08220ad" 4156 4157 --b1_76a486bee62b0d200f33dc2ca08220ad 4158 Content-Type: text/plain; charset="utf-8" 4159 Content-Transfer-Encoding: 8bit 4160 4161 Guten Tag, , 4162 4163 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die 4164 Nachrichten aus Japan. 
4165 4166 4167 --b1_76a486bee62b0d200f33dc2ca08220ad 4168 Content-Type: text/html; charset="utf-8" 4169 Content-Transfer-Encoding: 8bit 4170 4171 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" 4172 "http://www.w3.org/TR/html4/loose.dtd"> 4173 <html lang="de"> 4174 <head> 4175 <title>foodwatch - Newsletter</title> 4176 </head> 4177 <body> 4178 <p>mit großer Betroffenheit verfolgen auch wir im foodwatch-Team 4179 die Nachrichten aus Japan.</p> 4180 </body> 4181 </html> 4182 --b1_76a486bee62b0d200f33dc2ca08220ad-- 4183 4184 """).encode('utf-8') 4185 msg = email.message_from_bytes(source) 4186 s = BytesIO() 4187 g = email.generator.BytesGenerator(s) 4188 g.flatten(msg) 4189 self.assertEqual(s.getvalue(), source) 4190 4191 def test_bytes_generator_b_encoding_linesep(self): 4192 # Issue 14062: b encoding was tacking on an extra \n. 4193 m = Message() 4194 # This has enough non-ascii that it should always end up b encoded. 4195 m['Subject'] = Header('žluťoučký kůň') 4196 s = BytesIO() 4197 g = email.generator.BytesGenerator(s) 4198 g.flatten(m, linesep='\r\n') 4199 self.assertEqual( 4200 s.getvalue(), 4201 b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n') 4202 4203 def test_generator_b_encoding_linesep(self): 4204 # Since this broke in ByteGenerator, test Generator for completeness. 4205 m = Message() 4206 # This has enough non-ascii that it should always end up b encoded. 
class BaseTestBytesGeneratorIdempotent:
    # Mixin with the bytes flavour of the idempotency helpers.  It reads
    # ``linesep``, ``blinesep`` and ``normalize_linesep_regex`` from the
    # class it is mixed into, and relies on that class for assertEqual().

    maxDiff = None

    def _msgobj(self, filename):
        # Return (message, data) for a test data file, after rewriting the
        # line endings matched by normalize_linesep_regex to blinesep.
        with openfile(filename, 'rb') as stream:
            raw = stream.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        parsed = email.message_from_bytes(normalized)
        return parsed, normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten *msg* without header wrapping (maxheaderlen=0) using the
        # configured line separator and require the output to equal *data*.
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, sink.getvalue())
class TestBase64(unittest.TestCase):
    """Tests for the low-level helpers in email.base64mime."""

    def test_len(self):
        eq = self.assertEqual
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        # Base64 emits one 4-character group for every (possibly partial)
        # group of 3 input characters: 0 -> 0, 1..3 -> 4, 4..6 -> 8, ...
        for size in range(15):
            with self.subTest(size=size):
                eq(base64mime.header_length('x' * size),
                   4 * ((size + 2) // 3))

    def test_decode(self):
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        eq = self.assertEqual
        encode = base64mime.body_encode
        # Empty input gives an empty result.  Only emptiness is checked: the
        # type of the empty value (the input echoed back, or an empty str)
        # is an implementation detail of body_encode().
        eq(len(encode(b'')), 0)
        eq(encode(b'hello'), 'aGVsbG8=\n')
        # A newline in the input is encoded like any other byte
        eq(encode(b'hello\n'), 'aGVsbG8K\n')
        # 100 input bytes with maxlinelen=40 give three full 40-character
        # lines (30 input bytes each) and one short line.
        full = 'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg'
        last = 'eHh4eCB4eHh4IA=='
        lines = [full, full, full, last]
        # Test the maxlinelen arg
        eq(encode(b'xxxx ' * 20, maxlinelen=40),
           ''.join(line + '\n' for line in lines))
        # Test the eol argument
        eq(encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           ''.join(line + '\r\n' for line in lines))

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
class TestQuopri(unittest.TestCase):
    """Tests for the low-level helpers in email.quoprimime."""

    def setUp(self):
        # Set of characters (as byte integers) that don't need to be encoded
        # in headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            b'!*+-/'))
        # Set of characters (as byte integers) that do need to be encoded in
        # headers.
        header_safe = set(self.hlit)
        self.hnon = [c for c in range(256) if c not in header_safe]
        # Use a unittest assertion instead of a bare ``assert`` so that the
        # sanity check is not stripped when running under ``python -O``.
        self.assertEqual(len(self.hlit) + len(self.hnon), 256)
        # Set of characters (as byte integers) that don't need to be encoded
        # in bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Set of characters (as byte integers) that do need to be encoded in
        # bodies.
        body_safe = set(self.blit)
        self.bnon = [c for c in range(256) if c not in body_safe]
        self.assertEqual(len(self.blit) + len(self.bnon), 256)

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                        'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                        'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length():
        # =?xxx?q?...?= means 10 extra characters.
        for sample in (b'hello', b'h@e@l@l@o@'):
            eq(len(quoprimime.header_encode(sample, charset='xxx')),
               quoprimime.header_length(sample) + 10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    # -- header_encode() ---------------------------------------------------

    def _test_header_encode(self, header, expected_encoded_header, charset=None):
        # charset=None means "rely on header_encode()'s own default".
        if charset is None:
            encoded_header = quoprimime.header_encode(header)
        else:
            encoded_header = quoprimime.header_encode(header, charset)
        self.assertEqual(encoded_header, expected_encoded_header)

    def test_header_encode_null(self):
        self._test_header_encode(b'', '')

    def test_header_encode_one_word(self):
        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')

    def test_header_encode_two_lines(self):
        self._test_header_encode(b'hello\nworld',
                                '=?iso-8859-1?q?hello=0Aworld?=')

    def test_header_encode_non_ascii(self):
        self._test_header_encode(b'hello\xc7there',
                                '=?iso-8859-1?q?hello=C7there?=')

    def test_header_encode_alt_charset(self):
        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
                charset='iso-8859-2')

    # -- header_decode() ---------------------------------------------------

    def _test_header_decode(self, encoded_header, expected_decoded_header):
        decoded_header = quoprimime.header_decode(encoded_header)
        self.assertEqual(decoded_header, expected_decoded_header)

    def test_header_decode_null(self):
        self._test_header_decode('', '')

    def test_header_decode_one_word(self):
        self._test_header_decode('hello', 'hello')

    def test_header_decode_two_lines(self):
        self._test_header_decode('hello=0Aworld', 'hello\nworld')

    def test_header_decode_non_ascii(self):
        self._test_header_decode('hello=C7there', 'hello\xc7there')

    def test_header_decode_re_bug_18380(self):
        # Issue 18380: Call re.sub with a positional argument for flags in the wrong position
        self.assertEqual(quoprimime.header_decode('=30' * 257), '0' * 257)

    # -- decode() ----------------------------------------------------------

    def _test_decode(self, encoded, expected_decoded, eol=None):
        # eol=None means "rely on decode()'s own default".
        if eol is None:
            decoded = quoprimime.decode(encoded)
        else:
            decoded = quoprimime.decode(encoded, eol=eol)
        self.assertEqual(decoded, expected_decoded)

    def test_decode_null_word(self):
        self._test_decode('', '')

    def test_decode_null_line_null_word(self):
        self._test_decode('\r\n', '\n')

    def test_decode_one_word(self):
        self._test_decode('hello', 'hello')

    def test_decode_one_word_eol(self):
        self._test_decode('hello', 'hello', eol='X')

    def test_decode_one_line(self):
        self._test_decode('hello\r\n', 'hello\n')

    def test_decode_one_line_lf(self):
        self._test_decode('hello\n', 'hello\n')

    def test_decode_one_line_cr(self):
        self._test_decode('hello\r', 'hello\n')

    def test_decode_one_line_nl(self):
        self._test_decode('hello\n', 'helloX', eol='X')

    def test_decode_one_line_crnl(self):
        self._test_decode('hello\r\n', 'helloX', eol='X')

    def test_decode_one_line_one_word(self):
        self._test_decode('hello\r\nworld', 'hello\nworld')

    def test_decode_one_line_one_word_eol(self):
        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')

    def test_decode_two_lines(self):
        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')

    def test_decode_two_lines_eol(self):
        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')

    def test_decode_one_long_line(self):
        self._test_decode('Spam' * 250, 'Spam' * 250)

    def test_decode_one_space(self):
        self._test_decode(' ', '')

    def test_decode_multiple_spaces(self):
        self._test_decode(' ' * 5, '')

    def test_decode_one_line_trailing_spaces(self):
        self._test_decode('hello \r\n', 'hello\n')

    def test_decode_two_lines_trailing_spaces(self):
        self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')

    def test_decode_quoted_word(self):
        self._test_decode('=22quoted=20words=22', '"quoted words"')

    def test_decode_uppercase_quoting(self):
        self._test_decode('ab=CD=EF', 'ab\xcd\xef')

    def test_decode_lowercase_quoting(self):
        self._test_decode('ab=cd=ef', 'ab\xcd\xef')

    def test_decode_soft_line_break(self):
        self._test_decode('soft line=\r\nbreak', 'soft linebreak')

    def test_decode_false_quoting(self):
        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')

    # -- body_encode() -----------------------------------------------------

    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
        # Only forward the keyword arguments the caller actually supplied so
        # that body_encode()'s own defaults are exercised otherwise.
        kwargs = {}
        if maxlinelen is None:
            # Use body_encode's default.
            maxlinelen = 76
        else:
            kwargs['maxlinelen'] = maxlinelen
        if eol is None:
            # Use body_encode's default.
            eol = '\n'
        else:
            kwargs['eol'] = eol
        encoded_body = quoprimime.body_encode(body, **kwargs)
        self.assertEqual(encoded_body, expected_encoded_body)
        if eol == '\n' or eol == '\r\n':
            # We know how to split the result back into lines, so maxlinelen
            # can be checked.
            for line in encoded_body.splitlines():
                self.assertLessEqual(len(line), maxlinelen)

    def test_encode_null(self):
        self._test_encode('', '')

    def test_encode_null_lines(self):
        self._test_encode('\n\n', '\n\n')

    def test_encode_one_line(self):
        self._test_encode('hello\n', 'hello\n')

    def test_encode_one_line_crlf(self):
        self._test_encode('hello\r\n', 'hello\n')

    def test_encode_one_line_eol(self):
        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')

    def test_encode_one_line_eol_after_non_ascii(self):
        # issue 20206; see changeset 0cf700464177 for why the encode/decode.
        self._test_encode('hello\u03c5\n'.encode('utf-8').decode('latin1'),
                          'hello=CF=85\r\n', eol='\r\n')

    def test_encode_one_space(self):
        self._test_encode(' ', '=20')

    def test_encode_one_line_one_space(self):
        self._test_encode(' \n', '=20\n')

    # XXX: body_encode() expects strings, but uses ord(char) from these
    # strings to index into a 256-entry list.  For code points above 255,
    # this will fail.  Should there be a check for 8-bit only ord() values in
    # body, or at least a comment about the expected input?

    def test_encode_two_lines_one_space(self):
        self._test_encode(' \n \n', '=20\n=20\n')

    def test_encode_one_word_trailing_spaces(self):
        # Only the last space of a trailing run is quoted, so two trailing
        # spaces are needed to produce ' =20'.  The run is spelled with an
        # explicit repeat count so its length cannot be lost to whitespace
        # normalisation of the source.
        self._test_encode('hello' + ' ' * 2, 'hello =20')

    def test_encode_one_line_trailing_spaces(self):
        # Same as above, with a terminating newline.
        self._test_encode('hello' + ' ' * 2 + '\n', 'hello =20\n')

    def test_encode_one_word_trailing_tab(self):
        self._test_encode('hello \t', 'hello =09')

    def test_encode_one_line_trailing_tab(self):
        self._test_encode('hello \t\n', 'hello =09\n')

    def test_encode_trailing_space_before_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)

    def test_encode_trailing_space_at_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)

    def test_encode_trailing_space_beyond_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)

    def test_encode_whitespace_lines(self):
        self._test_encode(' \n' * 5, '=20\n' * 5)

    def test_encode_quoted_equals(self):
        self._test_encode('a = b', 'a =3D b')

    def test_encode_one_long_string(self):
        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)

    def test_encode_one_long_line(self):
        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')

    def test_encode_one_very_long_line(self):
        self._test_encode('x' * 200 + '\n',
                2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')

    def test_encode_shortest_maxlinelen(self):
        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)

    def test_encode_maxlinelen_too_small(self):
        self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # A CRLF in the input comes out as the (default) eol
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # 100 characters at maxlinelen=40 are split by soft line breaks into
        # these three pieces; the final trailing space is quoted.
        pieces = [
            'xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=',
            ' xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=',
            'x xxxx xxxx xxxx xxxx=20',
        ]
        # Test the maxlinelen arg
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40),
           '\n'.join(pieces))
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           '\r\n'.join(pieces))
        # Empty lines survive unchanged
        eq(quoprimime.body_encode('one line\n\ntwo line'),
           'one line\n\ntwo line')
# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # test_body_encode() registers a throw-away 'fake' charset in the
        # module-level registry; remove it again (if present) so it cannot
        # leak into other tests.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        eq = self.assertEqual
        # Make sure us-ascii = no Unicode conversion
        c = Charset('us-ascii')
        eq(c.header_encode('Hello World!'), 'Hello World!')
        # Non-ASCII characters cannot be header-encoded with us-ascii...
        s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        self.assertRaises(UnicodeError, c.header_encode, s)
        # ...but the same text is encodable with utf-8.
        c = Charset('utf-8')
        eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        eq = self.assertEqual
        # Try a charset with QP body encoding
        c = Charset('iso-8859-1')
        eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        c = Charset('utf-8')
        eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world'))
        # Try a charset with None body encoding
        c = Charset('us-ascii')
        eq('hello world', c.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec.
        # Only the construction is exercised for now; the conversion
        # assertions below are disabled.
        Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        c = Charset('fake')
        eq('hello world', c.body_encode('hello world'))

    def test_unicode_charset_name(self):
        charset = Charset('us-ascii')
        self.assertEqual(str(charset), 'us-ascii')
        self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
# Test multilingual MIME headers.
#
# NOTE(review): this copy of the file appears to have had runs of whitespace
# collapsed.  Literals below that depend on an exact number of consecutive
# spaces (e.g. in test_simple and test_encode_preserves_leading_ws_on_value)
# are reproduced as shown here -- confirm them against the upstream file
# before relying on them.
class TestHeader(TestEmailBase):
    # Appending a chunk that starts with a space to a plain ASCII header.
    def test_simple(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append(' Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    # Appending a chunk without a leading space: the encoded form still
    # separates the two chunks with a space.
    def test_simple_surprise(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    # A plain string decodes to a single (string, None) chunk.
    def test_header_needs_no_decoding(self):
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    # Every folded line of a long header must respect maxlinelen.
    def test_long(self):
        h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
                   maxlinelen=76)
        for l in h.encode(splitchars=' ').split('\n '):
            self.assertLessEqual(len(l), 76)

    # One header built from three chunks in three different charsets, then
    # round-tripped through encode() / decode_header() / make_header().
    def test_multilingual(self):
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        # Decoding must give back the three original chunks with their
        # charsets.
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        # str() of the header is the full text as one unicode string; the
        # expectation is spelled as UTF-8 bytes and decoded for comparison.
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        h = Header()
        self.assertEqual(h.encode(), '')

    # A Header built with no arguments compares equal to '' and accepts
    # appended chunks with an explicit charset.
    def test_header_ctor_default_args(self):
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    # Folding position depends on header_name (its length counts against the
    # first line) and on an explicit maxlinelen.
    def test_explicit_maxlinelen(self):
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    # Quoted-printable encoded words are split to fit very short lines and
    # still round-trip to the original text.
    def test_quopri_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        h.append('xxxx ' * 20)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    # Same as above for a charset whose headers are base64 encoded.
    def test_base64_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    # Plain ASCII survives decode_header() -> make_header() -> encode().
    def test_us_ascii_header(self):
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    # The charset may be given to append() as a plain string.
    def test_string_charset(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    # With utf-8 the expected encodings are 'q' for the mostly-ASCII text
    # and 'b' for the all-CJK text.
    def test_utf8_shortest(self):
        eq = self.assertEqual
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    # Bytes containing a non-ASCII byte are rejected unless errors='replace'
    # is passed, in which case the result matches a 'replace' decode.
    def test_bad_8bit_header(self):
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    # A surrogateescape-decoded string with the UNKNOWN8BIT charset shows
    # U+FFFD in str() but decodes back to the original bytes.
    def test_escaped_8bit_header(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        e = x.decode('ascii', 'surrogateescape')
        h = Header(e, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    # Same expectations when the raw bytes are passed directly.
    def test_header_handles_binary_unknown8bit(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    # ...and after a decode_header() / make_header() round trip.
    def test_make_header_handles_binary_unknown8bit(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        h2 = email.header.make_header(email.header.decode_header(h))
        self.assertEqual(str(h2),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])

    # Mutating the list returned by decode_header() must not affect the
    # Header it was obtained from.
    def test_modify_returned_list_does_not_change_header(self):
        h = Header('test')
        chunks = email.header.decode_header(h)
        chunks.append(('ascii', 'test2'))
        self.assertEqual(str(h), 'test')

    # An encoded word followed by plain text round-trips unchanged.
    def test_encoded_adjacent_nonencoded(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    # Two adjacent encoded words in the same charset are merged into one
    # chunk; the unencoded text around them keeps its whitespace.
    def test_whitespace_keeper(self):
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        eq(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    # Malformed base64 inside an encoded word raises HeaderParseError.
    def test_broken_base64_header(self):
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)

    # A shift_jis header is emitted using the iso-2022-jp charset.
    def test_shift_jis_charset(self):
        h = Header('文', charset='shift_jis')
        self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')

    def test_flatten_header_with_no_value(self):
        # Issue 11401 (regression from email 4.x) Note that the space after
        # the header doesn't reflect the input, but this is also the way
        # email 4.x behaved. At some point it would be nice to fix that.
        msg = email.message_from_string("EmptyHeader:")
        self.assertEqual(str(msg), "EmptyHeader: \n\n")

    # NOTE(review): the test name implies the value carries leading
    # whitespace that must survive flattening; the literals here show a
    # single space on both sides, which looks whitespace-collapsed -- confirm
    # the intended number of spaces against upstream.
    def test_encode_preserves_leading_ws_on_value(self):
        msg = Message()
        msg['SomeHeader'] = ' value with leading ws'
        self.assertEqual(str(msg), "SomeHeader: value with leading ws\n\n")

    # A whitespace-only header encodes to itself.
    def test_whitespace_header(self):
        self.assertEqual(Header(' ').encode(), ' ')
# Test RFC 2231 header parameters (en/de)coding
#
# NOTE(review): this copy of the file appears to have had runs of whitespace
# collapsed; the expected message texts below are reproduced as shown here
# and should be confirmed against the fixture files (msg_01.txt etc.).
class TestRFC2231(TestEmailBase):

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    # get_param() returns a (charset, language, value) tuple for the encoded
    # parameter; unquote=False keeps the surrounding double quotes.
    def test_get_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_29.txt')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        eq(msg.get_param('title', unquote=False),
           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    # set_param() with charset (and optionally language) stores the value in
    # RFC 2231 form; checked via get_param() and via the flattened message.
    def test_set_param(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii')
        eq(msg.get_param('title'),
           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    # requote=False drops the quotes only when the value has no tspecials.
    def test_set_param_requote(self):
        msg = Message()
        msg.set_param('title', 'foo')
        self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
        msg.set_param('title', 'bar', requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title=bar')
        # tspecial is still quoted.
        msg.set_param('title', "(bar)bell", requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')

    # del_param() removes only the named parameter; the RFC 2231 encoded
    # 'title' parameter set alongside it must remain in the output.
    def test_del_param(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
            charset='us-ascii', language='en')
        msg.del_param('foo', header='Content-Type')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset
    # I changed the charset name, though, because the one in the file isn't
    # a legal charset name. Should add a test for an illegal charset.
    def test_rfc2231_get_content_charset(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_32.txt')
        eq(msg.get_content_charset(), 'us-ascii')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes
    # A filename split over three continuation segments (two of them
    # percent-encoded) is reassembled, and the message flattens back to the
    # exact input text.
    def test_rfc2231_parse_rfc_quoting(self):
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        self.assertEqual(m, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    # Same as above, with the encoded segments additionally wrapped in
    # double quotes.
    def test_rfc2231_parse_extra_quoting(self):
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        self.assertEqual(m, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset
    # but new test uses *0* because otherwise lang/charset is not valid.
    # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values
    # Segmented but unencoded parameter values are joined into a plain string
    # (not a (charset, language, value) tuple).
    def test_rfc2231_no_language_or_charset(self):
        m = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(m)
        param = msg.get_param('NAME')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(
            param,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset
    # Empty charset and language fields ('') in the first segment.
    def test_rfc2231_no_language_or_charset_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # Duplicate of previous test?
    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded,
    # but the test below is wrong (the first part should be decoded).
5211 def test_rfc2231_partly_encoded(self): 5212 m = '''\ 5213Content-Disposition: inline; 5214\tfilename*0="''This%20is%20even%20more%20"; 5215\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5216\tfilename*2="is it not.pdf" 5217 5218''' 5219 msg = email.message_from_string(m) 5220 self.assertEqual( 5221 msg.get_filename(), 5222 'This%20is%20even%20more%20***fun*** is it not.pdf') 5223 5224 def test_rfc2231_partly_nonencoded(self): 5225 m = '''\ 5226Content-Disposition: inline; 5227\tfilename*0="This%20is%20even%20more%20"; 5228\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20"; 5229\tfilename*2="is it not.pdf" 5230 5231''' 5232 msg = email.message_from_string(m) 5233 self.assertEqual( 5234 msg.get_filename(), 5235 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf') 5236 5237 def test_rfc2231_no_language_or_charset_in_boundary(self): 5238 m = '''\ 5239Content-Type: multipart/alternative; 5240\tboundary*0*="''This%20is%20even%20more%20"; 5241\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5242\tboundary*2="is it not.pdf" 5243 5244''' 5245 msg = email.message_from_string(m) 5246 self.assertEqual(msg.get_boundary(), 5247 'This is even more ***fun*** is it not.pdf') 5248 5249 def test_rfc2231_no_language_or_charset_in_charset(self): 5250 # This is a nonsensical charset value, but tests the code anyway 5251 m = '''\ 5252Content-Type: text/plain; 5253\tcharset*0*="This%20is%20even%20more%20"; 5254\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5255\tcharset*2="is it not.pdf" 5256 5257''' 5258 msg = email.message_from_string(m) 5259 self.assertEqual(msg.get_content_charset(), 5260 'this is even more ***fun*** is it not.pdf') 5261 5262 # test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii 5263 def test_rfc2231_bad_encoding_in_filename(self): 5264 m = '''\ 5265Content-Disposition: inline; 5266\tfilename*0*="bogus'xx'This%20is%20even%20more%20"; 5267\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5268\tfilename*2="is it not.pdf" 5269 5270''' 5271 msg = 
email.message_from_string(m) 5272 self.assertEqual(msg.get_filename(), 5273 'This is even more ***fun*** is it not.pdf') 5274 5275 def test_rfc2231_bad_encoding_in_charset(self): 5276 m = """\ 5277Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D 5278 5279""" 5280 msg = email.message_from_string(m) 5281 # This should return None because non-ascii characters in the charset 5282 # are not allowed. 5283 self.assertEqual(msg.get_content_charset(), None) 5284 5285 def test_rfc2231_bad_character_in_charset(self): 5286 m = """\ 5287Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D 5288 5289""" 5290 msg = email.message_from_string(m) 5291 # This should return None because non-ascii characters in the charset 5292 # are not allowed. 5293 self.assertEqual(msg.get_content_charset(), None) 5294 5295 def test_rfc2231_bad_character_in_filename(self): 5296 m = '''\ 5297Content-Disposition: inline; 5298\tfilename*0*="ascii'xx'This%20is%20even%20more%20"; 5299\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5300\tfilename*2*="is it not.pdf%E2" 5301 5302''' 5303 msg = email.message_from_string(m) 5304 self.assertEqual(msg.get_filename(), 5305 'This is even more ***fun*** is it not.pdf\ufffd') 5306 5307 def test_rfc2231_unknown_encoding(self): 5308 m = """\ 5309Content-Transfer-Encoding: 8bit 5310Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt 5311 5312""" 5313 msg = email.message_from_string(m) 5314 self.assertEqual(msg.get_filename(), 'myfile.txt') 5315 5316 def test_rfc2231_single_tick_in_filename_extended(self): 5317 eq = self.assertEqual 5318 m = """\ 5319Content-Type: application/x-foo; 5320\tname*0*=\"Frank's\"; name*1*=\" Document\" 5321 5322""" 5323 msg = email.message_from_string(m) 5324 charset, language, s = msg.get_param('name') 5325 eq(charset, None) 5326 eq(language, None) 5327 eq(s, "Frank's Document") 5328 5329 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes 5330 def test_rfc2231_single_tick_in_filename(self): 
5331 m = """\ 5332Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\" 5333 5334""" 5335 msg = email.message_from_string(m) 5336 param = msg.get_param('name') 5337 self.assertNotIsInstance(param, tuple) 5338 self.assertEqual(param, "Frank's Document") 5339 5340 def test_rfc2231_missing_tick(self): 5341 m = '''\ 5342Content-Disposition: inline; 5343\tfilename*0*="'This%20is%20broken"; 5344''' 5345 msg = email.message_from_string(m) 5346 self.assertEqual( 5347 msg.get_filename(), 5348 "'This is broken") 5349 5350 def test_rfc2231_missing_tick_with_encoded_non_ascii(self): 5351 m = '''\ 5352Content-Disposition: inline; 5353\tfilename*0*="'This%20is%E2broken"; 5354''' 5355 msg = email.message_from_string(m) 5356 self.assertEqual( 5357 msg.get_filename(), 5358 "'This is\ufffdbroken") 5359 5360 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang 5361 def test_rfc2231_tick_attack_extended(self): 5362 eq = self.assertEqual 5363 m = """\ 5364Content-Type: application/x-foo; 5365\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\" 5366 5367""" 5368 msg = email.message_from_string(m) 5369 charset, language, s = msg.get_param('name') 5370 eq(charset, 'us-ascii') 5371 eq(language, 'en-us') 5372 eq(s, "Frank's Document") 5373 5374 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value 5375 def test_rfc2231_tick_attack(self): 5376 m = """\ 5377Content-Type: application/x-foo; 5378\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\" 5379 5380""" 5381 msg = email.message_from_string(m) 5382 param = msg.get_param('name') 5383 self.assertNotIsInstance(param, tuple) 5384 self.assertEqual(param, "us-ascii'en-us'Frank's Document") 5385 5386 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes 5387 def test_rfc2231_no_extended_values(self): 5388 eq = self.assertEqual 5389 m = """\ 5390Content-Type: application/x-foo; name=\"Frank's Document\" 5391 5392""" 
5393 msg = email.message_from_string(m) 5394 eq(msg.get_param('name'), "Frank's Document") 5395 5396 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments 5397 def test_rfc2231_encoded_then_unencoded_segments(self): 5398 eq = self.assertEqual 5399 m = """\ 5400Content-Type: application/x-foo; 5401\tname*0*=\"us-ascii'en-us'My\"; 5402\tname*1=\" Document\"; 5403\tname*2*=\" For You\" 5404 5405""" 5406 msg = email.message_from_string(m) 5407 charset, language, s = msg.get_param('name') 5408 eq(charset, 'us-ascii') 5409 eq(language, 'en-us') 5410 eq(s, 'My Document For You') 5411 5412 # test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments 5413 # test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments 5414 def test_rfc2231_unencoded_then_encoded_segments(self): 5415 eq = self.assertEqual 5416 m = """\ 5417Content-Type: application/x-foo; 5418\tname*0=\"us-ascii'en-us'My\"; 5419\tname*1*=\" Document\"; 5420\tname*2*=\" For You\" 5421 5422""" 5423 msg = email.message_from_string(m) 5424 charset, language, s = msg.get_param('name') 5425 eq(charset, 'us-ascii') 5426 eq(language, 'en-us') 5427 eq(s, 'My Document For You') 5428 5429 def test_should_not_hang_on_invalid_ew_messages(self): 5430 messages = ["""From: user@host.com 5431To: user@host.com 5432Bad-Header: 5433 =?us-ascii?Q?LCSwrV11+IB0rSbSker+M9vWR7wEDSuGqmHD89Gt=ea0nJFSaiz4vX3XMJPT4vrE?= 5434 =?us-ascii?Q?xGUZeOnp0o22pLBB7CYLH74Js=wOlK6Tfru2U47qR?= 5435 =?us-ascii?Q?72OfyEY2p2=2FrA9xNFyvH+fBTCmazxwzF8nGkK6D?= 5436 5437Hello! 
5438""", """From: ����� �������� <xxx@xxx> 5439To: "xxx" <xxx@xxx> 5440Subject: ��� ���������� ����� ����� � ��������� �� ���� 5441MIME-Version: 1.0 5442Content-Type: text/plain; charset="windows-1251"; 5443Content-Transfer-Encoding: 8bit 5444 5445�� ����� � ���� ������ ��� �������� 5446"""] 5447 for m in messages: 5448 with self.subTest(m=m): 5449 msg = email.message_from_string(m) 5450 5451 5452# Tests to ensure that signed parts of an email are completely preserved, as 5453# required by RFC1847 section 2.1. Note that these are incomplete, because the 5454# email package does not currently always preserve the body. See issue 1670765. 5455class TestSigned(TestEmailBase): 5456 5457 def _msg_and_obj(self, filename): 5458 with openfile(filename) as fp: 5459 original = fp.read() 5460 msg = email.message_from_string(original) 5461 return original, msg 5462 5463 def _signed_parts_eq(self, original, result): 5464 # Extract the first mime part of each message 5465 import re 5466 repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M) 5467 inpart = repart.search(original).group(2) 5468 outpart = repart.search(result).group(2) 5469 self.assertEqual(outpart, inpart) 5470 5471 def test_long_headers_as_string(self): 5472 original, msg = self._msg_and_obj('msg_45.txt') 5473 result = msg.as_string() 5474 self._signed_parts_eq(original, result) 5475 5476 def test_long_headers_as_string_maxheaderlen(self): 5477 original, msg = self._msg_and_obj('msg_45.txt') 5478 result = msg.as_string(maxheaderlen=60) 5479 self._signed_parts_eq(original, result) 5480 5481 def test_long_headers_flatten(self): 5482 original, msg = self._msg_and_obj('msg_45.txt') 5483 fp = StringIO() 5484 Generator(fp).flatten(msg) 5485 result = fp.getvalue() 5486 self._signed_parts_eq(original, result) 5487 5488 5489 5490if __name__ == '__main__': 5491 unittest.main() 5492