1# Copyright (C) 2001-2010 Python Software Foundation
2# Contact: email-sig@python.org
3# email package unit tests
4
5import re
6import time
7import base64
8import unittest
9import textwrap
10
11from io import StringIO, BytesIO
12from itertools import chain
13from random import choice
14from threading import Thread
15from unittest.mock import patch
16
17import email
18import email.policy
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator, BytesGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email.mime.nonmultipart import MIMENonMultipart
33from email import utils
34from email import errors
35from email import encoders
36from email import iterators
37from email import base64mime
38from email import quoprimime
39
40from test.support import unlink, start_threads
41from test.test_email import openfile, TestEmailBase
42
43# These imports are documented to work, but we are testing them using a
44# different path, so we import them here just to make sure they are importable.
45from email.parser import FeedParser, BytesFeedParser
46
47NL = '\n'
48EMPTYSTRING = ''
49SPACE = ' '
50
51
52# Test various aspects of the Message class's API
53class TestMessageAPI(TestEmailBase):
54    def test_get_all(self):
55        eq = self.assertEqual
56        msg = self._msgobj('msg_20.txt')
57        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
58        eq(msg.get_all('xx', 'n/a'), 'n/a')
59
60    def test_getset_charset(self):
61        eq = self.assertEqual
62        msg = Message()
63        eq(msg.get_charset(), None)
64        charset = Charset('iso-8859-1')
65        msg.set_charset(charset)
66        eq(msg['mime-version'], '1.0')
67        eq(msg.get_content_type(), 'text/plain')
68        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
69        eq(msg.get_param('charset'), 'iso-8859-1')
70        eq(msg['content-transfer-encoding'], 'quoted-printable')
71        eq(msg.get_charset().input_charset, 'iso-8859-1')
72        # Remove the charset
73        msg.set_charset(None)
74        eq(msg.get_charset(), None)
75        eq(msg['content-type'], 'text/plain')
76        # Try adding a charset when there's already MIME headers present
77        msg = Message()
78        msg['MIME-Version'] = '2.0'
79        msg['Content-Type'] = 'text/x-weird'
80        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
81        msg.set_charset(charset)
82        eq(msg['mime-version'], '2.0')
83        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
84        eq(msg['content-transfer-encoding'], 'quinted-puntable')
85
86    def test_set_charset_from_string(self):
87        eq = self.assertEqual
88        msg = Message()
89        msg.set_charset('us-ascii')
90        eq(msg.get_charset().input_charset, 'us-ascii')
91        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
92
93    def test_set_payload_with_charset(self):
94        msg = Message()
95        charset = Charset('iso-8859-1')
96        msg.set_payload('This is a string payload', charset)
97        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')
98
99    def test_set_payload_with_8bit_data_and_charset(self):
100        data = b'\xd0\x90\xd0\x91\xd0\x92'
101        charset = Charset('utf-8')
102        msg = Message()
103        msg.set_payload(data, charset)
104        self.assertEqual(msg['content-transfer-encoding'], 'base64')
105        self.assertEqual(msg.get_payload(decode=True), data)
106        self.assertEqual(msg.get_payload(), '0JDQkdCS\n')
107
108    def test_set_payload_with_non_ascii_and_charset_body_encoding_none(self):
109        data = b'\xd0\x90\xd0\x91\xd0\x92'
110        charset = Charset('utf-8')
111        charset.body_encoding = None # Disable base64 encoding
112        msg = Message()
113        msg.set_payload(data.decode('utf-8'), charset)
114        self.assertEqual(msg['content-transfer-encoding'], '8bit')
115        self.assertEqual(msg.get_payload(decode=True), data)
116
117    def test_set_payload_with_8bit_data_and_charset_body_encoding_none(self):
118        data = b'\xd0\x90\xd0\x91\xd0\x92'
119        charset = Charset('utf-8')
120        charset.body_encoding = None # Disable base64 encoding
121        msg = Message()
122        msg.set_payload(data, charset)
123        self.assertEqual(msg['content-transfer-encoding'], '8bit')
124        self.assertEqual(msg.get_payload(decode=True), data)
125
126    def test_set_payload_to_list(self):
127        msg = Message()
128        msg.set_payload([])
129        self.assertEqual(msg.get_payload(), [])
130
131    def test_attach_when_payload_is_string(self):
132        msg = Message()
133        msg['Content-Type'] = 'multipart/mixed'
134        msg.set_payload('string payload')
135        sub_msg = MIMEMessage(Message())
136        self.assertRaisesRegex(TypeError, "[Aa]ttach.*non-multipart",
137                               msg.attach, sub_msg)
138
139    def test_get_charsets(self):
140        eq = self.assertEqual
141
142        msg = self._msgobj('msg_08.txt')
143        charsets = msg.get_charsets()
144        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])
145
146        msg = self._msgobj('msg_09.txt')
147        charsets = msg.get_charsets('dingbat')
148        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
149                      'koi8-r'])
150
151        msg = self._msgobj('msg_12.txt')
152        charsets = msg.get_charsets()
153        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
154                      'iso-8859-3', 'us-ascii', 'koi8-r'])
155
156    def test_get_filename(self):
157        eq = self.assertEqual
158
159        msg = self._msgobj('msg_04.txt')
160        filenames = [p.get_filename() for p in msg.get_payload()]
161        eq(filenames, ['msg.txt', 'msg.txt'])
162
163        msg = self._msgobj('msg_07.txt')
164        subpart = msg.get_payload(1)
165        eq(subpart.get_filename(), 'dingusfish.gif')
166
167    def test_get_filename_with_name_parameter(self):
168        eq = self.assertEqual
169
170        msg = self._msgobj('msg_44.txt')
171        filenames = [p.get_filename() for p in msg.get_payload()]
172        eq(filenames, ['msg.txt', 'msg.txt'])
173
174    def test_get_boundary(self):
175        eq = self.assertEqual
176        msg = self._msgobj('msg_07.txt')
177        # No quotes!
178        eq(msg.get_boundary(), 'BOUNDARY')
179
180    def test_set_boundary(self):
181        eq = self.assertEqual
182        # This one has no existing boundary parameter, but the Content-Type:
183        # header appears fifth.
184        msg = self._msgobj('msg_01.txt')
185        msg.set_boundary('BOUNDARY')
186        header, value = msg.items()[4]
187        eq(header.lower(), 'content-type')
188        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
189        # This one has a Content-Type: header, with a boundary, stuck in the
190        # middle of its headers.  Make sure the order is preserved; it should
191        # be fifth.
192        msg = self._msgobj('msg_04.txt')
193        msg.set_boundary('BOUNDARY')
194        header, value = msg.items()[4]
195        eq(header.lower(), 'content-type')
196        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
197        # And this one has no Content-Type: header at all.
198        msg = self._msgobj('msg_03.txt')
199        self.assertRaises(errors.HeaderParseError,
200                          msg.set_boundary, 'BOUNDARY')
201
202    def test_make_boundary(self):
203        msg = MIMEMultipart('form-data')
204        # Note that when the boundary gets created is an implementation
205        # detail and might change.
206        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
207        # Trigger creation of boundary
208        msg.as_string()
209        self.assertEqual(msg.items()[0][1][:33],
210                        'multipart/form-data; boundary="==')
211        # XXX: there ought to be tests of the uniqueness of the boundary, too.
212
213    def test_message_rfc822_only(self):
214        # Issue 7970: message/rfc822 not in multipart parsed by
215        # HeaderParser caused an exception when flattened.
216        with openfile('msg_46.txt') as fp:
217            msgdata = fp.read()
218        parser = HeaderParser()
219        msg = parser.parsestr(msgdata)
220        out = StringIO()
221        gen = Generator(out, True, 0)
222        gen.flatten(msg, False)
223        self.assertEqual(out.getvalue(), msgdata)
224
225    def test_byte_message_rfc822_only(self):
226        # Make sure new bytes header parser also passes this.
227        with openfile('msg_46.txt') as fp:
228            msgdata = fp.read().encode('ascii')
229        parser = email.parser.BytesHeaderParser()
230        msg = parser.parsebytes(msgdata)
231        out = BytesIO()
232        gen = email.generator.BytesGenerator(out)
233        gen.flatten(msg)
234        self.assertEqual(out.getvalue(), msgdata)
235
236    def test_get_decoded_payload(self):
237        eq = self.assertEqual
238        msg = self._msgobj('msg_10.txt')
239        # The outer message is a multipart
240        eq(msg.get_payload(decode=True), None)
241        # Subpart 1 is 7bit encoded
242        eq(msg.get_payload(0).get_payload(decode=True),
243           b'This is a 7bit encoded message.\n')
244        # Subpart 2 is quopri
245        eq(msg.get_payload(1).get_payload(decode=True),
246           b'\xa1This is a Quoted Printable encoded message!\n')
247        # Subpart 3 is base64
248        eq(msg.get_payload(2).get_payload(decode=True),
249           b'This is a Base64 encoded message.')
250        # Subpart 4 is base64 with a trailing newline, which
251        # used to be stripped (issue 7143).
252        eq(msg.get_payload(3).get_payload(decode=True),
253           b'This is a Base64 encoded message.\n')
254        # Subpart 5 has no Content-Transfer-Encoding: header.
255        eq(msg.get_payload(4).get_payload(decode=True),
256           b'This has no Content-Transfer-Encoding: header.\n')
257
258    def test_get_decoded_uu_payload(self):
259        eq = self.assertEqual
260        msg = Message()
261        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
262        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
263            msg['content-transfer-encoding'] = cte
264            eq(msg.get_payload(decode=True), b'hello world')
265        # Now try some bogus data
266        msg.set_payload('foo')
267        eq(msg.get_payload(decode=True), b'foo')
268
269    def test_get_payload_n_raises_on_non_multipart(self):
270        msg = Message()
271        self.assertRaises(TypeError, msg.get_payload, 1)
272
273    def test_decoded_generator(self):
274        eq = self.assertEqual
275        msg = self._msgobj('msg_07.txt')
276        with openfile('msg_17.txt') as fp:
277            text = fp.read()
278        s = StringIO()
279        g = DecodedGenerator(s)
280        g.flatten(msg)
281        eq(s.getvalue(), text)
282
283    def test__contains__(self):
284        msg = Message()
285        msg['From'] = 'Me'
286        msg['to'] = 'You'
287        # Check for case insensitivity
288        self.assertIn('from', msg)
289        self.assertIn('From', msg)
290        self.assertIn('FROM', msg)
291        self.assertIn('to', msg)
292        self.assertIn('To', msg)
293        self.assertIn('TO', msg)
294
295    def test_as_string(self):
296        msg = self._msgobj('msg_01.txt')
297        with openfile('msg_01.txt') as fp:
298            text = fp.read()
299        self.assertEqual(text, str(msg))
300        fullrepr = msg.as_string(unixfrom=True)
301        lines = fullrepr.split('\n')
302        self.assertTrue(lines[0].startswith('From '))
303        self.assertEqual(text, NL.join(lines[1:]))
304
305    def test_as_string_policy(self):
306        msg = self._msgobj('msg_01.txt')
307        newpolicy = msg.policy.clone(linesep='\r\n')
308        fullrepr = msg.as_string(policy=newpolicy)
309        s = StringIO()
310        g = Generator(s, policy=newpolicy)
311        g.flatten(msg)
312        self.assertEqual(fullrepr, s.getvalue())
313
314    def test_nonascii_as_string_without_cte(self):
315        m = textwrap.dedent("""\
316            MIME-Version: 1.0
317            Content-type: text/plain; charset="iso-8859-1"
318
319            Test if non-ascii messages with no Content-Transfer-Encoding set
320            can be as_string'd:
321            Föö bär
322            """)
323        source = m.encode('iso-8859-1')
324        expected = textwrap.dedent("""\
325            MIME-Version: 1.0
326            Content-type: text/plain; charset="iso-8859-1"
327            Content-Transfer-Encoding: quoted-printable
328
329            Test if non-ascii messages with no Content-Transfer-Encoding set
330            can be as_string'd:
331            F=F6=F6 b=E4r
332            """)
333        msg = email.message_from_bytes(source)
334        self.assertEqual(msg.as_string(), expected)
335
336    def test_nonascii_as_string_without_content_type_and_cte(self):
337        m = textwrap.dedent("""\
338            MIME-Version: 1.0
339
340            Test if non-ascii messages with no Content-Type nor
341            Content-Transfer-Encoding set can be as_string'd:
342            Föö bär
343            """)
344        source = m.encode('iso-8859-1')
345        expected = source.decode('ascii', 'replace')
346        msg = email.message_from_bytes(source)
347        self.assertEqual(msg.as_string(), expected)
348
349    def test_as_bytes(self):
350        msg = self._msgobj('msg_01.txt')
351        with openfile('msg_01.txt') as fp:
352            data = fp.read().encode('ascii')
353        self.assertEqual(data, bytes(msg))
354        fullrepr = msg.as_bytes(unixfrom=True)
355        lines = fullrepr.split(b'\n')
356        self.assertTrue(lines[0].startswith(b'From '))
357        self.assertEqual(data, b'\n'.join(lines[1:]))
358
359    def test_as_bytes_policy(self):
360        msg = self._msgobj('msg_01.txt')
361        newpolicy = msg.policy.clone(linesep='\r\n')
362        fullrepr = msg.as_bytes(policy=newpolicy)
363        s = BytesIO()
364        g = BytesGenerator(s,policy=newpolicy)
365        g.flatten(msg)
366        self.assertEqual(fullrepr, s.getvalue())
367
368    # test_headerregistry.TestContentTypeHeader.bad_params
369    def test_bad_param(self):
370        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
371        self.assertEqual(msg.get_param('baz'), '')
372
373    def test_missing_filename(self):
374        msg = email.message_from_string("From: foo\n")
375        self.assertEqual(msg.get_filename(), None)
376
377    def test_bogus_filename(self):
378        msg = email.message_from_string(
379        "Content-Disposition: blarg; filename\n")
380        self.assertEqual(msg.get_filename(), '')
381
382    def test_missing_boundary(self):
383        msg = email.message_from_string("From: foo\n")
384        self.assertEqual(msg.get_boundary(), None)
385
386    def test_get_params(self):
387        eq = self.assertEqual
388        msg = email.message_from_string(
389            'X-Header: foo=one; bar=two; baz=three\n')
390        eq(msg.get_params(header='x-header'),
391           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
392        msg = email.message_from_string(
393            'X-Header: foo; bar=one; baz=two\n')
394        eq(msg.get_params(header='x-header'),
395           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
396        eq(msg.get_params(), None)
397        msg = email.message_from_string(
398            'X-Header: foo; bar="one"; baz=two\n')
399        eq(msg.get_params(header='x-header'),
400           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
401
402    # test_headerregistry.TestContentTypeHeader.spaces_around_param_equals
403    def test_get_param_liberal(self):
404        msg = Message()
405        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
406        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')
407
408    def test_get_param(self):
409        eq = self.assertEqual
410        msg = email.message_from_string(
411            "X-Header: foo=one; bar=two; baz=three\n")
412        eq(msg.get_param('bar', header='x-header'), 'two')
413        eq(msg.get_param('quuz', header='x-header'), None)
414        eq(msg.get_param('quuz'), None)
415        msg = email.message_from_string(
416            'X-Header: foo; bar="one"; baz=two\n')
417        eq(msg.get_param('foo', header='x-header'), '')
418        eq(msg.get_param('bar', header='x-header'), 'one')
419        eq(msg.get_param('baz', header='x-header'), 'two')
420        # XXX: We are not RFC-2045 compliant!  We cannot parse:
421        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
422        # msg.get_param("weird")
423        # yet.
424
425    # test_headerregistry.TestContentTypeHeader.spaces_around_semis
426    def test_get_param_funky_continuation_lines(self):
427        msg = self._msgobj('msg_22.txt')
428        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')
429
430    # test_headerregistry.TestContentTypeHeader.semis_inside_quotes
431    def test_get_param_with_semis_in_quotes(self):
432        msg = email.message_from_string(
433            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
434        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
435        self.assertEqual(msg.get_param('name', unquote=False),
436                         '"Jim&amp;&amp;Jill"')
437
438    # test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value
439    def test_get_param_with_quotes(self):
440        msg = email.message_from_string(
441            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
442        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
443        msg = email.message_from_string(
444            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
445        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
446
447    def test_field_containment(self):
448        msg = email.message_from_string('Header: exists')
449        self.assertIn('header', msg)
450        self.assertIn('Header', msg)
451        self.assertIn('HEADER', msg)
452        self.assertNotIn('headerx', msg)
453
454    def test_set_param(self):
455        eq = self.assertEqual
456        msg = Message()
457        msg.set_param('charset', 'iso-2022-jp')
458        eq(msg.get_param('charset'), 'iso-2022-jp')
459        msg.set_param('importance', 'high value')
460        eq(msg.get_param('importance'), 'high value')
461        eq(msg.get_param('importance', unquote=False), '"high value"')
462        eq(msg.get_params(), [('text/plain', ''),
463                              ('charset', 'iso-2022-jp'),
464                              ('importance', 'high value')])
465        eq(msg.get_params(unquote=False), [('text/plain', ''),
466                                       ('charset', '"iso-2022-jp"'),
467                                       ('importance', '"high value"')])
468        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
469        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')
470
471    def test_del_param(self):
472        eq = self.assertEqual
473        msg = self._msgobj('msg_05.txt')
474        eq(msg.get_params(),
475           [('multipart/report', ''), ('report-type', 'delivery-status'),
476            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
477        old_val = msg.get_param("report-type")
478        msg.del_param("report-type")
479        eq(msg.get_params(),
480           [('multipart/report', ''),
481            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
482        msg.set_param("report-type", old_val)
483        eq(msg.get_params(),
484           [('multipart/report', ''),
485            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
486            ('report-type', old_val)])
487
488    def test_del_param_on_other_header(self):
489        msg = Message()
490        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
491        msg.del_param('filename', 'content-disposition')
492        self.assertEqual(msg['content-disposition'], 'attachment')
493
494    def test_del_param_on_nonexistent_header(self):
495        msg = Message()
496        # Deleting param on empty msg should not raise exception.
497        msg.del_param('filename', 'content-disposition')
498
499    def test_del_nonexistent_param(self):
500        msg = Message()
501        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
502        existing_header = msg['Content-Type']
503        msg.del_param('foobar', header='Content-Type')
504        self.assertEqual(msg['Content-Type'], existing_header)
505
506    def test_set_type(self):
507        eq = self.assertEqual
508        msg = Message()
509        self.assertRaises(ValueError, msg.set_type, 'text')
510        msg.set_type('text/plain')
511        eq(msg['content-type'], 'text/plain')
512        msg.set_param('charset', 'us-ascii')
513        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
514        msg.set_type('text/html')
515        eq(msg['content-type'], 'text/html; charset="us-ascii"')
516
517    def test_set_type_on_other_header(self):
518        msg = Message()
519        msg['X-Content-Type'] = 'text/plain'
520        msg.set_type('application/octet-stream', 'X-Content-Type')
521        self.assertEqual(msg['x-content-type'], 'application/octet-stream')
522
523    def test_get_content_type_missing(self):
524        msg = Message()
525        self.assertEqual(msg.get_content_type(), 'text/plain')
526
527    def test_get_content_type_missing_with_default_type(self):
528        msg = Message()
529        msg.set_default_type('message/rfc822')
530        self.assertEqual(msg.get_content_type(), 'message/rfc822')
531
532    def test_get_content_type_from_message_implicit(self):
533        msg = self._msgobj('msg_30.txt')
534        self.assertEqual(msg.get_payload(0).get_content_type(),
535                         'message/rfc822')
536
537    def test_get_content_type_from_message_explicit(self):
538        msg = self._msgobj('msg_28.txt')
539        self.assertEqual(msg.get_payload(0).get_content_type(),
540                         'message/rfc822')
541
542    def test_get_content_type_from_message_text_plain_implicit(self):
543        msg = self._msgobj('msg_03.txt')
544        self.assertEqual(msg.get_content_type(), 'text/plain')
545
546    def test_get_content_type_from_message_text_plain_explicit(self):
547        msg = self._msgobj('msg_01.txt')
548        self.assertEqual(msg.get_content_type(), 'text/plain')
549
550    def test_get_content_maintype_missing(self):
551        msg = Message()
552        self.assertEqual(msg.get_content_maintype(), 'text')
553
554    def test_get_content_maintype_missing_with_default_type(self):
555        msg = Message()
556        msg.set_default_type('message/rfc822')
557        self.assertEqual(msg.get_content_maintype(), 'message')
558
559    def test_get_content_maintype_from_message_implicit(self):
560        msg = self._msgobj('msg_30.txt')
561        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')
562
563    def test_get_content_maintype_from_message_explicit(self):
564        msg = self._msgobj('msg_28.txt')
565        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')
566
567    def test_get_content_maintype_from_message_text_plain_implicit(self):
568        msg = self._msgobj('msg_03.txt')
569        self.assertEqual(msg.get_content_maintype(), 'text')
570
571    def test_get_content_maintype_from_message_text_plain_explicit(self):
572        msg = self._msgobj('msg_01.txt')
573        self.assertEqual(msg.get_content_maintype(), 'text')
574
575    def test_get_content_subtype_missing(self):
576        msg = Message()
577        self.assertEqual(msg.get_content_subtype(), 'plain')
578
579    def test_get_content_subtype_missing_with_default_type(self):
580        msg = Message()
581        msg.set_default_type('message/rfc822')
582        self.assertEqual(msg.get_content_subtype(), 'rfc822')
583
584    def test_get_content_subtype_from_message_implicit(self):
585        msg = self._msgobj('msg_30.txt')
586        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')
587
588    def test_get_content_subtype_from_message_explicit(self):
589        msg = self._msgobj('msg_28.txt')
590        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')
591
592    def test_get_content_subtype_from_message_text_plain_implicit(self):
593        msg = self._msgobj('msg_03.txt')
594        self.assertEqual(msg.get_content_subtype(), 'plain')
595
596    def test_get_content_subtype_from_message_text_plain_explicit(self):
597        msg = self._msgobj('msg_01.txt')
598        self.assertEqual(msg.get_content_subtype(), 'plain')
599
600    def test_get_content_maintype_error(self):
601        msg = Message()
602        msg['Content-Type'] = 'no-slash-in-this-string'
603        self.assertEqual(msg.get_content_maintype(), 'text')
604
605    def test_get_content_subtype_error(self):
606        msg = Message()
607        msg['Content-Type'] = 'no-slash-in-this-string'
608        self.assertEqual(msg.get_content_subtype(), 'plain')
609
610    def test_replace_header(self):
611        eq = self.assertEqual
612        msg = Message()
613        msg.add_header('First', 'One')
614        msg.add_header('Second', 'Two')
615        msg.add_header('Third', 'Three')
616        eq(msg.keys(), ['First', 'Second', 'Third'])
617        eq(msg.values(), ['One', 'Two', 'Three'])
618        msg.replace_header('Second', 'Twenty')
619        eq(msg.keys(), ['First', 'Second', 'Third'])
620        eq(msg.values(), ['One', 'Twenty', 'Three'])
621        msg.add_header('First', 'Eleven')
622        msg.replace_header('First', 'One Hundred')
623        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
624        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
625        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')
626
627    def test_get_content_disposition(self):
628        msg = Message()
629        self.assertIsNone(msg.get_content_disposition())
630        msg.add_header('Content-Disposition', 'attachment',
631                       filename='random.avi')
632        self.assertEqual(msg.get_content_disposition(), 'attachment')
633        msg.replace_header('Content-Disposition', 'inline')
634        self.assertEqual(msg.get_content_disposition(), 'inline')
635        msg.replace_header('Content-Disposition', 'InlinE')
636        self.assertEqual(msg.get_content_disposition(), 'inline')
637
638    # test_defect_handling:test_invalid_chars_in_base64_payload
639    def test_broken_base64_payload(self):
640        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
641        msg = Message()
642        msg['content-type'] = 'audio/x-midi'
643        msg['content-transfer-encoding'] = 'base64'
644        msg.set_payload(x)
645        self.assertEqual(msg.get_payload(decode=True),
646                         (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
647                          b'\xa1\x00p\xf6\xbf\xe9\x0f'))
648        self.assertIsInstance(msg.defects[0],
649                              errors.InvalidBase64CharactersDefect)
650
651    def test_broken_unicode_payload(self):
652        # This test improves coverage but is not a compliance test.
653        # The behavior in this situation is currently undefined by the API.
654        x = 'this is a br\xf6ken thing to do'
655        msg = Message()
656        msg['content-type'] = 'text/plain'
657        msg['content-transfer-encoding'] = '8bit'
658        msg.set_payload(x)
659        self.assertEqual(msg.get_payload(decode=True),
660                         bytes(x, 'raw-unicode-escape'))
661
662    def test_questionable_bytes_payload(self):
663        # This test improves coverage but is not a compliance test,
664        # since it involves poking inside the black box.
665        x = 'this is a quéstionable thing to do'.encode('utf-8')
666        msg = Message()
667        msg['content-type'] = 'text/plain; charset="utf-8"'
668        msg['content-transfer-encoding'] = '8bit'
669        msg._payload = x
670        self.assertEqual(msg.get_payload(decode=True), x)
671
672    # Issue 1078919
673    def test_ascii_add_header(self):
674        msg = Message()
675        msg.add_header('Content-Disposition', 'attachment',
676                       filename='bud.gif')
677        self.assertEqual('attachment; filename="bud.gif"',
678            msg['Content-Disposition'])
679
680    def test_noascii_add_header(self):
681        msg = Message()
682        msg.add_header('Content-Disposition', 'attachment',
683            filename="Fußballer.ppt")
684        self.assertEqual(
685            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
686            msg['Content-Disposition'])
687
688    def test_nonascii_add_header_via_triple(self):
689        msg = Message()
690        msg.add_header('Content-Disposition', 'attachment',
691            filename=('iso-8859-1', '', 'Fußballer.ppt'))
692        self.assertEqual(
693            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
694            msg['Content-Disposition'])
695
696    def test_ascii_add_header_with_tspecial(self):
697        msg = Message()
698        msg.add_header('Content-Disposition', 'attachment',
699            filename="windows [filename].ppt")
700        self.assertEqual(
701            'attachment; filename="windows [filename].ppt"',
702            msg['Content-Disposition'])
703
704    def test_nonascii_add_header_with_tspecial(self):
705        msg = Message()
706        msg.add_header('Content-Disposition', 'attachment',
707            filename="Fußballer [filename].ppt")
708        self.assertEqual(
709            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
710            msg['Content-Disposition'])
711
712    def test_binary_quopri_payload(self):
713        for charset in ('latin-1', 'ascii'):
714            msg = Message()
715            msg['content-type'] = 'text/plain; charset=%s' % charset
716            msg['content-transfer-encoding'] = 'quoted-printable'
717            msg.set_payload(b'foo=e6=96=87bar')
718            self.assertEqual(
719                msg.get_payload(decode=True),
720                b'foo\xe6\x96\x87bar',
721                'get_payload returns wrong result with charset %s.' % charset)
722
723    def test_binary_base64_payload(self):
724        for charset in ('latin-1', 'ascii'):
725            msg = Message()
726            msg['content-type'] = 'text/plain; charset=%s' % charset
727            msg['content-transfer-encoding'] = 'base64'
728            msg.set_payload(b'Zm9v5paHYmFy')
729            self.assertEqual(
730                msg.get_payload(decode=True),
731                b'foo\xe6\x96\x87bar',
732                'get_payload returns wrong result with charset %s.' % charset)
733
734    def test_binary_uuencode_payload(self):
735        for charset in ('latin-1', 'ascii'):
736            for encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
737                msg = Message()
738                msg['content-type'] = 'text/plain; charset=%s' % charset
739                msg['content-transfer-encoding'] = encoding
740                msg.set_payload(b"begin 666 -\n)9F]OYI:'8F%R\n \nend\n")
741                self.assertEqual(
742                    msg.get_payload(decode=True),
743                    b'foo\xe6\x96\x87bar',
744                    str(('get_payload returns wrong result ',
745                         'with charset {0} and encoding {1}.')).\
746                        format(charset, encoding))
747
748    def test_add_header_with_name_only_param(self):
749        msg = Message()
750        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
751        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])
752
753    def test_add_header_with_no_value(self):
754        msg = Message()
755        msg.add_header('X-Status', None)
756        self.assertEqual('', msg['X-Status'])
757
758    # Issue 5871: reject an attempt to embed a header inside a header value
759    # (header injection attack).
760    def test_embedded_header_via_Header_rejected(self):
761        msg = Message()
762        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
763        self.assertRaises(errors.HeaderParseError, msg.as_string)
764
765    def test_embedded_header_via_string_rejected(self):
766        msg = Message()
767        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
768        self.assertRaises(errors.HeaderParseError, msg.as_string)
769
770    def test_unicode_header_defaults_to_utf8_encoding(self):
771        # Issue 14291
772        m = MIMEText('abc\n')
773        m['Subject'] = 'É test'
774        self.assertEqual(str(m),textwrap.dedent("""\
775            Content-Type: text/plain; charset="us-ascii"
776            MIME-Version: 1.0
777            Content-Transfer-Encoding: 7bit
778            Subject: =?utf-8?q?=C3=89_test?=
779
780            abc
781            """))
782
783    def test_unicode_body_defaults_to_utf8_encoding(self):
784        # Issue 14291
785        m = MIMEText('É testabc\n')
786        self.assertEqual(str(m),textwrap.dedent("""\
787            Content-Type: text/plain; charset="utf-8"
788            MIME-Version: 1.0
789            Content-Transfer-Encoding: base64
790
791            w4kgdGVzdGFiYwo=
792            """))
793
794
795# Test the email.encoders module
796class TestEncoders(unittest.TestCase):
797
798    def test_EncodersEncode_base64(self):
799        with openfile('PyBanner048.gif', 'rb') as fp:
800            bindata = fp.read()
801        mimed = email.mime.image.MIMEImage(bindata)
802        base64ed = mimed.get_payload()
803        # the transfer-encoded body lines should all be <=76 characters
804        lines = base64ed.split('\n')
805        self.assertLessEqual(max([ len(x) for x in lines ]), 76)
806
807    def test_encode_empty_payload(self):
808        eq = self.assertEqual
809        msg = Message()
810        msg.set_charset('us-ascii')
811        eq(msg['content-transfer-encoding'], '7bit')
812
813    def test_default_cte(self):
814        eq = self.assertEqual
815        # 7bit data and the default us-ascii _charset
816        msg = MIMEText('hello world')
817        eq(msg['content-transfer-encoding'], '7bit')
818        # Similar, but with 8bit data
819        msg = MIMEText('hello \xf8 world')
820        eq(msg['content-transfer-encoding'], 'base64')
821        # And now with a different charset
822        msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
823        eq(msg['content-transfer-encoding'], 'quoted-printable')
824
825    def test_encode7or8bit(self):
826        # Make sure a charset whose input character set is 8bit but
827        # whose output character set is 7bit gets a transfer-encoding
828        # of 7bit.
829        eq = self.assertEqual
830        msg = MIMEText('文\n', _charset='euc-jp')
831        eq(msg['content-transfer-encoding'], '7bit')
832        eq(msg.as_string(), textwrap.dedent("""\
833            MIME-Version: 1.0
834            Content-Type: text/plain; charset="iso-2022-jp"
835            Content-Transfer-Encoding: 7bit
836
837            \x1b$BJ8\x1b(B
838            """))
839
840    def test_qp_encode_latin1(self):
841        msg = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1')
842        self.assertEqual(str(msg), textwrap.dedent("""\
843            MIME-Version: 1.0
844            Content-Type: text/text; charset="iso-8859-1"
845            Content-Transfer-Encoding: quoted-printable
846
847            =E1=F6
848            """))
849
850    def test_qp_encode_non_latin1(self):
851        # Issue 16948
852        msg = MIMEText('\u017c\n', 'text', 'ISO-8859-2')
853        self.assertEqual(str(msg), textwrap.dedent("""\
854            MIME-Version: 1.0
855            Content-Type: text/text; charset="iso-8859-2"
856            Content-Transfer-Encoding: quoted-printable
857
858            =BF
859            """))
860
861
862# Test long header wrapping
863class TestLongHeaders(TestEmailBase):
864
865    maxDiff = None
866
867    def test_split_long_continuation(self):
868        eq = self.ndiffAssertEqual
869        msg = email.message_from_string("""\
870Subject: bug demonstration
871\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
872\tmore text
873
874test
875""")
876        sfp = StringIO()
877        g = Generator(sfp)
878        g.flatten(msg)
879        eq(sfp.getvalue(), """\
880Subject: bug demonstration
881\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
882\tmore text
883
884test
885""")
886
887    def test_another_long_almost_unsplittable_header(self):
888        eq = self.ndiffAssertEqual
889        hstr = """\
890bug demonstration
891\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
892\tmore text"""
893        h = Header(hstr, continuation_ws='\t')
894        eq(h.encode(), """\
895bug demonstration
896\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
897\tmore text""")
898        h = Header(hstr.replace('\t', ' '))
899        eq(h.encode(), """\
900bug demonstration
901 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
902 more text""")
903
904    def test_long_nonstring(self):
905        eq = self.ndiffAssertEqual
906        g = Charset("iso-8859-1")
907        cz = Charset("iso-8859-2")
908        utf8 = Charset("utf-8")
909        g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
910                  b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
911                  b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
912                  b'bef\xf6rdert. ')
913        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
914                   b'd\xf9vtipu.. ')
915        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
916                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
917                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
918                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
919                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
920                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
921                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
922                     '\u3044\u307e\u3059\u3002')
923        h = Header(g_head, g, header_name='Subject')
924        h.append(cz_head, cz)
925        h.append(utf8_head, utf8)
926        msg = Message()
927        msg['Subject'] = h
928        sfp = StringIO()
929        g = Generator(sfp)
930        g.flatten(msg)
931        eq(sfp.getvalue(), """\
932Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
933 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
934 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
935 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
936 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
937 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
938 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
939 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
940 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
941 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
942 =?utf-8?b?44CC?=
943
944""")
945        eq(h.encode(maxlinelen=76), """\
946=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
947 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
948 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
949 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
950 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
951 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
952 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
953 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
954 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
955 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
956 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
957
958    def test_long_header_encode(self):
959        eq = self.ndiffAssertEqual
960        h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
961                   'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
962                   header_name='X-Foobar-Spoink-Defrobnit')
963        eq(h.encode(), '''\
964wasnipoop; giraffes="very-long-necked-animals";
965 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
966
967    def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
968        eq = self.ndiffAssertEqual
969        h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
970                   'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
971                   header_name='X-Foobar-Spoink-Defrobnit',
972                   continuation_ws='\t')
973        eq(h.encode(), '''\
974wasnipoop; giraffes="very-long-necked-animals";
975 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
976
977    def test_long_header_encode_with_tab_continuation(self):
978        eq = self.ndiffAssertEqual
979        h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
980                   'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
981                   header_name='X-Foobar-Spoink-Defrobnit',
982                   continuation_ws='\t')
983        eq(h.encode(), '''\
984wasnipoop; giraffes="very-long-necked-animals";
985\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
986
987    def test_header_encode_with_different_output_charset(self):
988        h = Header('文', 'euc-jp')
989        self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
990
991    def test_long_header_encode_with_different_output_charset(self):
992        h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
993            b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
994            b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
995            b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
996        res = """\
997=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
998 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
999        self.assertEqual(h.encode(), res)
1000
1001    def test_header_splitter(self):
1002        eq = self.ndiffAssertEqual
1003        msg = MIMEText('')
1004        # It'd be great if we could use add_header() here, but that doesn't
1005        # guarantee an order of the parameters.
1006        msg['X-Foobar-Spoink-Defrobnit'] = (
1007            'wasnipoop; giraffes="very-long-necked-animals"; '
1008            'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
1009        sfp = StringIO()
1010        g = Generator(sfp)
1011        g.flatten(msg)
1012        eq(sfp.getvalue(), '''\
1013Content-Type: text/plain; charset="us-ascii"
1014MIME-Version: 1.0
1015Content-Transfer-Encoding: 7bit
1016X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
1017 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
1018
1019''')
1020
1021    def test_no_semis_header_splitter(self):
1022        eq = self.ndiffAssertEqual
1023        msg = Message()
1024        msg['From'] = 'test@dom.ain'
1025        msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
1026        msg.set_payload('Test')
1027        sfp = StringIO()
1028        g = Generator(sfp)
1029        g.flatten(msg)
1030        eq(sfp.getvalue(), """\
1031From: test@dom.ain
1032References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
1033 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
1034
1035Test""")
1036
1037    def test_last_split_chunk_does_not_fit(self):
1038        eq = self.ndiffAssertEqual
1039        h = Header('Subject: the first part of this is short, but_the_second'
1040            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
1041            '_all_by_itself')
1042        eq(h.encode(), """\
1043Subject: the first part of this is short,
1044 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
1045
1046    def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
1047        eq = self.ndiffAssertEqual
1048        h = Header(', but_the_second'
1049            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
1050            '_all_by_itself')
1051        eq(h.encode(), """\
1052,
1053 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
1054
1055    def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
1056        eq = self.ndiffAssertEqual
1057        h = Header(', , but_the_second'
1058            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
1059            '_all_by_itself')
1060        eq(h.encode(), """\
1061, ,
1062 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
1063
1064    def test_trailing_splitable_on_overlong_unsplitable(self):
1065        eq = self.ndiffAssertEqual
1066        h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
1067            'be_on_a_line_all_by_itself;')
1068        eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
1069            "be_on_a_line_all_by_itself;")
1070
1071    def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
1072        eq = self.ndiffAssertEqual
1073        h = Header('; '
1074            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
1075            'be_on_a_line_all_by_itself; ')
1076        eq(h.encode(), """\
1077;
1078 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
1079
1080    def test_long_header_with_multiple_sequential_split_chars(self):
1081        eq = self.ndiffAssertEqual
1082        h = Header('This is a long line that has two whitespaces  in a row.  '
1083            'This used to cause truncation of the header when folded')
1084        eq(h.encode(), """\
1085This is a long line that has two whitespaces  in a row.  This used to cause
1086 truncation of the header when folded""")
1087
1088    def test_splitter_split_on_punctuation_only_if_fws_with_header(self):
1089        eq = self.ndiffAssertEqual
1090        h = Header('thisverylongheaderhas;semicolons;and,commas,but'
1091            'they;arenotlegal;fold,points')
1092        eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
1093                        "arenotlegal;fold,points")
1094
1095    def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
1096        eq = self.ndiffAssertEqual
1097        h = Header('this is a  test where we need to have more than one line '
1098            'before; our final line that is just too big to fit;; '
1099            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
1100            'be_on_a_line_all_by_itself;')
1101        eq(h.encode(), """\
1102this is a  test where we need to have more than one line before;
1103 our final line that is just too big to fit;;
1104 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
1105
1106    def test_overlong_last_part_followed_by_split_point(self):
1107        eq = self.ndiffAssertEqual
1108        h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
1109            'be_on_a_line_all_by_itself ')
1110        eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
1111                        "should_be_on_a_line_all_by_itself ")
1112
1113    def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
1114        eq = self.ndiffAssertEqual
1115        h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
1116            'before_our_final_line_; ; '
1117            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
1118            'be_on_a_line_all_by_itself; ')
1119        eq(h.encode(), """\
1120this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
1121 ;
1122 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
1123
1124    def test_multiline_with_overlong_last_part_followed_by_split_point(self):
1125        eq = self.ndiffAssertEqual
1126        h = Header('this is a test where we need to have more than one line '
1127            'before our final line; ; '
1128            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
1129            'be_on_a_line_all_by_itself; ')
1130        eq(h.encode(), """\
1131this is a test where we need to have more than one line before our final line;
1132 ;
1133 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
1134
1135    def test_long_header_with_whitespace_runs(self):
1136        eq = self.ndiffAssertEqual
1137        msg = Message()
1138        msg['From'] = 'test@dom.ain'
1139        msg['References'] = SPACE.join(['<foo@dom.ain>  '] * 10)
1140        msg.set_payload('Test')
1141        sfp = StringIO()
1142        g = Generator(sfp)
1143        g.flatten(msg)
1144        eq(sfp.getvalue(), """\
1145From: test@dom.ain
1146References: <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
1147   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
1148   <foo@dom.ain>   <foo@dom.ain>\x20\x20
1149
1150Test""")
1151
1152    def test_long_run_with_semi_header_splitter(self):
1153        eq = self.ndiffAssertEqual
1154        msg = Message()
1155        msg['From'] = 'test@dom.ain'
1156        msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
1157        msg.set_payload('Test')
1158        sfp = StringIO()
1159        g = Generator(sfp)
1160        g.flatten(msg)
1161        eq(sfp.getvalue(), """\
1162From: test@dom.ain
1163References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
1164 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
1165 <foo@dom.ain>; abc
1166
1167Test""")
1168
1169    def test_splitter_split_on_punctuation_only_if_fws(self):
1170        eq = self.ndiffAssertEqual
1171        msg = Message()
1172        msg['From'] = 'test@dom.ain'
1173        msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
1174            'they;arenotlegal;fold,points')
1175        msg.set_payload('Test')
1176        sfp = StringIO()
1177        g = Generator(sfp)
1178        g.flatten(msg)
1179        # XXX the space after the header should not be there.
1180        eq(sfp.getvalue(), """\
1181From: test@dom.ain
1182References:\x20
1183 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
1184
1185Test""")
1186
1187    def test_no_split_long_header(self):
1188        eq = self.ndiffAssertEqual
1189        hstr = 'References: ' + 'x' * 80
1190        h = Header(hstr)
1191        # These come on two lines because Headers are really field value
1192        # classes and don't really know about their field names.
1193        eq(h.encode(), """\
1194References:
1195 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
1196        h = Header('x' * 80)
1197        eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
1198
1199    def test_splitting_multiple_long_lines(self):
1200        eq = self.ndiffAssertEqual
1201        hstr = """\
1202from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1203\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1204\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1205"""
1206        h = Header(hstr, continuation_ws='\t')
1207        eq(h.encode(), """\
1208from babylon.socal-raves.org (localhost [127.0.0.1]);
1209 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1210 for <mailman-admin@babylon.socal-raves.org>;
1211 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1212\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1213 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1214 for <mailman-admin@babylon.socal-raves.org>;
1215 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1216\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1217 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1218 for <mailman-admin@babylon.socal-raves.org>;
1219 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
1220
1221    def test_splitting_first_line_only_is_long(self):
1222        eq = self.ndiffAssertEqual
1223        hstr = """\
1224from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
1225\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1226\tid 17k4h5-00034i-00
1227\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
1228        h = Header(hstr, maxlinelen=78, header_name='Received',
1229                   continuation_ws='\t')
1230        eq(h.encode(), """\
1231from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
1232 helo=cthulhu.gerg.ca)
1233\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1234\tid 17k4h5-00034i-00
1235\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
1236
1237    def test_long_8bit_header(self):
1238        eq = self.ndiffAssertEqual
1239        msg = Message()
1240        h = Header('Britische Regierung gibt', 'iso-8859-1',
1241                    header_name='Subject')
1242        h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
1243        eq(h.encode(maxlinelen=76), """\
1244=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1245 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
1246        msg['Subject'] = h
1247        eq(msg.as_string(maxheaderlen=76), """\
1248Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1249 =?iso-8859-1?q?hore-Windkraftprojekte?=
1250
1251""")
1252        eq(msg.as_string(maxheaderlen=0), """\
1253Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
1254
1255""")
1256
1257    def test_long_8bit_header_no_charset(self):
1258        eq = self.ndiffAssertEqual
1259        msg = Message()
1260        header_string = ('Britische Regierung gibt gr\xfcnes Licht '
1261                         'f\xfcr Offshore-Windkraftprojekte '
1262                         '<a-very-long-address@example.com>')
1263        msg['Reply-To'] = header_string
1264        eq(msg.as_string(maxheaderlen=78), """\
1265Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1266 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1267
1268""")
1269        msg = Message()
1270        msg['Reply-To'] = Header(header_string,
1271                                 header_name='Reply-To')
1272        eq(msg.as_string(maxheaderlen=78), """\
1273Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1274 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1275
1276""")
1277
1278    def test_long_to_header(self):
1279        eq = self.ndiffAssertEqual
1280        to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
1281              '<someone@eecs.umich.edu>, '
1282              '"Someone Test #B" <someone@umich.edu>, '
1283              '"Someone Test #C" <someone@eecs.umich.edu>, '
1284              '"Someone Test #D" <someone@eecs.umich.edu>')
1285        msg = Message()
1286        msg['To'] = to
1287        eq(msg.as_string(maxheaderlen=78), '''\
1288To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
1289 "Someone Test #B" <someone@umich.edu>,
1290 "Someone Test #C" <someone@eecs.umich.edu>,
1291 "Someone Test #D" <someone@eecs.umich.edu>
1292
1293''')
1294
1295    def test_long_line_after_append(self):
1296        eq = self.ndiffAssertEqual
1297        s = 'This is an example of string which has almost the limit of header length.'
1298        h = Header(s)
1299        h.append('Add another line.')
1300        eq(h.encode(maxlinelen=76), """\
1301This is an example of string which has almost the limit of header length.
1302 Add another line.""")
1303
1304    def test_shorter_line_with_append(self):
1305        eq = self.ndiffAssertEqual
1306        s = 'This is a shorter line.'
1307        h = Header(s)
1308        h.append('Add another sentence. (Surprise?)')
1309        eq(h.encode(),
1310           'This is a shorter line. Add another sentence. (Surprise?)')
1311
1312    def test_long_field_name(self):
1313        eq = self.ndiffAssertEqual
1314        fn = 'X-Very-Very-Very-Long-Header-Name'
1315        gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
1316              'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
1317              'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
1318              'bef\xf6rdert. ')
1319        h = Header(gs, 'iso-8859-1', header_name=fn)
1320        # BAW: this seems broken because the first line is too long
1321        eq(h.encode(maxlinelen=76), """\
1322=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
1323 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
1324 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
1325 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
1326
1327    def test_long_received_header(self):
1328        h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
1329             'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
1330             'Wed, 05 Mar 2003 18:10:18 -0700')
1331        msg = Message()
1332        msg['Received-1'] = Header(h, continuation_ws='\t')
1333        msg['Received-2'] = h
1334        # This should be splitting on spaces not semicolons.
1335        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
1336Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1337 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
1338 Wed, 05 Mar 2003 18:10:18 -0700
1339Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1340 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
1341 Wed, 05 Mar 2003 18:10:18 -0700
1342
1343""")
1344
1345    def test_string_headerinst_eq(self):
1346        h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
1347             'tu-muenchen.de> (David Bremner\'s message of '
1348             '"Thu, 6 Mar 2003 13:58:21 +0100")')
1349        msg = Message()
1350        msg['Received-1'] = Header(h, header_name='Received-1',
1351                                   continuation_ws='\t')
1352        msg['Received-2'] = h
1353        # XXX The space after the ':' should not be there.
1354        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
1355Received-1:\x20
1356 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1357 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1358Received-2:\x20
1359 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1360 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1361
1362""")
1363
1364    def test_long_unbreakable_lines_with_continuation(self):
1365        eq = self.ndiffAssertEqual
1366        msg = Message()
1367        t = """\
1368iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1369 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1370        msg['Face-1'] = t
1371        msg['Face-2'] = Header(t, header_name='Face-2')
1372        msg['Face-3'] = ' ' + t
1373        # XXX This splitting is all wrong.  It the first value line should be
1374        # snug against the field name or the space after the header not there.
1375        eq(msg.as_string(maxheaderlen=78), """\
1376Face-1:\x20
1377 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1378 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
1379Face-2:\x20
1380 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1381 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
1382Face-3:\x20
1383 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1384 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
1385
1386""")
1387
1388    def test_another_long_multiline_header(self):
1389        eq = self.ndiffAssertEqual
1390        m = ('Received: from siimage.com '
1391             '([172.25.1.3]) by zima.siliconimage.com with '
1392             'Microsoft SMTPSVC(5.0.2195.4905); '
1393             'Wed, 16 Oct 2002 07:41:11 -0700')
1394        msg = email.message_from_string(m)
1395        eq(msg.as_string(maxheaderlen=78), '''\
1396Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
1397 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
1398
1399''')
1400
1401    def test_long_lines_with_different_header(self):
1402        eq = self.ndiffAssertEqual
1403        h = ('List-Unsubscribe: '
1404             '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
1405             '        <mailto:spamassassin-talk-request@lists.sourceforge.net'
1406             '?subject=unsubscribe>')
1407        msg = Message()
1408        msg['List'] = h
1409        msg['List'] = Header(h, header_name='List')
1410        eq(msg.as_string(maxheaderlen=78), """\
1411List: List-Unsubscribe:
1412 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
1413        <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
1414List: List-Unsubscribe:
1415 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
1416        <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
1417
1418""")
1419
1420    def test_long_rfc2047_header_with_embedded_fws(self):
1421        h = Header(textwrap.dedent("""\
1422            We're going to pretend this header is in a non-ascii character set
1423            \tto see if line wrapping with encoded words and embedded
1424               folding white space works"""),
1425                   charset='utf-8',
1426                   header_name='Test')
1427        self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1428            =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1429             =?utf-8?q?cter_set?=
1430             =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1431             =?utf-8?q?_folding_white_space_works?=""")+'\n')
1432
1433
1434
1435# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    # Exercises the generators' mangle_from_ option, which controls whether
    # body lines starting with "From " are written out with a leading ">".

    def setUp(self):
        # Fixture: a simple message whose body begins with a "From " line.
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def test_mangled_from(self):
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(self.msg)
        expected = """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(out.getvalue(), expected)

    def test_dont_mangle_from(self):
        out = StringIO()
        Generator(out, mangle_from_=False).flatten(self.msg)
        expected = """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(out.getvalue(), expected)

    def test_mangle_from_in_preamble_and_epilog(self):
        # Both the preamble and the epilogue of the multipart contain a line
        # starting with "From "; each one must be escaped exactly once.
        out = StringIO()
        writer = Generator(out, mangle_from_=True)
        parsed = email.message_from_string(textwrap.dedent("""\
            From: foo@bar.com
            Mime-Version: 1.0
            Content-Type: multipart/mixed; boundary=XXX

            From somewhere unknown

            --XXX
            Content-Type: text/plain

            foo

            --XXX--

            From somewhere unknowable
            """))
        writer.flatten(parsed)
        mangled_lines = [line for line in out.getvalue().split('\n')
                         if line.startswith('>From ')]
        self.assertEqual(len(mangled_lines), 2)

    def test_mangled_from_with_bad_bytes(self):
        # The body line carries non-ASCII bytes; it must still be mangled
        # and the bytes written back unchanged.
        source = textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 8bit
            From: aaa@bbb.org

        """).encode('utf-8')
        parsed = email.message_from_bytes(source + b'From R\xc3\xb6lli\n')
        out = BytesIO()
        BytesGenerator(out, mangle_from_=True).flatten(parsed)
        self.assertEqual(out.getvalue(), source + b'>From R\xc3\xb6lli\n')

    def test_mutltipart_with_bad_bytes_in_cte(self):
        # bpo30835
        # There is no assertion here: the test passes as long as parsing a
        # multipart whose Content-Transfer-Encoding value is a non-ASCII
        # byte sequence does not raise.
        source = textwrap.dedent("""\
            From: aperson@example.com
            Content-Type: multipart/mixed; boundary="1"
            Content-Transfer-Encoding: \xc8
        """).encode('utf-8')
        email.message_from_bytes(source)
1512
1513
1514# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Load the sample audio file once per test and wrap it in a
        # MIMEAudio part, letting the class guess the subtype.
        with openfile('audiotest.au', 'rb') as fp:
            raw = fp.read()
        self._audiodata = raw
        self._au = MIMEAudio(raw)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The stored payload is base64 text that decodes to the file bytes.
        encoded = self._au.get_payload().encode('ascii')
        self.assertEqual(base64.decodebytes(encoded), self._audiodata)

    def test_checkSetMinor(self):
        # An explicit subtype overrides the guessed one.
        part = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(part.get_content_type(), 'audio/fish')

    def test_add_header(self):
        part = self._au
        part.add_header('Content-Disposition', 'attachment',
                        filename='audiotest.au')
        self.assertEqual(part['content-disposition'],
                         'attachment; filename="audiotest.au"')
        self.assertEqual(
            part.get_params(header='content-disposition'),
            [('attachment', ''), ('filename', 'audiotest.au')])
        self.assertEqual(
            part.get_param('filename', header='content-disposition'),
            'audiotest.au')
        # A unique object, so identity checks prove the failobj came back.
        sentinel = []
        self.assertEqual(
            part.get_param('attachment', header='content-disposition'), '')
        self.assertIs(
            part.get_param('foo', failobj=sentinel,
                           header='content-disposition'),
            sentinel)
        # Try some missing stuff
        self.assertIs(part.get_param('foobar', sentinel), sentinel)
        self.assertIs(
            part.get_param('attachment', sentinel, header='foobar'),
            sentinel)
1551
1552
1553
1554# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Load the sample GIF once per test and wrap it in a MIMEImage part,
        # letting the class guess the subtype.
        with openfile('PyBanner048.gif', 'rb') as fp:
            raw = fp.read()
        self._imgdata = raw
        self._im = MIMEImage(raw)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The stored payload is base64 text that decodes to the file bytes.
        encoded = self._im.get_payload().encode('ascii')
        self.assertEqual(base64.decodebytes(encoded), self._imgdata)

    def test_checkSetMinor(self):
        # An explicit subtype overrides the guessed one.
        part = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(part.get_content_type(), 'image/fish')

    def test_add_header(self):
        part = self._im
        part.add_header('Content-Disposition', 'attachment',
                        filename='dingusfish.gif')
        self.assertEqual(part['content-disposition'],
                         'attachment; filename="dingusfish.gif"')
        self.assertEqual(
            part.get_params(header='content-disposition'),
            [('attachment', ''), ('filename', 'dingusfish.gif')])
        self.assertEqual(
            part.get_param('filename', header='content-disposition'),
            'dingusfish.gif')
        # A unique object, so identity checks prove the failobj came back.
        sentinel = []
        self.assertEqual(
            part.get_param('attachment', header='content-disposition'), '')
        self.assertIs(
            part.get_param('foo', failobj=sentinel,
                           header='content-disposition'),
            sentinel)
        # Try some missing stuff
        self.assertIs(part.get_param('foobar', sentinel), sentinel)
        self.assertIs(
            part.get_param('attachment', sentinel, header='foobar'),
            sentinel)
1591
1592
1593
1594# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    # Checks MIMEApplication with binary payloads under each encoder, both
    # on the freshly built message and after a bytes round trip.

    def _roundtrip(self, msg):
        # Flatten msg with BytesGenerator and parse the resulting wire form
        # back into a new message object.
        buf = BytesIO()
        BytesGenerator(buf).flatten(msg)
        return email.message_from_bytes(buf.getvalue())

    def test_headers(self):
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(msg.get_content_type(), 'application/octet-stream')
        self.assertEqual(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        self.assertEqual(msg.get_payload().strip(), '+vv8/f7/')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_7or8bit(self):
        # Issue 17171.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_7or8bit)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], '8bit')
        msg2 = self._roundtrip(msg)
        # After the round trip it is the re-parsed message that must be
        # checked, for the string view as well as the decoded bytes.
        self.assertEqual(msg2.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], '8bit')

    def test_binary_body_with_encode_noop(self):
        # Issue 16564: This does not produce an RFC valid message, since to be
        # valid it should have a CTE of binary.  But the below works in
        # Python2, and is documented as working this way.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        self.assertEqual(msg2.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_unicode_linend_encode_noop(self):
        # Issue 19003: This is a variation on #16564.
        # The leading \x0b byte must survive the round trip unchanged.
        bytesdata = b'\x0b\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_quopri(self):
        # Issue 14360.
        # Note the trailing space in the data: it must be encoded as =20.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff '
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_quopri)
        self.assertEqual(msg.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], 'quoted-printable')
        msg2 = self._roundtrip(msg)
        self.assertEqual(msg2.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], 'quoted-printable')

    def test_binary_body_with_encode_base64(self):
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_base64)
        self.assertEqual(msg.get_payload(), '+vv8/f7/\n')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        self.assertEqual(msg2.get_payload(), '+vv8/f7/\n')
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
1684
1685
1686# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        # A plain ASCII text part built with every default.
        self._msg = MIMEText('hello there')

    def test_types(self):
        part = self._msg
        self.assertEqual(part.get_content_type(), 'text/plain')
        self.assertEqual(part.get_param('charset'), 'us-ascii')
        # A unique object, so identity checks prove the failobj came back.
        sentinel = []
        self.assertIs(part.get_param('foobar', sentinel), sentinel)
        self.assertIs(
            part.get_param('charset', sentinel, header='foobar'),
            sentinel)

    def test_payload(self):
        part = self._msg
        self.assertEqual(part.get_payload(), 'hello there')
        self.assertFalse(part.is_multipart())

    def test_charset(self):
        by_name = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(by_name.get_charset().input_charset, 'us-ascii')
        self.assertEqual(by_name['content-type'],
                         'text/plain; charset="us-ascii"')
        # Also accept a Charset instance
        utf8 = Charset('utf-8')
        utf8.body_encoding = None
        by_instance = MIMEText('hello there', _charset=utf8)
        self.assertEqual(by_instance.get_charset().input_charset, 'utf-8')
        self.assertEqual(by_instance['content-type'],
                         'text/plain; charset="utf-8"')
        self.assertEqual(by_instance.get_payload(), 'hello there')

    def test_7bit_input(self):
        part = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(part.get_charset().input_charset, 'us-ascii')
        self.assertEqual(part['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        part = MIMEText('hello there')
        # The charset object is compared directly against the string name.
        self.assertEqual(part.get_charset(), 'us-ascii')
        self.assertEqual(part['content-type'],
                         'text/plain; charset="us-ascii"')
        self.assertIn('hello there', part.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        part = MIMEText(teststr, _charset='utf-8')
        self.assertEqual(part.get_charset().output_charset, 'utf-8')
        self.assertEqual(part['content-type'],
                         'text/plain; charset="utf-8"')
        self.assertEqual(part.get_payload(decode=True),
                         teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        with self.assertRaises(UnicodeEncodeError):
            MIMEText(teststr)
1743
1744
1745
1746# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    def setUp(self):
        # Build the fixture: a multipart/mixed container holding a short text
        # introduction followed by a GIF attachment.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        # A fixed timestamp rendered as an RFC 2822 date in local time.
        # formatdate() works out the UTC offset itself, so zones east of UTC
        # and zones with a non-whole-hour offset get a well-formed +HHMM
        # suffix, and the day/month names do not depend on the locale.
        now = 987809702.54848599
        container['Date'] = utils.formatdate(now, localtime=True)
        self._msg = container
        self._im = image
        self._txt = intro

    def _new_outer(self):
        # Return an empty multipart/mixed container carrying the Subject, To
        # and From headers shared by the flattening tests in this class.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        return outer

    def test_hierarchy(self):
        # The container exposes exactly the two attached parts, in order,
        # and they are the very objects that were attached in setUp().
        m = self._msg
        self.assertTrue(m.is_multipart())
        self.assertEqual(m.get_content_type(), 'multipart/mixed')
        self.assertEqual(len(m.get_payload()), 2)
        self.assertRaises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        self.assertEqual(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        # Parsing and re-flattening must reproduce the text exactly.
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._new_outer()
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--
''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        # Compared with the previous test, the empty-string preamble is
        # expected to add one blank line before the first boundary.
        outer = self._new_outer()
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        outer = self._new_outer()
        outer.set_boundary('BOUNDARY')
        outer.attach(MIMEText('hello world'))
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        # An empty-string preamble is expected to produce an extra blank
        # line before the first boundary.
        outer = self._new_outer()
        outer.preamble = ''
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        outer = self._new_outer()
        outer.preamble = None
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._new_outer()
        outer.epilogue = None
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        outer = self._new_outer()
        outer.epilogue = ''
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        # A newline-only epilogue is expected to show up as one blank line
        # after the closing boundary.
        outer = self._new_outer()
        outer.epilogue = '\n'
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        msg = self._msgobj('msg_36.txt')
        self.assertEqual(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        self.assertEqual(msg1.get_content_type(), 'multipart/alternative')
        self.assertEqual(len(msg1.get_payload()), 2)
        # Each alternative wraps exactly one text/plain message.
        for subpart in msg1.get_payload():
            self.assertEqual(subpart.get_content_type(),
                             'message/external-body')
            self.assertEqual(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            self.assertEqual(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        self.ndiffAssertEqual(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        self.ndiffAssertEqual(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        # A boundary parameter on a non-multipart type is kept as is, and
        # the body is flattened back out as plain text.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        # Leading spaces are part of the boundary and must be preserved.
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="    XXXX"

--    XXXX
Content-Type: text/plain


--    XXXX
Content-Type: text/plain

--    XXXX--
''')
        self.assertTrue(msg.is_multipart())
        self.assertEqual(msg.get_boundary(), '    XXXX')
        self.assertEqual(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        # The closing boundary is the very last thing in the input, with no
        # newline after it; the part payload must still be parsed cleanly.
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')

    def test_mimebase_default_policy(self):
        m = MIMEBase('multipart', 'mixed')
        self.assertIs(m.policy, email.policy.compat32)

    def test_mimebase_custom_policy(self):
        m = MIMEBase('multipart', 'mixed', policy=email.policy.default)
        self.assertIs(m.policy, email.policy.default)
2130
2131# Test some badly formatted messages
class TestNonConformant(TestEmailBase):

    def test_parse_missing_minor_type(self):
        # A Content-Type without a subtype falls back to text/plain.
        msg = self._msgobj('msg_14.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    # test_defect_handling
    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        found = inner.defects
        self.assertEqual(len(found), 1)
        self.assertIsInstance(found[0], errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_multipart_no_boundary(self):
        # The payload stays a plain string and two defects are recorded,
        # in this order.
        msg = self._msgobj('msg_25.txt')
        self.assertIsInstance(msg.get_payload(), str)
        found = msg.defects
        self.assertEqual(len(found), 2)
        self.assertIsInstance(found[0], errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(found[1],
                              errors.MultipartInvariantViolationDefect)

    # Template shared by the three CTE tests below; the {} placeholder sits
    # at the end of the Content-Type header so that an additional header
    # line (or nothing) can be spliced in after it.
    multipart_msg = textwrap.dedent("""\
        Date: Wed, 14 Nov 2007 12:56:23 GMT
        From: foo@bar.invalid
        To: foo@bar.invalid
        Subject: Content-Transfer-Encoding: base64 and multipart
        MIME-Version: 1.0
        Content-Type: multipart/mixed;
            boundary="===============3344438784458119861=="{}

        --===============3344438784458119861==
        Content-Type: text/plain

        Test message

        --===============3344438784458119861==
        Content-Type: application/octet-stream
        Content-Transfer-Encoding: base64

        YWJj

        --===============3344438784458119861==--
        """)

    # test_defect_handling
    def test_multipart_invalid_cte(self):
        source = self.multipart_msg.format(
            "\nContent-Transfer-Encoding: base64")
        msg = self._str_msg(source)
        found = msg.defects
        self.assertEqual(len(found), 1)
        self.assertIsInstance(
            found[0],
            errors.InvalidMultipartContentTransferEncodingDefect)

    # test_defect_handling
    def test_multipart_no_cte_no_defect(self):
        source = self.multipart_msg.format('')
        msg = self._str_msg(source)
        self.assertEqual(len(msg.defects), 0)

    # test_defect_handling
    def test_multipart_valid_cte_no_defect(self):
        # The mixed-case 'BINary' checks that the value is matched without
        # regard to case.
        for encoding in ('7bit', '8bit', 'BINary'):
            extra_header = "\nContent-Transfer-Encoding: {}".format(encoding)
            msg = self._str_msg(self.multipart_msg.format(extra_header))
            self.assertEqual(len(msg.defects), 0)

    # test_headerregistry.TestContentTypeHeader invalid_1 and invalid_2.
    def test_invalid_content_type(self):
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        out = StringIO()
        Generator(out).flatten(msg)
        self.ndiffAssertEqual(out.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        # The whole body, boundary lines included, is kept as one string.
        msg = self._msgobj('msg_31.txt')
        self.ndiffAssertEqual(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        msg = self._msgobj('msg_35.txt')
        self.ndiffAssertEqual(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    # test_defect_handling
    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        found = msg.defects
        self.assertEqual(len(found), 2)
        self.assertIsInstance(found[0], errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(found[1],
                              errors.MultipartInvariantViolationDefect)

    # test_defect_handling
    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        embedded = outer.get_payload(1)
        bad = embedded.get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_first_line_is_continuation_header(self):
        # The leading continuation line is dropped from the headers and
        # recorded as a defect that carries the offending line.
        source = ' Line 1\nSubject: test\n\nbody'
        msg = email.message_from_string(source)
        self.assertEqual(msg.keys(), ['Subject'])
        self.assertEqual(msg.get_payload(), 'body')
        self.assertEqual(len(msg.defects), 1)
        self.assertDefectsEqual(msg.defects,
                                [errors.FirstHeaderLineIsContinuationDefect])
        self.assertEqual(msg.defects[0].line, ' Line 1\n')

    # test_defect_handling
    def test_missing_header_body_separator(self):
        # Our heuristic if we see a line that doesn't look like a header (no
        # leading whitespace but no ':') is to assume that the blank line that
        # separates the header from the body is missing, and to stop parsing
        # headers and start parsing the body.
        source = 'Subject: test\nnot a header\nTo: abc\n\nb\n'
        msg = self._str_msg(source)
        self.assertEqual(msg.keys(), ['Subject'])
        self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n')
        self.assertDefectsEqual(msg.defects,
                                [errors.MissingHeaderBodySeparatorDefect])
2304
2305
2306# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    # Every test hands a header value containing RFC 2047 encoded words to
    # decode_header() and checks the (bytes, charset) chunks it returns.
    # Several also rebuild a Header from those chunks with make_header().

    def test_rfc2047_multiline(self):
        # Encoded words appear on both physical lines of the header value.
        raw = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'Re: ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b' baz foo bar ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(chunks)
        self.assertEqual(
            str(header),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        # With maxlinelen=76 the second encoded word is expected to be split
        # across two lines.
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_keeper_unicode(self):
        # The space between the encoded word and the plain text following it
        # must survive decoding and rebuilding the header.
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        chunks = decode_header(raw)
        self.assertEqual(chunks, [(b'Andr\xe9', 'iso-8859-1'),
                                  (b' Pirard <pirard@dom.ain>', None)])
        self.assertEqual(str(make_header(chunks)),
                         'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_keeper_unicode_2(self):
        # Same as above, with plain text on both sides of two encoded words.
        raw = ('The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the '
               '=?iso-8859-1?b?bGF6eSBkb2c=?=')
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'The ', None), (b'quick brown fox', 'iso-8859-1'),
            (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')])
        self.assertEqual(str(make_header(chunks)),
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words glued directly to the surrounding text are decoded.
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(raw), [
            (b'Sm', None), (b'\xf6', 'iso-8859-1'),
            (b'rg', None), (b'\xe5', 'iso-8859-1'),
            (b'sbord', None)])

    def test_rfc2047_with_whitespace(self):
        # Whitespace around encoded words stays with the plain-text chunks.
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        self.assertEqual(decode_header(raw), [
            (b'Sm ', None), (b'\xf6', 'iso-8859-1'),
            (b' rg ', None), (b'\xe5', 'iso-8859-1'),
            (b' sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        # Base64 encoded words must decode whether their '=' padding is
        # complete, short, or missing.  Only inputs that decode to complete
        # bytes are tested.
        template = '=?iso-8859-1?B?%s?='
        cases = [
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi'),
        ]
        for encoded, decoded in cases:
            # One sub-test per input: a failure names the offending payload
            # and the remaining inputs are still exercised.
            with self.subTest(encoded=encoded):
                self.assertEqual(decode_header(template % encoded),
                                 [(decoded, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        # '=zz' is not a valid hex escape and is expected to pass through
        # undecoded.
        raw = '=?iso-8859-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(raw),
                         [(b'andr\xe9=zz', 'iso-8859-1')])

    def test_rfc2047_rfc2047_1(self):
        # 1st testcase at end of rfc2047
        raw = '(=?ISO-8859-1?Q?a?=)'
        self.assertEqual(
            decode_header(raw),
            [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_2(self):
        # 2nd testcase at end of rfc2047
        raw = '(=?ISO-8859-1?Q?a?= b)'
        self.assertEqual(
            decode_header(raw),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)])

    def test_rfc2047_rfc2047_3(self):
        # 3rd testcase at end of rfc2047
        raw = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
        self.assertEqual(
            decode_header(raw),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_4(self):
        # 4th testcase at end of rfc2047
        raw = '(=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=)'
        self.assertEqual(
            decode_header(raw),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5a(self):
        # 5th testcase at end of rfc2047 newline is \r\n
        raw = '(=?ISO-8859-1?Q?a?=\r\n    =?ISO-8859-1?Q?b?=)'
        self.assertEqual(
            decode_header(raw),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5b(self):
        # 5th testcase at end of rfc2047 newline is \n
        raw = '(=?ISO-8859-1?Q?a?=\n    =?ISO-8859-1?Q?b?=)'
        self.assertEqual(
            decode_header(raw),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_6(self):
        # 6th testcase at end of rfc2047
        raw = '(=?ISO-8859-1?Q?a_b?=)'
        self.assertEqual(
            decode_header(raw),
            [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_7(self):
        # 7th testcase at end of rfc2047
        raw = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)'
        chunks = decode_header(raw)
        self.assertEqual(
            chunks,
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'),
             (b')', None)])
        # Re-encoding is expected to give back the input, lower-cased.
        self.assertEqual(make_header(chunks).encode(), raw.lower())
        self.assertEqual(str(make_header(chunks)), '(a b)')

    def test_multiline_header(self):
        # An encoded word followed by a folded (CRLF + space) continuation.
        raw = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller@xxx.com>'
        chunks = decode_header(raw)
        self.assertEqual(
            chunks,
            [(b'"M\xfcller T"', 'windows-1252'),
             (b'<T.Mueller@xxx.com>', None)])
        # Re-encoding is expected to give the input joined onto one line.
        self.assertEqual(make_header(chunks).encode(),
                         ''.join(raw.splitlines()))
        self.assertEqual(str(make_header(chunks)),
                         '"Müller T" <T.Mueller@xxx.com>')
2433
2434
2435# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    def setUp(self):
        # Keep the raw text of msg_11.txt on the instance.
        with openfile('msg_11.txt') as source:
            self._text = source.read()

    def test_type_error(self):
        # MIMEMessage rejects a payload that is a plain string.
        with self.assertRaises(TypeError):
            MIMEMessage('a plain string')

    def test_valid_argument(self):
        # Wrapping a Message yields a message/rfc822 container whose payload
        # is a one-element list holding that very same object.
        subject = 'A sub-message'
        inner = Message()
        inner['Subject'] = subject
        wrapper = MIMEMessage(inner)
        self.assertEqual(wrapper.get_content_type(), 'message/rfc822')
        payload = wrapper.get_payload()
        self.assertIsInstance(payload, list)
        self.assertEqual(len(payload), 1)
        subpart = payload[0]
        self.assertIs(subpart, inner)
        self.assertEqual(subpart['subject'], subject)

    def test_bad_multipart(self):
        # A second message cannot be attached to a MIMEMessage.
        first = Message()
        first['Subject'] = 'subpart 1'
        second = Message()
        second['Subject'] = 'subpart 2'
        wrapper = MIMEMessage(first)
        with self.assertRaises(errors.MultipartConversionError):
            wrapper.attach(second)

    def test_generate(self):
        # Build the message that will be encapsulated ...
        enclosed = Message()
        enclosed['Subject'] = 'An enclosed message'
        enclosed.set_payload('Here is the body of the message.\n')
        # ... wrap it, and flatten the wrapper.
        wrapper = MIMEMessage(enclosed)
        wrapper['Subject'] = 'The enclosing message'
        out = StringIO()
        Generator(out).flatten(wrapper)
        self.assertEqual(out.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        # Parsing msg_11.txt gives a message/rfc822 container with exactly
        # one sub-Message as its payload.
        msg = self._msgobj('msg_11.txt')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        self.assertEqual(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        self.assertEqual(submsg['subject'], 'An enclosed message')
        self.assertEqual(submsg.get_payload(),
                         'Here is the body of the message.\n')

    def test_dsn(self):
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        self.assertEqual(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        self.assertEqual(len(msg.get_payload()), 3)

        # Subpart 1 is a text/plain, human readable section
        human = msg.get_payload(0)
        self.assertEqual(human.get_content_type(), 'text/plain')
        self.assertEqual(human.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <henryi@oxy.edu>
  To: SoCal Raves <scr@socal-raves.org>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: jangel1@cougar.noc.ucla.edu
  Reason: recipient reached disk quota

""")

        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested
        # Message objects.
        status = msg.get_payload(1)
        self.assertEqual(status.get_content_type(),
                         'message/delivery-status')
        self.assertEqual(len(status.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        per_message = status.get_payload(0)
        self.assertIsInstance(per_message, Message)
        self.assertEqual(per_message['original-envelope-id'],
                         '0GK500B4HD0888@cougar.noc.ucla.edu')
        self.assertEqual(
            per_message.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        self.assertEqual(
            per_message.get_param('nsd', header='reporting-mta'), None)
        per_recipient = status.get_payload(1)
        self.assertIsInstance(per_recipient, Message)
        self.assertEqual(per_recipient['action'], 'failed')
        self.assertEqual(
            per_recipient.get_params(header='original-recipient'),
            [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        self.assertEqual(
            per_recipient.get_param('rfc822', header='final-recipient'), '')

        # Subpart 3 is the original message
        original = msg.get_payload(2)
        self.assertEqual(original.get_content_type(), 'message/rfc822')
        payload = original.get_payload()
        self.assertIsInstance(payload, list)
        self.assertEqual(len(payload), 1)
        enclosed = payload[0]
        self.assertIsInstance(enclosed, Message)
        self.assertEqual(enclosed.get_content_type(), 'text/plain')
        self.assertEqual(enclosed['message-id'],
                         '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # A hand-built multipart with preamble and epilogue must flatten to
        # exactly the text stored in msg_21.txt.
        with openfile('msg_21.txt') as source:
            expected = source.read()
        msg = Message()
        for name, value in (('From', 'aperson@dom.ain'),
                            ('To', 'bperson@dom.ain'),
                            ('Subject', 'Test')):
            msg[name] = value
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        parts = [MIMEText('One'), MIMEText('Two')]
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        for part in parts:
            msg.attach(part)
        out = StringIO()
        Generator(out).flatten(msg)
        self.ndiffAssertEqual(out.getvalue(), expected)

    def test_no_nl_preamble(self):
        # The preamble has no trailing newline and the epilogue is empty.
        msg = Message()
        for name, value in (('From', 'aperson@dom.ain'),
                            ('To', 'bperson@dom.ain'),
                            ('Subject', 'Test')):
            msg[name] = value
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        parts = [MIMEText('One'), MIMEText('Two')]
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        for part in parts:
            msg.attach(part)
        self.ndiffAssertEqual(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        # In msg_30.txt both containers report message/rfc822 and the
        # message inside each reports text/plain, as default and as actual
        # content type.
        with openfile('msg_30.txt') as source:
            msg = email.message_from_file(source)
        containers = [msg.get_payload(0), msg.get_payload(1)]
        for container in containers:
            self.assertEqual(container.get_default_type(), 'message/rfc822')
            self.assertEqual(container.get_content_type(), 'message/rfc822')
        for container in containers:
            enclosed = container.get_payload(0)
            self.assertEqual(enclosed.get_default_type(), 'text/plain')
            self.assertEqual(enclosed.get_content_type(), 'text/plain')

    def test_default_type_with_explicit_container_type(self):
        # Same checks as test_default_type, against msg_28.txt.
        with openfile('msg_28.txt') as source:
            msg = email.message_from_file(source)
        containers = [msg.get_payload(0), msg.get_payload(1)]
        for container in containers:
            self.assertEqual(container.get_default_type(), 'message/rfc822')
            self.assertEqual(container.get_content_type(), 'message/rfc822')
        for container in containers:
            enclosed = container.get_payload(0)
            self.assertEqual(enclosed.get_default_type(), 'text/plain')
            self.assertEqual(enclosed.get_content_type(), 'text/plain')

    def test_default_type_non_parsed(self):
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        texts = [MIMEText('message 1\n'), MIMEText('message 2\n')]
        wrappers = [MIMEMessage(text) for text in texts]
        for wrapper in wrappers:
            container.attach(wrapper)

        def check_wrapper_types():
            # Both wrappers must report message/rfc822 as actual and as
            # default type.
            for wrapper in wrappers:
                self.assertEqual(wrapper.get_content_type(),
                                 'message/rfc822')
                self.assertEqual(wrapper.get_default_type(),
                                 'message/rfc822')

        check_wrapper_types()
        self.ndiffAssertEqual(container.as_string(unixfrom=False), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # Without their own headers the wrappers must still report
        # message/rfc822.
        for wrapper in wrappers:
            del wrapper['content-type']
            del wrapper['mime-version']
        check_wrapper_types()
        self.ndiffAssertEqual(container.as_string(unixfrom=False), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        # Parts passed as _subparts become the payload, in order.
        first = MIMEText('')
        second = MIMEText('')
        msg = MIMEMultipart(_subparts=(first, second))
        self.assertEqual(len(msg.get_payload()), 2)
        self.assertEqual(msg.get_payload(0), first)
        self.assertEqual(msg.get_payload(1), second)

    def test_default_multipart_constructor(self):
        # A MIMEMultipart built without arguments is already multipart.
        self.assertTrue(MIMEMultipart().is_multipart())

    def test_multipart_default_policy(self):
        # Without an explicit policy a second 'To' header is simply added.
        msg = MIMEMultipart()
        for address in ('a@b.com', 'c@d.com'):
            msg['To'] = address
        self.assertEqual(msg.get_all('to'), ['a@b.com', 'c@d.com'])

    def test_multipart_custom_policy(self):
        # With email.policy.default a second 'To' header is rejected.
        msg = MIMEMultipart(policy=email.policy.default)
        msg['To'] = 'a@b.com'
        with self.assertRaises(ValueError) as caught:
            msg['To'] = 'c@d.com'
        self.assertEqual(str(caught.exception),
                         'There may be at most 1 To headers in a message')
2744
2745# A general test of parser->model->generator idempotency.  IOW, read a message
2746# in, parse it into a message object tree, then without touching the tree,
2747# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
2749# contain a changed date.
class TestIdempotent(TestEmailBase):

    # Line separator expected in parsed preambles, epilogues and payloads.
    linesep = '\n'

    def _msgobj(self, filename):
        # Return (parsed message, raw text) for the named test data file.
        with openfile(filename) as source:
            raw = source.read()
        return email.message_from_string(raw), raw

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg without header wrapping and compare with text.
        out = StringIO()
        Generator(out, maxheaderlen=0).flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, out.getvalue())

    def test_parse_text_message(self):
        msg, text = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')
        self.assertEqual(msg.get_params()[1], ('charset', 'us-ascii'))
        self.assertEqual(msg.get_param('charset'), 'us-ascii')
        self.assertEqual(msg.preamble, None)
        self.assertEqual(msg.epilogue, None)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        msg, text = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertEqual(msg.get_params(), None)
        self.assertEqual(msg.get_param('charset'), None)
        self._idempotent(msg, text)

    # Each of the following tests parses one data file and checks that
    # flattening the result reproduces the file's text.

    def test_simple_multipart(self):
        self._idempotent(*self._msgobj('msg_04.txt'))

    def test_MIME_digest(self):
        self._idempotent(*self._msgobj('msg_02.txt'))

    def test_long_header(self):
        self._idempotent(*self._msgobj('msg_27.txt'))

    def test_MIME_digest_with_part_headers(self):
        self._idempotent(*self._msgobj('msg_28.txt'))

    def test_mixed_with_image(self):
        self._idempotent(*self._msgobj('msg_06.txt'))

    def test_multipart_report(self):
        self._idempotent(*self._msgobj('msg_05.txt'))

    def test_dsn(self):
        self._idempotent(*self._msgobj('msg_16.txt'))

    def test_preamble_epilogue(self):
        self._idempotent(*self._msgobj('msg_21.txt'))

    def test_multipart_one_part(self):
        self._idempotent(*self._msgobj('msg_23.txt'))

    def test_multipart_no_parts(self):
        self._idempotent(*self._msgobj('msg_24.txt'))

    def test_no_start_boundary(self):
        self._idempotent(*self._msgobj('msg_31.txt'))

    def test_rfc2231_charset(self):
        self._idempotent(*self._msgobj('msg_32.txt'))

    def test_more_rfc2231_parameters(self):
        self._idempotent(*self._msgobj('msg_33.txt'))

    def test_text_plain_in_a_multipart_digest(self):
        self._idempotent(*self._msgobj('msg_34.txt'))

    def test_nested_multipart_mixeds(self):
        self._idempotent(*self._msgobj('msg_12a.txt'))

    def test_message_external_body_idempotent(self):
        self._idempotent(*self._msgobj('msg_36.txt'))

    def test_message_delivery_status(self):
        # This one is flattened with the Unix-From line included.
        self._idempotent(*self._msgobj('msg_43.txt'), unixfrom=True)

    def test_message_signed_idempotent(self):
        self._idempotent(*self._msgobj('msg_45.txt'))

    def test_content_type(self):
        msg, _ = self._msgobj('msg_05.txt')
        self.assertEqual(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        self.assertEqual(params['report-type'], 'delivery-status')
        self.assertEqual(params['boundary'],
                         'D1690A7AC1.996856090/mail.example.com')
        self.assertEqual(
            msg.preamble,
            'This is a MIME-encapsulated message.' + self.linesep)
        self.assertEqual(msg.epilogue, self.linesep)
        self.assertEqual(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        body = 'Yadda yadda yadda' + self.linesep
        first = msg.get_payload(0)
        self.assertEqual(first.get_content_type(), 'text/plain')
        self.assertEqual(first.get_payload(), body)
        second = msg.get_payload(1)
        self.assertEqual(second.get_content_type(), 'text/plain')
        self.assertEqual(second.get_payload(), body)
        third = msg.get_payload(2)
        self.assertEqual(third.get_content_type(), 'message/rfc822')
        self.assertIsInstance(third, Message)
        payload = third.get_payload()
        self.assertIsInstance(payload, list)
        self.assertEqual(len(payload), 1)
        enclosed = payload[0]
        self.assertIsInstance(enclosed, Message)
        self.assertEqual(enclosed.get_payload(), body)

    def test_parser(self):
        msg, _ = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        self.assertEqual(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        self.assertEqual(len(payload), 1)
        enclosed = payload[0]
        self.assertIsInstance(enclosed, Message)
        self.assertEqual(enclosed.get_content_type(), 'text/plain')
        enclosed_body = enclosed.get_payload()
        self.assertIsInstance(enclosed_body, str)
        self.assertEqual(enclosed.get_payload(), self.linesep)
2905
2906
2907
2908# Test various other bits of the package's functionality
2909class TestMiscellaneous(TestEmailBase):
2910    def test_message_from_string(self):
2911        with openfile('msg_01.txt') as fp:
2912            text = fp.read()
2913        msg = email.message_from_string(text)
2914        s = StringIO()
2915        # Don't wrap/continue long headers since we're trying to test
2916        # idempotency.
2917        g = Generator(s, maxheaderlen=0)
2918        g.flatten(msg)
2919        self.assertEqual(text, s.getvalue())
2920
2921    def test_message_from_file(self):
2922        with openfile('msg_01.txt') as fp:
2923            text = fp.read()
2924            fp.seek(0)
2925            msg = email.message_from_file(fp)
2926            s = StringIO()
2927            # Don't wrap/continue long headers since we're trying to test
2928            # idempotency.
2929            g = Generator(s, maxheaderlen=0)
2930            g.flatten(msg)
2931            self.assertEqual(text, s.getvalue())
2932
2933    def test_message_from_string_with_class(self):
2934        with openfile('msg_01.txt') as fp:
2935            text = fp.read()
2936
2937        # Create a subclass
2938        class MyMessage(Message):
2939            pass
2940
2941        msg = email.message_from_string(text, MyMessage)
2942        self.assertIsInstance(msg, MyMessage)
2943        # Try something more complicated
2944        with openfile('msg_02.txt') as fp:
2945            text = fp.read()
2946        msg = email.message_from_string(text, MyMessage)
2947        for subpart in msg.walk():
2948            self.assertIsInstance(subpart, MyMessage)
2949
2950    def test_message_from_file_with_class(self):
2951        # Create a subclass
2952        class MyMessage(Message):
2953            pass
2954
2955        with openfile('msg_01.txt') as fp:
2956            msg = email.message_from_file(fp, MyMessage)
2957        self.assertIsInstance(msg, MyMessage)
2958        # Try something more complicated
2959        with openfile('msg_02.txt') as fp:
2960            msg = email.message_from_file(fp, MyMessage)
2961        for subpart in msg.walk():
2962            self.assertIsInstance(subpart, MyMessage)
2963
2964    def test_custom_message_does_not_require_arguments(self):
2965        class MyMessage(Message):
2966            def __init__(self):
2967                super().__init__()
2968        msg = self._str_msg("Subject: test\n\ntest", MyMessage)
2969        self.assertIsInstance(msg, MyMessage)
2970
2971    def test__all__(self):
2972        module = __import__('email')
2973        self.assertEqual(sorted(module.__all__), [
2974            'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
2975            'generator', 'header', 'iterators', 'message',
2976            'message_from_binary_file', 'message_from_bytes',
2977            'message_from_file', 'message_from_string', 'mime', 'parser',
2978            'quoprimime', 'utils',
2979            ])
2980
2981    def test_formatdate(self):
2982        now = time.time()
2983        self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2984                         time.gmtime(now)[:6])
2985
2986    def test_formatdate_localtime(self):
2987        now = time.time()
2988        self.assertEqual(
2989            utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2990            time.localtime(now)[:6])
2991
2992    def test_formatdate_usegmt(self):
2993        now = time.time()
2994        self.assertEqual(
2995            utils.formatdate(now, localtime=False),
2996            time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2997        self.assertEqual(
2998            utils.formatdate(now, localtime=False, usegmt=True),
2999            time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
3000
3001    # parsedate and parsedate_tz will become deprecated interfaces someday
3002    def test_parsedate_returns_None_for_invalid_strings(self):
3003        self.assertIsNone(utils.parsedate(''))
3004        self.assertIsNone(utils.parsedate_tz(''))
3005        self.assertIsNone(utils.parsedate(' '))
3006        self.assertIsNone(utils.parsedate_tz(' '))
3007        self.assertIsNone(utils.parsedate('0'))
3008        self.assertIsNone(utils.parsedate_tz('0'))
3009        self.assertIsNone(utils.parsedate('A Complete Waste of Time'))
3010        self.assertIsNone(utils.parsedate_tz('A Complete Waste of Time'))
3011        # Not a part of the spec but, but this has historically worked:
3012        self.assertIsNone(utils.parsedate(None))
3013        self.assertIsNone(utils.parsedate_tz(None))
3014
3015    def test_parsedate_compact(self):
3016        # The FWS after the comma is optional
3017        self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
3018                         utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
3019
3020    def test_parsedate_no_dayofweek(self):
3021        eq = self.assertEqual
3022        eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
3023           (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
3024
3025    def test_parsedate_compact_no_dayofweek(self):
3026        eq = self.assertEqual
3027        eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
3028           (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
3029
3030    def test_parsedate_no_space_before_positive_offset(self):
3031        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
3032           (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
3033
3034    def test_parsedate_no_space_before_negative_offset(self):
3035        # Issue 1155362: we already handled '+' for this case.
3036        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
3037           (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
3038
3039
3040    def test_parsedate_accepts_time_with_dots(self):
3041        eq = self.assertEqual
3042        eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
3043           (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
3044        eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
3045           (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
3046
3047    def test_parsedate_acceptable_to_time_functions(self):
3048        eq = self.assertEqual
3049        timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
3050        t = int(time.mktime(timetup))
3051        eq(time.localtime(t)[:6], timetup[:6])
3052        eq(int(time.strftime('%Y', timetup)), 2003)
3053        timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
3054        t = int(time.mktime(timetup[:9]))
3055        eq(time.localtime(t)[:6], timetup[:6])
3056        eq(int(time.strftime('%Y', timetup[:9])), 2003)
3057
3058    def test_mktime_tz(self):
3059        self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
3060                                          -1, -1, -1, 0)), 0)
3061        self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
3062                                          -1, -1, -1, 1234)), -1234)
3063
3064    def test_parsedate_y2k(self):
3065        """Test for parsing a date with a two-digit year.
3066
3067        Parsing a date with a two-digit year should return the correct
3068        four-digit year. RFC822 allows two-digit years, but RFC2822 (which
3069        obsoletes RFC822) requires four-digit years.
3070
3071        """
3072        self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
3073                         utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
3074        self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
3075                         utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
3076
3077    def test_parseaddr_empty(self):
3078        self.assertEqual(utils.parseaddr('<>'), ('', ''))
3079        self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
3080
3081    def test_parseaddr_multiple_domains(self):
3082        self.assertEqual(
3083            utils.parseaddr('a@b@c'),
3084            ('', '')
3085        )
3086        self.assertEqual(
3087            utils.parseaddr('a@b.c@c'),
3088            ('', '')
3089        )
3090        self.assertEqual(
3091            utils.parseaddr('a@172.17.0.1@c'),
3092            ('', '')
3093        )
3094
3095    def test_noquote_dump(self):
3096        self.assertEqual(
3097            utils.formataddr(('A Silly Person', 'person@dom.ain')),
3098            'A Silly Person <person@dom.ain>')
3099
3100    def test_escape_dump(self):
3101        self.assertEqual(
3102            utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
3103            r'"A (Very) Silly Person" <person@dom.ain>')
3104        self.assertEqual(
3105            utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'),
3106            ('A (Very) Silly Person', 'person@dom.ain'))
3107        a = r'A \(Special\) Person'
3108        b = 'person@dom.ain'
3109        self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
3110
3111    def test_escape_backslashes(self):
3112        self.assertEqual(
3113            utils.formataddr((r'Arthur \Backslash\ Foobar', 'person@dom.ain')),
3114            r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
3115        a = r'Arthur \Backslash\ Foobar'
3116        b = 'person@dom.ain'
3117        self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
3118
3119    def test_quotes_unicode_names(self):
3120        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3121        name = "H\u00e4ns W\u00fcrst"
3122        addr = 'person@dom.ain'
3123        utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
3124        latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
3125        self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
3126        self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
3127            latin1_quopri)
3128
3129    def test_accepts_any_charset_like_object(self):
3130        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3131        name = "H\u00e4ns W\u00fcrst"
3132        addr = 'person@dom.ain'
3133        utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
3134        foobar = "FOOBAR"
3135        class CharsetMock:
3136            def header_encode(self, string):
3137                return foobar
3138        mock = CharsetMock()
3139        mock_expected = "%s <%s>" % (foobar, addr)
3140        self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
3141        self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
3142            utf8_base64)
3143
3144    def test_invalid_charset_like_object_raises_error(self):
3145        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3146        name = "H\u00e4ns W\u00fcrst"
3147        addr = 'person@dom.ain'
3148        # An object without a header_encode method:
3149        bad_charset = object()
3150        self.assertRaises(AttributeError, utils.formataddr, (name, addr),
3151            bad_charset)
3152
3153    def test_unicode_address_raises_error(self):
3154        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3155        addr = 'pers\u00f6n@dom.in'
3156        self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
3157        self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
3158
3159    def test_name_with_dot(self):
3160        x = 'John X. Doe <jxd@example.com>'
3161        y = '"John X. Doe" <jxd@example.com>'
3162        a, b = ('John X. Doe', 'jxd@example.com')
3163        self.assertEqual(utils.parseaddr(x), (a, b))
3164        self.assertEqual(utils.parseaddr(y), (a, b))
3165        # formataddr() quotes the name if there's a dot in it
3166        self.assertEqual(utils.formataddr((a, b)), y)
3167
3168    def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
3169        # issue 10005.  Note that in the third test the second pair of
3170        # backslashes is not actually a quoted pair because it is not inside a
3171        # comment or quoted string: the address being parsed has a quoted
3172        # string containing a quoted backslash, followed by 'example' and two
3173        # backslashes, followed by another quoted string containing a space and
3174        # the word 'example'.  parseaddr copies those two backslashes
3175        # literally.  Per rfc5322 this is not technically correct since a \ may
3176        # not appear in an address outside of a quoted string.  It is probably
3177        # a sensible Postel interpretation, though.
3178        eq = self.assertEqual
3179        eq(utils.parseaddr('""example" example"@example.com'),
3180          ('', '""example" example"@example.com'))
3181        eq(utils.parseaddr('"\\"example\\" example"@example.com'),
3182          ('', '"\\"example\\" example"@example.com'))
3183        eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
3184          ('', '"\\\\"example\\\\" example"@example.com'))
3185
3186    def test_parseaddr_preserves_spaces_in_local_part(self):
3187        # issue 9286.  A normal RFC5322 local part should not contain any
3188        # folding white space, but legacy local parts can (they are a sequence
3189        # of atoms, not dotatoms).  On the other hand we strip whitespace from
3190        # before the @ and around dots, on the assumption that the whitespace
3191        # around the punctuation is a mistake in what would otherwise be
3192        # an RFC5322 local part.  Leading whitespace is, usual, stripped as well.
3193        self.assertEqual(('', "merwok wok@xample.com"),
3194            utils.parseaddr("merwok wok@xample.com"))
3195        self.assertEqual(('', "merwok  wok@xample.com"),
3196            utils.parseaddr("merwok  wok@xample.com"))
3197        self.assertEqual(('', "merwok  wok@xample.com"),
3198            utils.parseaddr(" merwok  wok  @xample.com"))
3199        self.assertEqual(('', 'merwok"wok"  wok@xample.com'),
3200            utils.parseaddr('merwok"wok"  wok@xample.com'))
3201        self.assertEqual(('', 'merwok.wok.wok@xample.com'),
3202            utils.parseaddr('merwok. wok .  wok@xample.com'))
3203
3204    def test_formataddr_does_not_quote_parens_in_quoted_string(self):
3205        addr = ("'foo@example.com' (foo@example.com)",
3206                'foo@example.com')
3207        addrstr = ('"\'foo@example.com\' '
3208                            '(foo@example.com)" <foo@example.com>')
3209        self.assertEqual(utils.parseaddr(addrstr), addr)
3210        self.assertEqual(utils.formataddr(addr), addrstr)
3211
3212
3213    def test_multiline_from_comment(self):
3214        x = """\
3215Foo
3216\tBar <foo@example.com>"""
3217        self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
3218
3219    def test_quote_dump(self):
3220        self.assertEqual(
3221            utils.formataddr(('A Silly; Person', 'person@dom.ain')),
3222            r'"A Silly; Person" <person@dom.ain>')
3223
3224    def test_charset_richcomparisons(self):
3225        eq = self.assertEqual
3226        ne = self.assertNotEqual
3227        cset1 = Charset()
3228        cset2 = Charset()
3229        eq(cset1, 'us-ascii')
3230        eq(cset1, 'US-ASCII')
3231        eq(cset1, 'Us-AsCiI')
3232        eq('us-ascii', cset1)
3233        eq('US-ASCII', cset1)
3234        eq('Us-AsCiI', cset1)
3235        ne(cset1, 'usascii')
3236        ne(cset1, 'USASCII')
3237        ne(cset1, 'UsAsCiI')
3238        ne('usascii', cset1)
3239        ne('USASCII', cset1)
3240        ne('UsAsCiI', cset1)
3241        eq(cset1, cset2)
3242        eq(cset2, cset1)
3243
3244    def test_getaddresses(self):
3245        eq = self.assertEqual
3246        eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
3247                               'Bud Person <bperson@dom.ain>']),
3248           [('Al Person', 'aperson@dom.ain'),
3249            ('Bud Person', 'bperson@dom.ain')])
3250
3251    def test_getaddresses_nasty(self):
3252        eq = self.assertEqual
3253        eq(utils.getaddresses(['foo: ;']), [('', '')])
3254        eq(utils.getaddresses(
3255           ['[]*-- =~$']),
3256           [('', ''), ('', ''), ('', '*--')])
3257        eq(utils.getaddresses(
3258           ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
3259           [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
3260
3261    def test_getaddresses_embedded_comment(self):
3262        """Test proper handling of a nested comment"""
3263        eq = self.assertEqual
3264        addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
3265        eq(addrs[0][1], 'foo@bar.com')
3266
3267    def test_make_msgid_collisions(self):
3268        # Test make_msgid uniqueness, even with multiple threads
3269        class MsgidsThread(Thread):
3270            def run(self):
3271                # generate msgids for 3 seconds
3272                self.msgids = []
3273                append = self.msgids.append
3274                make_msgid = utils.make_msgid
3275                clock = time.monotonic
3276                tfin = clock() + 3.0
3277                while clock() < tfin:
3278                    append(make_msgid(domain='testdomain-string'))
3279
3280        threads = [MsgidsThread() for i in range(5)]
3281        with start_threads(threads):
3282            pass
3283        all_ids = sum([t.msgids for t in threads], [])
3284        self.assertEqual(len(set(all_ids)), len(all_ids))
3285
3286    def test_utils_quote_unquote(self):
3287        eq = self.assertEqual
3288        msg = Message()
3289        msg.add_header('content-disposition', 'attachment',
3290                       filename='foo\\wacky"name')
3291        eq(msg.get_filename(), 'foo\\wacky"name')
3292
3293    def test_get_body_encoding_with_bogus_charset(self):
3294        charset = Charset('not a charset')
3295        self.assertEqual(charset.get_body_encoding(), 'base64')
3296
3297    def test_get_body_encoding_with_uppercase_charset(self):
3298        eq = self.assertEqual
3299        msg = Message()
3300        msg['Content-Type'] = 'text/plain; charset=UTF-8'
3301        eq(msg['content-type'], 'text/plain; charset=UTF-8')
3302        charsets = msg.get_charsets()
3303        eq(len(charsets), 1)
3304        eq(charsets[0], 'utf-8')
3305        charset = Charset(charsets[0])
3306        eq(charset.get_body_encoding(), 'base64')
3307        msg.set_payload(b'hello world', charset=charset)
3308        eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
3309        eq(msg.get_payload(decode=True), b'hello world')
3310        eq(msg['content-transfer-encoding'], 'base64')
3311        # Try another one
3312        msg = Message()
3313        msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
3314        charsets = msg.get_charsets()
3315        eq(len(charsets), 1)
3316        eq(charsets[0], 'us-ascii')
3317        charset = Charset(charsets[0])
3318        eq(charset.get_body_encoding(), encoders.encode_7or8bit)
3319        msg.set_payload('hello world', charset=charset)
3320        eq(msg.get_payload(), 'hello world')
3321        eq(msg['content-transfer-encoding'], '7bit')
3322
3323    def test_charsets_case_insensitive(self):
3324        lc = Charset('us-ascii')
3325        uc = Charset('US-ASCII')
3326        self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
3327
3328    def test_partial_falls_inside_message_delivery_status(self):
3329        eq = self.ndiffAssertEqual
3330        # The Parser interface provides chunks of data to FeedParser in 8192
3331        # byte gulps.  SF bug #1076485 found one of those chunks inside
3332        # message/delivery-status header block, which triggered an
3333        # unreadline() of NeedMoreData.
3334        msg = self._msgobj('msg_43.txt')
3335        sfp = StringIO()
3336        iterators._structure(msg, sfp)
3337        eq(sfp.getvalue(), """\
3338multipart/report
3339    text/plain
3340    message/delivery-status
3341        text/plain
3342        text/plain
3343        text/plain
3344        text/plain
3345        text/plain
3346        text/plain
3347        text/plain
3348        text/plain
3349        text/plain
3350        text/plain
3351        text/plain
3352        text/plain
3353        text/plain
3354        text/plain
3355        text/plain
3356        text/plain
3357        text/plain
3358        text/plain
3359        text/plain
3360        text/plain
3361        text/plain
3362        text/plain
3363        text/plain
3364        text/plain
3365        text/plain
3366        text/plain
3367    text/rfc822-headers
3368""")
3369
3370    def test_make_msgid_domain(self):
3371        self.assertEqual(
3372            email.utils.make_msgid(domain='testdomain-string')[-19:],
3373            '@testdomain-string>')
3374
3375    def test_make_msgid_idstring(self):
3376        self.assertEqual(
3377            email.utils.make_msgid(idstring='test-idstring',
3378                domain='testdomain-string')[-33:],
3379            '.test-idstring@testdomain-string>')
3380
3381    def test_make_msgid_default_domain(self):
3382        with patch('socket.getfqdn') as mock_getfqdn:
3383            mock_getfqdn.return_value = domain = 'pythontest.example.com'
3384            self.assertTrue(
3385                email.utils.make_msgid().endswith(
3386                    '@' + domain + '>'))
3387
3388    def test_Generator_linend(self):
3389        # Issue 14645.
3390        with openfile('msg_26.txt', newline='\n') as f:
3391            msgtxt = f.read()
3392        msgtxt_nl = msgtxt.replace('\r\n', '\n')
3393        msg = email.message_from_string(msgtxt)
3394        s = StringIO()
3395        g = email.generator.Generator(s)
3396        g.flatten(msg)
3397        self.assertEqual(s.getvalue(), msgtxt_nl)
3398
3399    def test_BytesGenerator_linend(self):
3400        # Issue 14645.
3401        with openfile('msg_26.txt', newline='\n') as f:
3402            msgtxt = f.read()
3403        msgtxt_nl = msgtxt.replace('\r\n', '\n')
3404        msg = email.message_from_string(msgtxt_nl)
3405        s = BytesIO()
3406        g = email.generator.BytesGenerator(s)
3407        g.flatten(msg, linesep='\r\n')
3408        self.assertEqual(s.getvalue().decode('ascii'), msgtxt)
3409
3410    def test_BytesGenerator_linend_with_non_ascii(self):
3411        # Issue 14645.
3412        with openfile('msg_26.txt', 'rb') as f:
3413            msgtxt = f.read()
3414        msgtxt = msgtxt.replace(b'with attachment', b'fo\xf6')
3415        msgtxt_nl = msgtxt.replace(b'\r\n', b'\n')
3416        msg = email.message_from_bytes(msgtxt_nl)
3417        s = BytesIO()
3418        g = email.generator.BytesGenerator(s)
3419        g.flatten(msg, linesep='\r\n')
3420        self.assertEqual(s.getvalue(), msgtxt)
3421
3422    def test_mime_classes_policy_argument(self):
3423        with openfile('audiotest.au', 'rb') as fp:
3424            audiodata = fp.read()
3425        with openfile('PyBanner048.gif', 'rb') as fp:
3426            bindata = fp.read()
3427        classes = [
3428            (MIMEApplication, ('',)),
3429            (MIMEAudio, (audiodata,)),
3430            (MIMEImage, (bindata,)),
3431            (MIMEMessage, (Message(),)),
3432            (MIMENonMultipart, ('multipart', 'mixed')),
3433            (MIMEText, ('',)),
3434        ]
3435        for cls, constructor in classes:
3436            with self.subTest(cls=cls.__name__, policy='compat32'):
3437                m = cls(*constructor)
3438                self.assertIs(m.policy, email.policy.compat32)
3439            with self.subTest(cls=cls.__name__, policy='default'):
3440                m = cls(*constructor, policy=email.policy.default)
3441                self.assertIs(m.policy, email.policy.default)
3442
3443
3444# Test the iterator/generators
class TestIterators(TestEmailBase):
    def test_body_line_iterator(self):
        check = self.assertEqual
        check_text = self.ndiffAssertEqual
        # Start with a simple non-multipart message: the lines, joined back
        # together, must equal the payload.
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        check(len(lines), 6)
        check_text(EMPTYSTRING.join(lines), msg.get_payload())
        # Then a more complicated multipart, compared against the contents
        # of msg_19.txt.
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        check(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            check_text(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        # Asking only for main type 'text' must yield two subparts.
        msg = self._msgobj('msg_04.txt')
        payloads = [part.get_payload()
                    for part in iterators.typed_subpart_iterator(msg, 'text')]
        self.assertEqual(len(payloads), 2)
        self.assertEqual(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        # Asking for text/plain in msg_03.txt must yield exactly one part.
        msg = self._msgobj('msg_03.txt')
        matches = iterators.typed_subpart_iterator(msg, 'text', 'plain')
        payloads = [part.get_payload() for part in matches]
        self.assertEqual(len(payloads), 1)
        self.assertEqual(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings, so a CR at the end of one push() followed by a LF
           at the start of the next push() added an empty line.
        '''
        from email.feedparser import BufferedSubFile, NeedMoreData
        # Each entry is (text to push, number of lines expected to become
        # readable right after that push).
        pushes = [
            ("a\r \n",  2),
            ("b",       0),
            ("c\n",     1),
            ("",        0),
            ("d\r\n",   1),
            ("e\r",     0),
            ("\nf",     1),
            ("\r\n",    1),
          ]
        bsf = BufferedSubFile()
        collected = []
        expected_total = 0
        for text, expected_now in pushes:
            bsf.push(text)
            expected_total += expected_now
            # Drain every line that is available before the next push.
            ready = list(iter(bsf.readline, NeedMoreData))
            collected.extend(ready)
            self.assertEqual(expected_now, len(ready))
        self.assertEqual(len(collected), expected_total)
        # Nothing may be lost or invented along the way.
        self.assertEqual(''.join([text for text, _ in pushes]),
                         ''.join(collected))

    def test_push_random(self):
        from email.feedparser import BufferedSubFile, NeedMoreData

        # Push random text through BufferedSubFile in fixed-size chunks and
        # compare the lines read back with what str.splitlines() produces.
        n = 10000
        chunksize = 5
        chars = 'abcd \t\r\n'

        text = ''.join(choice(chars) for _ in range(n)) + '\n'
        target = text.splitlines(True)

        bsf = BufferedSubFile()
        lines = []
        for start in range(0, len(text), chunksize):
            bsf.push(text[start:start+chunksize])
            lines.extend(iter(bsf.readline, NeedMoreData))
        self.assertEqual(lines, target)
3546
3547
class TestFeedParsers(TestEmailBase):

    def parse(self, chunks):
        # Feed every item of chunks, in order, to a fresh FeedParser and
        # return what its close() produces.
        parser = FeedParser()
        for piece in chunks:
            parser.feed(piece)
        return parser.close()

    def test_empty_header_name_handled(self):
        # Issue 19996.  Note that a plain string is handed to parse(), so
        # the text is fed to the parser one character at a time.
        msg = self.parse("First: val\n: bad\nSecond: val")
        self.assertEqual(msg['First'], 'val')
        self.assertEqual(msg['Second'], 'val')

    def test_newlines(self):
        # LF, CR and CRLF all end a header line, including when the line
        # ending falls on or across a chunk boundary.
        cases = (
            (['a:\nb:\rc:\r\nd:\n'], ['a', 'b', 'c', 'd']),
            (['a:\nb:\rc:\r\nd:'], ['a', 'b', 'c', 'd']),
            (['a:\rb', 'c:\n'], ['a', 'bc']),
            (['a:\r', 'b:\n'], ['a', 'b']),
            (['a:\r', '\nb:\n'], ['a', 'b']),
        )
        for chunks, names in cases:
            self.assertEqual(self.parse(chunks).keys(), names)

        # Only CR and LF should break header fields
        msg = self.parse(['a:\x85b:\u2028c:\n'])
        self.assertEqual(msg.items(), [('a', '\x85b:\u2028c:')])
        msg = self.parse(['a:\r', 'b:\x85', 'c:\n'])
        self.assertEqual(msg.items(), [('a', ''), ('b', '\x85c:')])

    def test_long_lines(self):
        # Expected peak memory use on 32-bit platform: 6*N*M bytes.
        M, N = 1000, 20000
        plain = 'x' * M
        with_x85 = plain + '\x85'
        # A body made of N chunks that contain no CR or LF must come back
        # as one payload, whichever way the header block was terminated.
        for header, chunk in (('a:b\n\n', plain),
                              ('a:b\r\r', plain),
                              ('a:b\r\r', with_x85)):
            msg = self.parse([header] + [chunk] * N)
            self.assertEqual(msg.items(), [('a', 'b')])
            self.assertEqual(msg.get_payload(), chunk * N)
        # The same holds for a header value spread over N chunks.
        msg = self.parse(['a:\r', 'b: '] + [plain] * N)
        self.assertEqual(msg.items(), [('a', ''), ('b', plain * N)])
3594
3595
class TestParsers(TestEmailBase):

    # Do not truncate the diffs shown when an assertion in this class fails.
    maxDiff = None

    def test_header_parser(self):
        check = self.assertEqual
        # Parse only the headers of a complex multipart MIME document; the
        # body is then left as one unparsed string.
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        check(msg['from'], 'ppp-request@zzz.org')
        check(msg['to'], 'ppp@zzz.org')
        check(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_bytes_header_parser(self):
        check = self.assertEqual
        # Same as the text variant, but reading the document as bytes.
        with openfile('msg_02.txt', 'rb') as fp:
            msg = email.parser.BytesHeaderParser().parse(fp)
        check(msg['from'], 'ppp-request@zzz.org')
        check(msg['to'], 'ppp@zzz.org')
        check(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)
        self.assertIsInstance(msg.get_payload(decode=True), bytes)

    def test_bytes_parser_does_not_close_file(self):
        # Parsing must leave the caller's binary file open.
        with openfile('msg_02.txt', 'rb') as fp:
            email.parser.BytesParser().parse(fp)
            self.assertFalse(fp.closed)

    def test_bytes_parser_on_exception_does_not_close_file(self):
        # Even when the strict policy makes parsing raise, the binary file
        # must be left open.
        with openfile('msg_15.txt', 'rb') as fp:
            strict_parser = email.parser.BytesParser(
                policy=email.policy.strict)
            with self.assertRaises(email.errors.StartBoundaryNotFoundDefect):
                strict_parser.parse(fp)
            self.assertFalse(fp.closed)

    def test_parser_does_not_close_file(self):
        # Parsing must leave the caller's text file open.
        with openfile('msg_02.txt', 'r') as fp:
            email.parser.Parser().parse(fp)
            self.assertFalse(fp.closed)

    def test_parser_on_exception_does_not_close_file(self):
        # Even when the strict policy makes parsing raise, the text file
        # must be left open.
        with openfile('msg_15.txt', 'r') as fp:
            strict_parser = email.parser.Parser(policy=email.policy.strict)
            with self.assertRaises(email.errors.StartBoundaryNotFoundDefect):
                strict_parser.parse(fp)
            self.assertFalse(fp.closed)

    def test_whitespace_continuation(self):
        check = self.assertEqual
        # The line after the Subject: header holds only whitespace, but it
        # is not empty, so it continues the Subject value instead of ending
        # the header block.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        check(msg['subject'], 'the next line has a space on it\n ')
        check(msg['message-id'], 'spam')
        check(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        check = self.assertEqual
        # Same situation as above, except that the Subject: header with its
        # whitespace-only continuation line is the last header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        check(msg['subject'], 'the next line has a space on it\n ')
        check(msg['message-id'], 'spam')
        check(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        check = self.assertEqual
        # A message whose lines end in CRLF must still split into its two
        # parts, with the CRLFs kept inside the part payload.
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        check(len(msg.get_payload()), 2)
        text_part = msg.get_payload(0)
        check(text_part.get_content_type(), 'text/plain')
        check(text_part.get_payload(), 'Simple email with attachment.\r\n\r\n')
        attachment = msg.get_payload(1)
        check(attachment.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file, so
        # flattening with linesep='\r\n' must reproduce the text exactly.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        out = StringIO()
        Generator(out).flatten(msg, linesep='\r\n')
        self.assertEqual(out.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        check = self.assertEqual
        check_text = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        check(msg.is_multipart(), 1)
        check(len(msg.get_payload()), 2)
        # Both message/rfc822 parts have the same shape and differ only in
        # the text of the message they wrap.
        for index, body in ((0, 'message 1\n'), (1, 'message 2\n')):
            wrapper = msg.get_payload(index)
            check(wrapper.get_content_type(), 'message/rfc822')
            check(wrapper.is_multipart(), 1)
            check(len(wrapper.get_payload()), 1)
            inner = wrapper.get_payload(0)
            check(inner.is_multipart(), 0)
            check(inner.get_content_type(), 'text/plain')
            check_text(inner.get_payload(), body)

    def test_three_lines(self):
        # A bug report by Andrew McNamara: three header lines, the first
        # with an unterminated angle bracket, and no trailing newline.
        text = NL.join([
            'From: Andrew Person <aperson@dom.ain',
            'Subject: Test',
            'Date: Tue, 20 Aug 2002 16:43:45 +1000',
        ])
        msg = email.message_from_string(text)
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly:
        # header values must come back without their CRLF line ending.
        value1 = 'text'
        value2 = 'more text'
        text = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(text)
        self.assertEqual(msg.get('Header'), value1)
        self.assertEqual(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        # Header names may contain unusual printable characters; all three
        # lines here count as headers.
        text = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(text)
        self.assertEqual(len(msg), 3)
        self.assertEqual(sorted(name for name in msg),
                         ['!"#QUX;~', '>From', 'From'])
        self.assertEqual(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        # The first line has a space before any colon, so it is not a
        # header, and no headers at all are expected from this text.
        text = ('>From foo@example.com 11:25:53\n'
                'From: bar\n!"#QUX;~: zoo\n\nbody')
        msg = email.message_from_string(text)
        self.assertEqual(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        # Header names that are a single character long are valid.
        text = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(text)
        self.assertEqual(sorted(msg.keys()), ['A', 'B', 'CC'])
        self.assertEqual(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part
        # ending with "\r\n\n".
        text = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
          )
        msg = email.message_from_string(text)
        body = msg.get_payload(0).get_payload()
        self.assertTrue(body.endswith('\r\n'))
3796
3797
class Test8BitBytesHandling(TestEmailBase):
    # In Python3 all input is string, but that doesn't work if the actual input
    # uses an 8bit transfer encoding.  To hack around that, in email 5.1 we
    # decode byte streams using the surrogateescape error handler, and
    # reconvert to binary at appropriate places if we detect surrogates.  This
    # doesn't allow us to transform headers with 8bit bytes (they get munged),
    # but it does allow us to parse and preserve them, and to decode body
    # parts that use an 8bit CTE.

    # Template for a single-part text message.  Each body test fills in the
    # charset, the Content-Transfer-Encoding and one line of body text, then
    # encodes the result to bytes before parsing it.
    bodytest_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: baz
        Mime-Version: 1.0
        Content-Type: text/plain; charset={charset}
        Content-Transfer-Encoding: {cte}

        {bodyline}
        """)

    def test_known_8bit_CTE(self):
        # With a known charset and an 8bit CTE the str payload is the decoded
        # text, and decode=True gives back the original utf-8 bytes.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='8bit',
                                     bodyline='pöstal').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), "pöstal\n")
        self.assertEqual(msg.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_unknown_8bit_CTE(self):
        # With an unrecognised charset each non-ASCII byte shows up as U+FFFD
        # in the str payload; decode=True still returns the original bytes.
        m = self.bodytest_msg.format(charset='notavalidcharset',
                                     cte='8bit',
                                     bodyline='pöstal').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
        self.assertEqual(msg.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_8bit_in_quopri_body(self):
        # This is non-RFC compliant data...without 'decode' the library code
        # decodes the body using the charset from the headers, and because the
        # source byte really is utf-8 this works.  This is likely to fail
        # against real dirty data (ie: produce mojibake), but the data is
        # invalid anyway so it is as good a guess as any.  But this means that
        # this test just confirms the current behavior; that behavior is not
        # necessarily the best possible behavior.  With 'decode' it is
        # returning the raw bytes, so that test should be of correct behavior,
        # or at least produce the same result that email4 did.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='quoted-printable',
                                     bodyline='p=C3=B6stál').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
        self.assertEqual(msg.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
        # This is similar to the previous test, but proves that if the 8bit
        # byte is undecodeable in the specified charset, it gets replaced
        # by the unicode 'unknown' character.  Again, this may or may not
        # be the ideal behavior.  Note that if decode=False none of the
        # decoders will get involved, so this is the only test we need
        # for this behavior.
        m = self.bodytest_msg.format(charset='ascii',
                                     cte='quoted-printable',
                                     bodyline='p=C3=B6stál').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
        self.assertEqual(msg.get_payload(decode=True),
                        'pöstál\n'.encode('utf-8'))

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_8bit_in_base64_body(self):
        # If we get 8bit bytes in a base64 body, we can just ignore them
        # as being outside the base64 alphabet and decode anyway.  But
        # we register a defect.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='base64',
                                     bodyline='cMO2c3RhbAá=').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(decode=True),
                         'pöstal'.encode('utf-8'))
        self.assertIsInstance(msg.defects[0],
                              errors.InvalidBase64CharactersDefect)

    def test_8bit_in_uuencode_body(self):
        # Sticking an 8bit byte in a uuencode block makes it undecodable by
        # normal means, so the block is returned undecoded, but as bytes.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='uuencode',
                                     bodyline='<,.V<W1A; á ').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(decode=True),
                         '<,.V<W1A; á \n'.encode('utf-8'))


    # Pairs of (raw header line fed to the parser, (field name, value that
    # is expected when the parsed message is rendered with str())).
    headertest_headers = (
        ('From: foo@bar.com', ('From', 'foo@bar.com')),
        ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
        ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
            '\tJean de Baddie',
            ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
                'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
                ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
        ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
        )
    # The raw header lines joined into one utf-8 encoded message.  Note that
    # the body line follows the last header directly, with no blank
    # separator line in between.
    headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
        '\nYes, they are flying.\n').encode('utf-8')

    def test_get_8bit_header(self):
        # Both access styles stringify the value with every 8bit byte
        # replaced by U+FFFD.
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
        self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')

    def test_print_8bit_headers(self):
        # str(msg) must show each header in the encoded form recorded in
        # headertest_headers, followed by a blank line and the body.
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertEqual(str(msg),
                         textwrap.dedent("""\
                            From: {}
                            To: {}
                            Subject: {}
                            From: {}

                            Yes, they are flying.
                            """).format(*[expected[1] for (_, expected) in
                                        self.headertest_headers]))

    def test_values_with_8bit_headers(self):
        # values() yields the header values in message order; stringified,
        # the 8bit bytes appear as U+FFFD.
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual([str(x) for x in msg.values()],
                              ['foo@bar.com',
                               'b\uFFFD\uFFFDz',
                               'Maintenant je vous pr\uFFFD\uFFFDsente mon '
                                   'coll\uFFFD\uFFFDgue, le pouf '
                                   'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
                                   '\tJean de Baddie',
                               "g\uFFFD\uFFFDst"])

    def test_items_with_8bit_headers(self):
        # Same expectations as for values(), but checked through items().
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
                              [('From', 'foo@bar.com'),
                               ('To', 'b\uFFFD\uFFFDz'),
                               ('Subject', 'Maintenant je vous '
                                  'pr\uFFFD\uFFFDsente '
                                  'mon coll\uFFFD\uFFFDgue, le pouf '
                                  'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
                                  '\tJean de Baddie'),
                               ('From', 'g\uFFFD\uFFFDst')])

    def test_get_all_with_8bit_headers(self):
        # get_all() returns both From headers, the second with U+FFFD
        # standing in for its 8bit bytes.
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual([str(x) for x in msg.get_all('from')],
                              ['foo@bar.com',
                               'g\uFFFD\uFFFDst'])

    def test_get_content_type_with_8bit(self):
        # An 8bit byte inside the Content-Type value is reported as U+FFFD
        # by the content type accessors.
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/pl\xA7in; charset=utf-8
            """).encode('latin-1'))
        self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
        self.assertEqual(msg.get_content_maintype(), "text")
        self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")

    # test_headerregistry.TestContentTypeHeader.non_ascii_in_params
    def test_get_params_with_8bit(self):
        # 8bit bytes in parameter names and values come back as U+FFFD.
        msg = email.message_from_bytes(
            'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
        self.assertEqual(msg.get_params(header='x-header'),
           [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
        self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
        # XXX: someday you might be able to get 'b\xa7r', for now you can't.
        self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)

    # test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value
    def test_get_rfc2231_params_with_8bit(self):
        # An 8bit byte inside an RFC 2231 encoded value is reported as
        # U+FFFD in the (charset, language, value) triple.
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/plain; charset=us-ascii;
             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
             ).encode('latin-1'))
        self.assertEqual(msg.get_param('title'),
            ('us-ascii', 'en', 'This is not f\uFFFDn'))

    def test_set_rfc2231_params_with_8bit(self):
        # Replacing a parameter whose current value contains an 8bit byte
        # must work.
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/plain; charset=us-ascii;
             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
             ).encode('latin-1'))
        msg.set_param('title', 'test')
        self.assertEqual(msg.get_param('title'), 'test')

    def test_del_rfc2231_params_with_8bit(self):
        # Deleting such a parameter must work and leave the rest of the
        # Content-Type header usable.
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/plain; charset=us-ascii;
             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
             ).encode('latin-1'))
        msg.del_param('title')
        self.assertEqual(msg.get_param('title'), None)
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_payload_with_8bit_cte_header(self):
        # A Content-Transfer-Encoding value mangled by an 8bit byte is not
        # treated as base64: the payload comes back unchanged.
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Transfer-Encoding: b\xa7se64
            Content-Type: text/plain; charset=latin-1

            payload
            """).encode('latin-1'))
        self.assertEqual(msg.get_payload(), 'payload\n')
        self.assertEqual(msg.get_payload(decode=True), b'payload\n')

    # A utf-8 encoded message with 8bit bytes in two headers and in an
    # 8bit-CTE body.
    non_latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: báz
        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
        \tJean de Baddie
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        Да, они летят.
        """).encode('utf-8')

    def test_bytes_generator(self):
        # BytesGenerator must reproduce the 8bit input byte for byte.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg)
        self.assertEqual(out.getvalue(), self.non_latin_bin_msg)

    def test_bytes_generator_handles_None_body(self):
        # Issue 11019
        msg = email.message.Message()
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg)
        self.assertEqual(out.getvalue(), b"\n")

    # Expected text when non_latin_bin_msg is flattened by the str
    # Generator: 8bit headers become unknown-8bit encoded words (with the
    # Subject folded over three lines) and the body is base64 encoded.
    non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
        From: foo@bar.com
        To: =?unknown-8bit?q?b=C3=A1z?=
        Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
         =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
         =?unknown-8bit?q?_Jean_de_Baddie?=
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: base64

        0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
        """)

    def test_generator_handles_8bit(self):
        # The str Generator must emit the 7bit-safe, wrapped form.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = StringIO()
        email.generator.Generator(out).flatten(msg)
        self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)

    def test_str_generator_should_not_mutate_msg_when_handling_8bit(self):
        # Flatten to bytes before and after a str flatten; both byte
        # outputs must be identical.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = BytesIO()
        BytesGenerator(out).flatten(msg)
        orig_value = out.getvalue()
        Generator(StringIO()).flatten(msg) # Should not mutate msg!
        out = BytesIO()
        BytesGenerator(out).flatten(msg)
        self.assertEqual(out.getvalue(), orig_value)

    def test_bytes_generator_with_unix_from(self):
        # The unixfrom contains a current date, so we can't check it
        # literally.  Just make sure the first word is 'From' and the
        # rest of the message matches the input.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
        lines = out.getvalue().split(b'\n')
        self.assertEqual(lines[0].split()[0], b'From')
        self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)

    # Variant of the wrapped form in which the first two of the three
    # Subject lines (list indices 2 and 3) are merged into a single line;
    # this is what str() of the parsed message is compared against below.
    non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
    non_latin_bin_msg_as7bit[2:4] = [
        'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
         'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
    non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)

    def test_message_from_binary_file(self):
        # Round-trip through a real file opened in binary mode.  The file
        # is created in the current directory and removed again by the
        # cleanup registered below.
        fn = 'test.msg'
        self.addCleanup(unlink, fn)
        with open(fn, 'wb') as testfile:
            testfile.write(self.non_latin_bin_msg)
        with open(fn, 'rb') as testfile:
            m = email.parser.BytesParser().parse(testfile)
        self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)

    # A latin-1 encoded message whose 8bit-CTE body contains non-ASCII
    # characters.
    latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="latin-1"
        Content-Transfer-Encoding: 8bit

        oh là là, know what I mean, know what I mean?
        """).encode('latin-1')

    # Expected str() rendering of latin_bin_msg: the charset is spelled
    # iso-8859-1 and the body is quoted-printable encoded.
    latin_bin_msg_as7bit = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="iso-8859-1"
        Content-Transfer-Encoding: quoted-printable

        oh l=E0 l=E0, know what I mean, know what I mean?
        """)

    def test_string_generator_reencodes_to_quopri_when_appropriate(self):
        m = email.message_from_bytes(self.latin_bin_msg)
        self.assertEqual(str(m), self.latin_bin_msg_as7bit)

    def test_decoded_generator_emits_unicode_body(self):
        m = email.message_from_bytes(self.latin_bin_msg)
        out = StringIO()
        email.generator.DecodedGenerator(out).flatten(m)
        # DecodedGenerator output contains an extra blank line compared
        # to the input message.  RDM: not sure if this is a bug or not,
        # but it is not specific to the 8bit->7bit conversion.
        self.assertEqual(out.getvalue(),
            self.latin_bin_msg.decode('latin-1')+'\n')

    def test_bytes_feedparser(self):
        # Feed the message in 10-byte slices so that chunk boundaries fall
        # at arbitrary places in the input.
        bfp = email.feedparser.BytesFeedParser()
        for i in range(0, len(self.latin_bin_msg), 10):
            bfp.feed(self.latin_bin_msg[i:i+10])
        m = bfp.close()
        self.assertEqual(str(m), self.latin_bin_msg_as7bit)

    def test_crlf_flatten(self):
        # Flattening with a CRLF linesep must reproduce the bytes of
        # msg_26.txt exactly.
        with openfile('msg_26.txt', 'rb') as fp:
            text = fp.read()
        msg = email.message_from_bytes(text)
        s = BytesIO()
        g = email.generator.BytesGenerator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_8bit_multipart(self):
        # Issue 11605
        source = textwrap.dedent("""\
            Date: Fri, 18 Mar 2011 17:15:43 +0100
            To: foo@example.com
            From: foodwatch-Newsletter <bar@example.com>
            Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
            Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
            MIME-Version: 1.0
            Content-Type: multipart/alternative;
                    boundary="b1_76a486bee62b0d200f33dc2ca08220ad"

            --b1_76a486bee62b0d200f33dc2ca08220ad
            Content-Type: text/plain; charset="utf-8"
            Content-Transfer-Encoding: 8bit

            Guten Tag, ,

            mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
            Nachrichten aus Japan.


            --b1_76a486bee62b0d200f33dc2ca08220ad
            Content-Type: text/html; charset="utf-8"
            Content-Transfer-Encoding: 8bit

            <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
                "http://www.w3.org/TR/html4/loose.dtd">
            <html lang="de">
            <head>
                    <title>foodwatch - Newsletter</title>
            </head>
            <body>
              <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
                 die Nachrichten aus Japan.</p>
            </body>
            </html>
            --b1_76a486bee62b0d200f33dc2ca08220ad--

            """).encode('utf-8')
        # Parsing and re-flattening the 8bit multipart must be lossless.
        msg = email.message_from_bytes(source)
        s = BytesIO()
        g = email.generator.BytesGenerator(s)
        g.flatten(msg)
        self.assertEqual(s.getvalue(), source)

    def test_bytes_generator_b_encoding_linesep(self):
        # Issue 14062: b encoding was tacking on an extra \n.
        m = Message()
        # This has enough non-ascii that it should always end up b encoded.
        m['Subject'] = Header('žluťoučký kůň')
        s = BytesIO()
        g = email.generator.BytesGenerator(s)
        g.flatten(m, linesep='\r\n')
        self.assertEqual(
            s.getvalue(),
            b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')

    def test_generator_b_encoding_linesep(self):
        # Since this broke in BytesGenerator, test Generator for completeness.
        m = Message()
        # This has enough non-ascii that it should always end up b encoded.
        m['Subject'] = Header('žluťoučký kůň')
        s = StringIO()
        g = email.generator.Generator(s)
        g.flatten(m, linesep='\r\n')
        self.assertEqual(
            s.getvalue(),
            'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')

    # Do not truncate the diffs shown when an assertion in this class fails.
    maxDiff = None
4210
4211
class BaseTestBytesGeneratorIdempotent:
    # Mixin that swaps in bytes-based versions of the _msgobj() and
    # _idempotent() helpers.  Users of the mixin must provide the
    # attributes read below: linesep, blinesep and normalize_linesep_regex,
    # plus an assertEqual() method.

    maxDiff = None

    def _msgobj(self, filename):
        # Load the sample file as bytes, rewrite its line endings to
        # self.blinesep, and return both the parsed message and the
        # normalized bytes it was parsed from.
        with openfile(filename, 'rb') as stream:
            raw = stream.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg without header wrapping (maxheaderlen=0), using
        # self.linesep, and require the output to equal data exactly.
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        flattened = sink.getvalue()
        self.assertEqual(data, flattened)
4228
4229
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                    TestIdempotent):
    # LF flavour of the bytes idempotency tests: every CRLF pair in the
    # sample data is rewritten to a bare LF before parsing, and messages
    # are flattened with '\n' as the line separator.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
4235
4236
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # CRLF flavour of the bytes idempotency tests: every LF that is not
    # already preceded by CR is rewritten to CRLF before parsing, and
    # messages are flattened with '\r\n' as the line separator.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
4242
4243
class TestBase64(unittest.TestCase):
    # Unit tests for the helpers in email.base64mime.

    def test_len(self):
        # header_length() must report the size of the base64 text: four
        # output characters for every (possibly partial) group of three
        # input characters.
        check = self.assertEqual
        check(base64mime.header_length('hello'),
              len(base64mime.body_encode(b'hello', eol='')))
        for size in range(15):
            groups, leftover = divmod(size, 3)
            if leftover:
                groups += 1
            check(base64mime.header_length('x' * size), 4 * groups)

    def test_decode(self):
        for encoded, expected in (('', b''), ('aGVsbG8=', b'hello')):
            self.assertEqual(base64mime.decode(encoded), expected)

    def test_encode(self):
        encode = base64mime.body_encode
        check = self.assertEqual
        check(encode(b''), b'')
        check(encode(b'hello'), 'aGVsbG8=\n')
        # A newline in the input is encoded like any other byte.
        check(encode(b'hello\n'), 'aGVsbG8K\n')
        # 100 input bytes wrapped at 40 columns give three full lines and
        # one short final line, each terminated by the eol string.
        payload = b'xxxx ' * 20
        wrapped = ['eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg'] * 3
        wrapped.append('eHh4eCB4eHh4IA==')
        # Test the maxlinelen arg
        check(encode(payload, maxlinelen=40),
              ''.join(line + '\n' for line in wrapped))
        # Test the eol argument
        check(encode(payload, maxlinelen=40, eol='\r\n'),
              ''.join(line + '\r\n' for line in wrapped))

    def test_header_encode(self):
        encode = base64mime.header_encode
        check = self.assertEqual
        check(encode('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        check(encode('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        check(encode('hello', charset='iso-8859-2'),
              '=?iso-8859-2?b?aGVsbG8=?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
4294
4295
4296
4297class TestQuopri(unittest.TestCase):
4298    def setUp(self):
4299        # Set of characters (as byte integers) that don't need to be encoded
4300        # in headers.
4301        self.hlit = list(chain(
4302            range(ord('a'), ord('z') + 1),
4303            range(ord('A'), ord('Z') + 1),
4304            range(ord('0'), ord('9') + 1),
4305            (c for c in b'!*+-/')))
4306        # Set of characters (as byte integers) that do need to be encoded in
4307        # headers.
4308        self.hnon = [c for c in range(256) if c not in self.hlit]
4309        assert len(self.hlit) + len(self.hnon) == 256
4310        # Set of characters (as byte integers) that don't need to be encoded
4311        # in bodies.
4312        self.blit = list(range(ord(' '), ord('~') + 1))
4313        self.blit.append(ord('\t'))
4314        self.blit.remove(ord('='))
4315        # Set of characters (as byte integers) that do need to be encoded in
4316        # bodies.
4317        self.bnon = [c for c in range(256) if c not in self.blit]
4318        assert len(self.blit) + len(self.bnon) == 256
4319
4320    def test_quopri_header_check(self):
4321        for c in self.hlit:
4322            self.assertFalse(quoprimime.header_check(c),
4323                        'Should not be header quopri encoded: %s' % chr(c))
4324        for c in self.hnon:
4325            self.assertTrue(quoprimime.header_check(c),
4326                            'Should be header quopri encoded: %s' % chr(c))
4327
4328    def test_quopri_body_check(self):
4329        for c in self.blit:
4330            self.assertFalse(quoprimime.body_check(c),
4331                        'Should not be body quopri encoded: %s' % chr(c))
4332        for c in self.bnon:
4333            self.assertTrue(quoprimime.body_check(c),
4334                            'Should be body quopri encoded: %s' % chr(c))
4335
4336    def test_header_quopri_len(self):
4337        eq = self.assertEqual
4338        eq(quoprimime.header_length(b'hello'), 5)
4339        # RFC 2047 chrome is not included in header_length().
4340        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
4341           quoprimime.header_length(b'hello') +
4342           # =?xxx?q?...?= means 10 extra characters
4343           10)
4344        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
4345        # RFC 2047 chrome is not included in header_length().
4346        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
4347           quoprimime.header_length(b'h@e@l@l@o@') +
4348           # =?xxx?q?...?= means 10 extra characters
4349           10)
4350        for c in self.hlit:
4351            eq(quoprimime.header_length(bytes([c])), 1,
4352               'expected length 1 for %r' % chr(c))
4353        for c in self.hnon:
4354            # Space is special; it's encoded to _
4355            if c == ord(' '):
4356                continue
4357            eq(quoprimime.header_length(bytes([c])), 3,
4358               'expected length 3 for %r' % chr(c))
4359        eq(quoprimime.header_length(b' '), 1)
4360
4361    def test_body_quopri_len(self):
4362        eq = self.assertEqual
4363        for c in self.blit:
4364            eq(quoprimime.body_length(bytes([c])), 1)
4365        for c in self.bnon:
4366            eq(quoprimime.body_length(bytes([c])), 3)
4367
4368    def test_quote_unquote_idempotent(self):
4369        for x in range(256):
4370            c = chr(x)
4371            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
4372
4373    def _test_header_encode(self, header, expected_encoded_header, charset=None):
4374        if charset is None:
4375            encoded_header = quoprimime.header_encode(header)
4376        else:
4377            encoded_header = quoprimime.header_encode(header, charset)
4378        self.assertEqual(encoded_header, expected_encoded_header)
4379
4380    def test_header_encode_null(self):
4381        self._test_header_encode(b'', '')
4382
4383    def test_header_encode_one_word(self):
4384        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')
4385
4386    def test_header_encode_two_lines(self):
4387        self._test_header_encode(b'hello\nworld',
4388                                '=?iso-8859-1?q?hello=0Aworld?=')
4389
4390    def test_header_encode_non_ascii(self):
4391        self._test_header_encode(b'hello\xc7there',
4392                                '=?iso-8859-1?q?hello=C7there?=')
4393
4394    def test_header_encode_alt_charset(self):
4395        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
4396                charset='iso-8859-2')
4397
4398    def _test_header_decode(self, encoded_header, expected_decoded_header):
4399        decoded_header = quoprimime.header_decode(encoded_header)
4400        self.assertEqual(decoded_header, expected_decoded_header)
4401
4402    def test_header_decode_null(self):
4403        self._test_header_decode('', '')
4404
4405    def test_header_decode_one_word(self):
4406        self._test_header_decode('hello', 'hello')
4407
4408    def test_header_decode_two_lines(self):
4409        self._test_header_decode('hello=0Aworld', 'hello\nworld')
4410
4411    def test_header_decode_non_ascii(self):
4412        self._test_header_decode('hello=C7there', 'hello\xc7there')
4413
4414    def test_header_decode_re_bug_18380(self):
4415        # Issue 18380: Call re.sub with a positional argument for flags in the wrong position
4416        self.assertEqual(quoprimime.header_decode('=30' * 257), '0' * 257)
4417
4418    def _test_decode(self, encoded, expected_decoded, eol=None):
4419        if eol is None:
4420            decoded = quoprimime.decode(encoded)
4421        else:
4422            decoded = quoprimime.decode(encoded, eol=eol)
4423        self.assertEqual(decoded, expected_decoded)
4424
4425    def test_decode_null_word(self):
4426        self._test_decode('', '')
4427
4428    def test_decode_null_line_null_word(self):
4429        self._test_decode('\r\n', '\n')
4430
4431    def test_decode_one_word(self):
4432        self._test_decode('hello', 'hello')
4433
4434    def test_decode_one_word_eol(self):
4435        self._test_decode('hello', 'hello', eol='X')
4436
4437    def test_decode_one_line(self):
4438        self._test_decode('hello\r\n', 'hello\n')
4439
4440    def test_decode_one_line_lf(self):
4441        self._test_decode('hello\n', 'hello\n')
4442
4443    def test_decode_one_line_cr(self):
4444        self._test_decode('hello\r', 'hello\n')
4445
4446    def test_decode_one_line_nl(self):
4447        self._test_decode('hello\n', 'helloX', eol='X')
4448
4449    def test_decode_one_line_crnl(self):
4450        self._test_decode('hello\r\n', 'helloX', eol='X')
4451
4452    def test_decode_one_line_one_word(self):
4453        self._test_decode('hello\r\nworld', 'hello\nworld')
4454
4455    def test_decode_one_line_one_word_eol(self):
4456        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')
4457
4458    def test_decode_two_lines(self):
4459        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')
4460
4461    def test_decode_two_lines_eol(self):
4462        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')
4463
4464    def test_decode_one_long_line(self):
4465        self._test_decode('Spam' * 250, 'Spam' * 250)
4466
4467    def test_decode_one_space(self):
4468        self._test_decode(' ', '')
4469
4470    def test_decode_multiple_spaces(self):
4471        self._test_decode(' ' * 5, '')
4472
4473    def test_decode_one_line_trailing_spaces(self):
4474        self._test_decode('hello    \r\n', 'hello\n')
4475
4476    def test_decode_two_lines_trailing_spaces(self):
4477        self._test_decode('hello    \r\nworld   \r\n', 'hello\nworld\n')
4478
4479    def test_decode_quoted_word(self):
4480        self._test_decode('=22quoted=20words=22', '"quoted words"')
4481
4482    def test_decode_uppercase_quoting(self):
4483        self._test_decode('ab=CD=EF', 'ab\xcd\xef')
4484
4485    def test_decode_lowercase_quoting(self):
4486        self._test_decode('ab=cd=ef', 'ab\xcd\xef')
4487
4488    def test_decode_soft_line_break(self):
4489        self._test_decode('soft line=\r\nbreak', 'soft linebreak')
4490
4491    def test_decode_false_quoting(self):
4492        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')
4493
4494    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
4495        kwargs = {}
4496        if maxlinelen is None:
4497            # Use body_encode's default.
4498            maxlinelen = 76
4499        else:
4500            kwargs['maxlinelen'] = maxlinelen
4501        if eol is None:
4502            # Use body_encode's default.
4503            eol = '\n'
4504        else:
4505            kwargs['eol'] = eol
4506        encoded_body = quoprimime.body_encode(body, **kwargs)
4507        self.assertEqual(encoded_body, expected_encoded_body)
4508        if eol == '\n' or eol == '\r\n':
4509            # We know how to split the result back into lines, so maxlinelen
4510            # can be checked.
4511            for line in encoded_body.splitlines():
4512                self.assertLessEqual(len(line), maxlinelen)
4513
4514    def test_encode_null(self):
4515        self._test_encode('', '')
4516
4517    def test_encode_null_lines(self):
4518        self._test_encode('\n\n', '\n\n')
4519
4520    def test_encode_one_line(self):
4521        self._test_encode('hello\n', 'hello\n')
4522
4523    def test_encode_one_line_crlf(self):
4524        self._test_encode('hello\r\n', 'hello\n')
4525
4526    def test_encode_one_line_eol(self):
4527        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')
4528
4529    def test_encode_one_line_eol_after_non_ascii(self):
4530        # issue 20206; see changeset 0cf700464177 for why the encode/decode.
4531        self._test_encode('hello\u03c5\n'.encode('utf-8').decode('latin1'),
4532                          'hello=CF=85\r\n', eol='\r\n')
4533
4534    def test_encode_one_space(self):
4535        self._test_encode(' ', '=20')
4536
4537    def test_encode_one_line_one_space(self):
4538        self._test_encode(' \n', '=20\n')
4539
# XXX: body_encode() expects strings, but uses ord(char) from these strings
# to index into a 256-entry list.  For code points above 255, this will fail.
# Should there be a check for 8-bit only ord() values in body, or at least
# a comment about the expected input?
4544
4545    def test_encode_two_lines_one_space(self):
4546        self._test_encode(' \n \n', '=20\n=20\n')
4547
4548    def test_encode_one_word_trailing_spaces(self):
4549        self._test_encode('hello   ', 'hello  =20')
4550
4551    def test_encode_one_line_trailing_spaces(self):
4552        self._test_encode('hello   \n', 'hello  =20\n')
4553
4554    def test_encode_one_word_trailing_tab(self):
4555        self._test_encode('hello  \t', 'hello  =09')
4556
4557    def test_encode_one_line_trailing_tab(self):
4558        self._test_encode('hello  \t\n', 'hello  =09\n')
4559
4560    def test_encode_trailing_space_before_maxlinelen(self):
4561        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)
4562
4563    def test_encode_trailing_space_at_maxlinelen(self):
4564        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)
4565
4566    def test_encode_trailing_space_beyond_maxlinelen(self):
4567        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)
4568
4569    def test_encode_whitespace_lines(self):
4570        self._test_encode(' \n' * 5, '=20\n' * 5)
4571
4572    def test_encode_quoted_equals(self):
4573        self._test_encode('a = b', 'a =3D b')
4574
4575    def test_encode_one_long_string(self):
4576        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)
4577
4578    def test_encode_one_long_line(self):
4579        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
4580
4581    def test_encode_one_very_long_line(self):
4582        self._test_encode('x' * 200 + '\n',
4583                2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')
4584
4585    def test_encode_shortest_maxlinelen(self):
4586        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)
4587
4588    def test_encode_maxlinelen_too_small(self):
4589        self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)
4590
4591    def test_encode(self):
4592        eq = self.assertEqual
4593        eq(quoprimime.body_encode(''), '')
4594        eq(quoprimime.body_encode('hello'), 'hello')
4595        # Test the binary flag
4596        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
4597        # Test the maxlinelen arg
4598        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
4599xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
4600 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
4601x xxxx xxxx xxxx xxxx=20""")
4602        # Test the eol argument
4603        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
4604           """\
4605xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
4606 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
4607x xxxx xxxx xxxx xxxx=20""")
4608        eq(quoprimime.body_encode("""\
4609one line
4610
4611two line"""), """\
4612one line
4613
4614two line""")
4615
4616
4617
4618# Test the Charset class
class TestCharset(unittest.TestCase):
    # Checks Charset header/body encoding and charset-name validation.

    def tearDown(self):
        # test_body_encode registers a temporary 'fake' charset; drop it
        # again (if it is there) so it does not leak into other tests.
        from email import charset as charset_module
        charset_module.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        # Make sure us-ascii = no Unicode conversion
        ascii_charset = Charset('us-ascii')
        self.assertEqual(ascii_charset.header_encode('Hello World!'),
                         'Hello World!')
        # Non-ASCII text cannot be header-encoded as us-ascii ...
        with self.assertRaises(UnicodeError):
            ascii_charset.header_encode(eight_bit)
        # ... but comes out as a base64 encoded word with utf-8.
        utf8_charset = Charset('utf-8')
        self.assertEqual(utf8_charset.header_encode(eight_bit),
                         '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        # (charset name, body, expected encoding), covering a QP body
        # encoding, a Base64 body encoding and no body encoding at all.
        cases = (
            ('iso-8859-1', 'hello w\xf6rld', 'hello w=F6rld'),
            ('utf-8', b'hello world', 'aGVsbG8gd29ybGQ=\n'),
            ('us-ascii', 'hello world', 'hello world'),
        )
        for charset_name, body, expected in cases:
            self.assertEqual(expected, Charset(charset_name).body_encode(body))
        # Try the convert argument, where input codec != output codec
        Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as charset_module
        charset_module.add_charset('fake', charset_module.QP, None, 'utf-8')
        fake_charset = Charset('fake')
        self.assertEqual('hello world', fake_charset.body_encode('hello world'))

    def test_unicode_charset_name(self):
        # str() of a Charset gives back its name.
        self.assertEqual(str(Charset('us-ascii')), 'us-ascii')
        # A charset name with a non-ASCII character is rejected.
        with self.assertRaises(errors.CharsetError):
            Charset('asc\xffii')
4673
4674
4675
4676# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    # Exercises email.header.Header (construction, append(), encode() and
    # str()) together with decode_header() and make_header().  Most tests
    # compare against byte-exact folded header text, so the expected
    # literals below must not be reformatted.

    def test_simple(self):
        # The appended chunk keeps its own leading space in addition to the
        # separator, hence the two spaces in the expected value.
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append(' Goodbye World!')
        eq(h.encode(), 'Hello World!  Goodbye World!')

    def test_simple_surprise(self):
        # Appending a chunk without leading whitespace still yields a single
        # space between the chunks.
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        # Text without encoded words comes back as one (text, None) pair.
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        # Every folded line of a long header must respect maxlinelen=76.
        h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
                   maxlinelen=76)
        for l in h.encode(splitchars=' ').split('\n '):
            self.assertLessEqual(len(l), 76)

    def test_multilingual(self):
        # One header built from three chunks in three charsets: checks the
        # folded encoding, decode_header(), str() and a make_header()
        # round trip.
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        # Decoding yields one (bytes, charset) pair per original chunk.
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        # str() gives the whole header as text; the expected value is
        # spelled as UTF-8 bytes and decoded for the comparison.
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        # A Header created without a value encodes to the empty string.
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        # A Header with no constructor arguments compares equal to '' and,
        # after appending, to the appended text.
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        # The fold position depends on header_name and maxlinelen, while
        # str() always returns the unfolded text.
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # With header_name given, the first line is expected to break earlier.
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # A large enough maxlinelen means no folding at all.
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        # Q-encoded words are split to fit maxlinelen (20, then 40) and the
        # pieces must decode back to the original text.
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        h.append('xxxx ' * 20)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        # Same as test_quopri_splittable, but koi8-r produces base64 ('b')
        # encoded words.
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        # Plain ASCII survives a decode_header()/make_header() round trip.
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        # append() accepts the charset as a plain string name.
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        # With utf-8 the expected encoded word uses 'q' for the mostly-ASCII
        # text and 'b' for the all-CJK text.
        eq = self.assertEqual
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        # Bytes with a stray \x96 raise UnicodeError by default and are
        # accepted when errors='replace' is passed.
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_escaped_8bit_header(self):
        # Surrogate-escaped text with the UNKNOWN8BIT charset: str() shows
        # U+FFFD for the bad byte, decode_header() returns the raw bytes.
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        e = x.decode('ascii', 'surrogateescape')
        h = Header(e, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_header_handles_binary_unknown8bit(self):
        # Same expectations when the bytes are passed to Header directly.
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_make_header_handles_binary_unknown8bit(self):
        # The unknown-8bit chunk must survive decode_header()/make_header().
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        h2 = email.header.make_header(email.header.decode_header(h))
        self.assertEqual(str(h2),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])

    def test_modify_returned_list_does_not_change_header(self):
        # Mutating the list returned by decode_header() must leave the
        # Header itself untouched.
        h = Header('test')
        chunks = email.header.decode_header(h)
        chunks.append(('ascii', 'test2'))
        self.assertEqual(str(h), 'test')

    def test_encoded_adjacent_nonencoded(self):
        # An encoded word followed by plain text round-trips through
        # decode_header()/make_header() to the same encoded form.
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_keeper(self):
        # The two adjacent koi8-r encoded words are merged into one decoded
        # chunk, and the surrounding plain text keeps its whitespace.
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        eq(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        # This malformed base64 encoded word must raise HeaderParseError.
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)

    def test_shift_jis_charset(self):
        # A shift_jis header is expected to be emitted as iso-2022-jp.
        h = Header('文', charset='shift_jis')
        self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')

    def test_flatten_header_with_no_value(self):
        # Issue 11401 (regression from email 4.x)  Note that the space after
        # the header doesn't reflect the input, but this is also the way
        # email 4.x behaved.  At some point it would be nice to fix that.
        msg = email.message_from_string("EmptyHeader:")
        self.assertEqual(str(msg), "EmptyHeader: \n\n")

    def test_encode_preserves_leading_ws_on_value(self):
        # Leading whitespace of an assigned header value is kept on output.
        msg = Message()
        msg['SomeHeader'] = '   value with leading ws'
        self.assertEqual(str(msg), "SomeHeader:    value with leading ws\n\n")

    def test_whitespace_header(self):
        # A header consisting of a single space encodes to that space.
        self.assertEqual(Header(' ').encode(), ' ')
5031
5032
5033
5034# Test RFC 2231 header parameters (en/de)coding
5035class TestRFC2231(TestEmailBase):
5036
5037    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
5038    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
5039    def test_get_param(self):
5040        eq = self.assertEqual
5041        msg = self._msgobj('msg_29.txt')
5042        eq(msg.get_param('title'),
5043           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
5044        eq(msg.get_param('title', unquote=False),
5045           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
5046
5047    def test_set_param(self):
5048        eq = self.ndiffAssertEqual
5049        msg = Message()
5050        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
5051                      charset='us-ascii')
5052        eq(msg.get_param('title'),
5053           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
5054        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
5055                      charset='us-ascii', language='en')
5056        eq(msg.get_param('title'),
5057           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
5058        msg = self._msgobj('msg_01.txt')
5059        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
5060                      charset='us-ascii', language='en')
5061        eq(msg.as_string(maxheaderlen=78), """\
5062Return-Path: <bbb@zzz.org>
5063Delivered-To: bbb@zzz.org
5064Received: by mail.zzz.org (Postfix, from userid 889)
5065\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
5066MIME-Version: 1.0
5067Content-Transfer-Encoding: 7bit
5068Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
5069From: bbb@ddd.com (John X. Doe)
5070To: bbb@zzz.org
5071Subject: This is a test message
5072Date: Fri, 4 May 2001 14:05:44 -0400
5073Content-Type: text/plain; charset=us-ascii;
5074 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
5075
5076
5077Hi,
5078
5079Do you like this message?
5080
5081-Me
5082""")
5083
5084    def test_set_param_requote(self):
5085        msg = Message()
5086        msg.set_param('title', 'foo')
5087        self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
5088        msg.set_param('title', 'bar', requote=False)
5089        self.assertEqual(msg['content-type'], 'text/plain; title=bar')
5090        # tspecial is still quoted.
5091        msg.set_param('title', "(bar)bell", requote=False)
5092        self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')
5093
5094    def test_del_param(self):
5095        eq = self.ndiffAssertEqual
5096        msg = self._msgobj('msg_01.txt')
5097        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
5098        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
5099            charset='us-ascii', language='en')
5100        msg.del_param('foo', header='Content-Type')
5101        eq(msg.as_string(maxheaderlen=78), """\
5102Return-Path: <bbb@zzz.org>
5103Delivered-To: bbb@zzz.org
5104Received: by mail.zzz.org (Postfix, from userid 889)
5105\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
5106MIME-Version: 1.0
5107Content-Transfer-Encoding: 7bit
5108Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
5109From: bbb@ddd.com (John X. Doe)
5110To: bbb@zzz.org
5111Subject: This is a test message
5112Date: Fri, 4 May 2001 14:05:44 -0400
5113Content-Type: text/plain; charset="us-ascii";
5114 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
5115
5116
5117Hi,
5118
5119Do you like this message?
5120
5121-Me
5122""")
5123
5124    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset
5125    # I changed the charset name, though, because the one in the file isn't
5126    # a legal charset name.  Should add a test for an illegal charset.
5127    def test_rfc2231_get_content_charset(self):
5128        eq = self.assertEqual
5129        msg = self._msgobj('msg_32.txt')
5130        eq(msg.get_content_charset(), 'us-ascii')
5131
5132    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes
5133    def test_rfc2231_parse_rfc_quoting(self):
5134        m = textwrap.dedent('''\
5135            Content-Disposition: inline;
5136            \tfilename*0*=''This%20is%20even%20more%20;
5137            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
5138            \tfilename*2="is it not.pdf"
5139
5140            ''')
5141        msg = email.message_from_string(m)
5142        self.assertEqual(msg.get_filename(),
5143                         'This is even more ***fun*** is it not.pdf')
5144        self.assertEqual(m, msg.as_string())
5145
5146    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
5147    def test_rfc2231_parse_extra_quoting(self):
5148        m = textwrap.dedent('''\
5149            Content-Disposition: inline;
5150            \tfilename*0*="''This%20is%20even%20more%20";
5151            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5152            \tfilename*2="is it not.pdf"
5153
5154            ''')
5155        msg = email.message_from_string(m)
5156        self.assertEqual(msg.get_filename(),
5157                         'This is even more ***fun*** is it not.pdf')
5158        self.assertEqual(m, msg.as_string())
5159
5160    # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset
5161    # but new test uses *0* because otherwise lang/charset is not valid.
5162    # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values
5163    def test_rfc2231_no_language_or_charset(self):
5164        m = '''\
5165Content-Transfer-Encoding: 8bit
5166Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
5167Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
5168
5169'''
5170        msg = email.message_from_string(m)
5171        param = msg.get_param('NAME')
5172        self.assertNotIsInstance(param, tuple)
5173        self.assertEqual(
5174            param,
5175            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
5176
5177    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset
5178    def test_rfc2231_no_language_or_charset_in_filename(self):
5179        m = '''\
5180Content-Disposition: inline;
5181\tfilename*0*="''This%20is%20even%20more%20";
5182\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5183\tfilename*2="is it not.pdf"
5184
5185'''
5186        msg = email.message_from_string(m)
5187        self.assertEqual(msg.get_filename(),
5188                         'This is even more ***fun*** is it not.pdf')
5189
5190    # Duplicate of previous test?
5191    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
5192        m = '''\
5193Content-Disposition: inline;
5194\tfilename*0*="''This%20is%20even%20more%20";
5195\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5196\tfilename*2="is it not.pdf"
5197
5198'''
5199        msg = email.message_from_string(m)
5200        self.assertEqual(msg.get_filename(),
5201                         'This is even more ***fun*** is it not.pdf')
5202
5203    # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded,
5204    # but the test below is wrong (the first part should be decoded).
5205    def test_rfc2231_partly_encoded(self):
5206        m = '''\
5207Content-Disposition: inline;
5208\tfilename*0="''This%20is%20even%20more%20";
5209\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5210\tfilename*2="is it not.pdf"
5211
5212'''
5213        msg = email.message_from_string(m)
5214        self.assertEqual(
5215            msg.get_filename(),
5216            'This%20is%20even%20more%20***fun*** is it not.pdf')
5217
5218    def test_rfc2231_partly_nonencoded(self):
5219        m = '''\
5220Content-Disposition: inline;
5221\tfilename*0="This%20is%20even%20more%20";
5222\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
5223\tfilename*2="is it not.pdf"
5224
5225'''
5226        msg = email.message_from_string(m)
5227        self.assertEqual(
5228            msg.get_filename(),
5229            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
5230
5231    def test_rfc2231_no_language_or_charset_in_boundary(self):
5232        m = '''\
5233Content-Type: multipart/alternative;
5234\tboundary*0*="''This%20is%20even%20more%20";
5235\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
5236\tboundary*2="is it not.pdf"
5237
5238'''
5239        msg = email.message_from_string(m)
5240        self.assertEqual(msg.get_boundary(),
5241                         'This is even more ***fun*** is it not.pdf')
5242
5243    def test_rfc2231_no_language_or_charset_in_charset(self):
5244        # This is a nonsensical charset value, but tests the code anyway
5245        m = '''\
5246Content-Type: text/plain;
5247\tcharset*0*="This%20is%20even%20more%20";
5248\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
5249\tcharset*2="is it not.pdf"
5250
5251'''
5252        msg = email.message_from_string(m)
5253        self.assertEqual(msg.get_content_charset(),
5254                         'this is even more ***fun*** is it not.pdf')
5255
5256    # test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii
5257    def test_rfc2231_bad_encoding_in_filename(self):
5258        m = '''\
5259Content-Disposition: inline;
5260\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
5261\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5262\tfilename*2="is it not.pdf"
5263
5264'''
5265        msg = email.message_from_string(m)
5266        self.assertEqual(msg.get_filename(),
5267                         'This is even more ***fun*** is it not.pdf')
5268
5269    def test_rfc2231_bad_encoding_in_charset(self):
5270        m = """\
5271Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
5272
5273"""
5274        msg = email.message_from_string(m)
5275        # This should return None because non-ascii characters in the charset
5276        # are not allowed.
5277        self.assertEqual(msg.get_content_charset(), None)
5278
5279    def test_rfc2231_bad_character_in_charset(self):
5280        m = """\
5281Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
5282
5283"""
5284        msg = email.message_from_string(m)
5285        # This should return None because non-ascii characters in the charset
5286        # are not allowed.
5287        self.assertEqual(msg.get_content_charset(), None)
5288
5289    def test_rfc2231_bad_character_in_filename(self):
5290        m = '''\
5291Content-Disposition: inline;
5292\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
5293\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5294\tfilename*2*="is it not.pdf%E2"
5295
5296'''
5297        msg = email.message_from_string(m)
5298        self.assertEqual(msg.get_filename(),
5299                         'This is even more ***fun*** is it not.pdf\ufffd')
5300
5301    def test_rfc2231_unknown_encoding(self):
5302        m = """\
5303Content-Transfer-Encoding: 8bit
5304Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
5305
5306"""
5307        msg = email.message_from_string(m)
5308        self.assertEqual(msg.get_filename(), 'myfile.txt')
5309
5310    def test_rfc2231_single_tick_in_filename_extended(self):
5311        eq = self.assertEqual
5312        m = """\
5313Content-Type: application/x-foo;
5314\tname*0*=\"Frank's\"; name*1*=\" Document\"
5315
5316"""
5317        msg = email.message_from_string(m)
5318        charset, language, s = msg.get_param('name')
5319        eq(charset, None)
5320        eq(language, None)
5321        eq(s, "Frank's Document")
5322
5323    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
5324    def test_rfc2231_single_tick_in_filename(self):
5325        m = """\
5326Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
5327
5328"""
5329        msg = email.message_from_string(m)
5330        param = msg.get_param('name')
5331        self.assertNotIsInstance(param, tuple)
5332        self.assertEqual(param, "Frank's Document")
5333
5334    def test_rfc2231_missing_tick(self):
5335        m = '''\
5336Content-Disposition: inline;
5337\tfilename*0*="'This%20is%20broken";
5338'''
5339        msg = email.message_from_string(m)
5340        self.assertEqual(
5341            msg.get_filename(),
5342            "'This is broken")
5343
5344    def test_rfc2231_missing_tick_with_encoded_non_ascii(self):
5345        m = '''\
5346Content-Disposition: inline;
5347\tfilename*0*="'This%20is%E2broken";
5348'''
5349        msg = email.message_from_string(m)
5350        self.assertEqual(
5351            msg.get_filename(),
5352            "'This is\ufffdbroken")
5353
5354    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang
5355    def test_rfc2231_tick_attack_extended(self):
5356        eq = self.assertEqual
5357        m = """\
5358Content-Type: application/x-foo;
5359\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
5360
5361"""
5362        msg = email.message_from_string(m)
5363        charset, language, s = msg.get_param('name')
5364        eq(charset, 'us-ascii')
5365        eq(language, 'en-us')
5366        eq(s, "Frank's Document")
5367
5368    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value
5369    def test_rfc2231_tick_attack(self):
5370        m = """\
5371Content-Type: application/x-foo;
5372\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
5373
5374"""
5375        msg = email.message_from_string(m)
5376        param = msg.get_param('name')
5377        self.assertNotIsInstance(param, tuple)
5378        self.assertEqual(param, "us-ascii'en-us'Frank's Document")
5379
5380    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes
5381    def test_rfc2231_no_extended_values(self):
5382        eq = self.assertEqual
5383        m = """\
5384Content-Type: application/x-foo; name=\"Frank's Document\"
5385
5386"""
5387        msg = email.message_from_string(m)
5388        eq(msg.get_param('name'), "Frank's Document")
5389
5390    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments
5391    def test_rfc2231_encoded_then_unencoded_segments(self):
5392        eq = self.assertEqual
5393        m = """\
5394Content-Type: application/x-foo;
5395\tname*0*=\"us-ascii'en-us'My\";
5396\tname*1=\" Document\";
5397\tname*2*=\" For You\"
5398
5399"""
5400        msg = email.message_from_string(m)
5401        charset, language, s = msg.get_param('name')
5402        eq(charset, 'us-ascii')
5403        eq(language, 'en-us')
5404        eq(s, 'My Document For You')
5405
5406    # test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments
5407    # test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments
5408    def test_rfc2231_unencoded_then_encoded_segments(self):
5409        eq = self.assertEqual
5410        m = """\
5411Content-Type: application/x-foo;
5412\tname*0=\"us-ascii'en-us'My\";
5413\tname*1*=\" Document\";
5414\tname*2*=\" For You\"
5415
5416"""
5417        msg = email.message_from_string(m)
5418        charset, language, s = msg.get_param('name')
5419        eq(charset, 'us-ascii')
5420        eq(language, 'en-us')
5421        eq(s, 'My Document For You')
5422
5423    def test_should_not_hang_on_invalid_ew_messages(self):
5424        messages = ["""From: user@host.com
5425To: user@host.com
5426Bad-Header:
5427 =?us-ascii?Q?LCSwrV11+IB0rSbSker+M9vWR7wEDSuGqmHD89Gt=ea0nJFSaiz4vX3XMJPT4vrE?=
5428 =?us-ascii?Q?xGUZeOnp0o22pLBB7CYLH74Js=wOlK6Tfru2U47qR?=
5429 =?us-ascii?Q?72OfyEY2p2=2FrA9xNFyvH+fBTCmazxwzF8nGkK6D?=
5430
5431Hello!
5432""", """From: ����� �������� <xxx@xxx>
5433To: "xxx" <xxx@xxx>
5434Subject:   ��� ���������� ����� ����� � ��������� �� ����
5435MIME-Version: 1.0
5436Content-Type: text/plain; charset="windows-1251";
5437Content-Transfer-Encoding: 8bit
5438
5439�� ����� � ���� ������ ��� ��������
5440"""]
5441        for m in messages:
5442            with self.subTest(m=m):
5443                msg = email.message_from_string(m)
5444
5445
5446# Tests to ensure that signed parts of an email are completely preserved, as
5447# required by RFC1847 section 2.1.  Note that these are incomplete, because the
5448# email package does not currently always preserve the body.  See issue 1670765.
class TestSigned(TestEmailBase):

    def _msg_and_obj(self, filename):
        # Return (raw text, parsed message) for the test data file
        # *filename*.  The file is closed before parsing starts.
        with openfile(filename) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _signed_parts_eq(self, original, result):
        # Assert that the first MIME part found in *result* (generated text)
        # equals the first MIME part found in *original* (source text).
        #
        # A part is the text between a '--boundary' line and the next line
        # that consists of exactly the same '--boundary'.
        repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
        inmatch = repart.search(original)
        outmatch = repart.search(result)
        # Report a missing part as a test failure with a readable message
        # rather than as an AttributeError raised on a None match.
        self.assertIsNotNone(inmatch, 'no MIME part found in original text')
        self.assertIsNotNone(outmatch, 'no MIME part found in generated text')
        self.assertEqual(outmatch.group(2), inmatch.group(2))

    def test_long_headers_as_string(self):
        # Round trip through Message.as_string() with default settings.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        # Round trip through Message.as_string() with maxheaderlen=60.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        # Round trip through an explicit Generator writing to a StringIO.
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
5481
5482
5483
# Hand control to unittest's command-line entry point when this file is
# executed directly as a script.
if __name__ == '__main__':
    unittest.main()
5486