1# Copyright (C) 2001-2010 Python Software Foundation
2# Contact: email-sig@python.org
3# email package unit tests
4
5import re
6import time
7import base64
8import unittest
9import textwrap
10
11from io import StringIO, BytesIO
12from itertools import chain
13from random import choice
14from threading import Thread
15from unittest.mock import patch
16
17import email
18import email.policy
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator, BytesGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email.mime.nonmultipart import MIMENonMultipart
33from email import utils
34from email import errors
35from email import encoders
36from email import iterators
37from email import base64mime
38from email import quoprimime
39
40from test.support import unlink, start_threads
41from test.test_email import openfile, TestEmailBase
42
43# These imports are documented to work, but we are testing them using a
44# different path, so we import them here just to make sure they are importable.
45from email.parser import FeedParser, BytesFeedParser
46
47NL = '\n'
48EMPTYSTRING = ''
49SPACE = ' '
50
51
52# Test various aspects of the Message class's API
53class TestMessageAPI(TestEmailBase):
54    def test_get_all(self):
55        eq = self.assertEqual
56        msg = self._msgobj('msg_20.txt')
57        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
58        eq(msg.get_all('xx', 'n/a'), 'n/a')
59
60    def test_getset_charset(self):
61        eq = self.assertEqual
62        msg = Message()
63        eq(msg.get_charset(), None)
64        charset = Charset('iso-8859-1')
65        msg.set_charset(charset)
66        eq(msg['mime-version'], '1.0')
67        eq(msg.get_content_type(), 'text/plain')
68        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
69        eq(msg.get_param('charset'), 'iso-8859-1')
70        eq(msg['content-transfer-encoding'], 'quoted-printable')
71        eq(msg.get_charset().input_charset, 'iso-8859-1')
72        # Remove the charset
73        msg.set_charset(None)
74        eq(msg.get_charset(), None)
75        eq(msg['content-type'], 'text/plain')
76        # Try adding a charset when there's already MIME headers present
77        msg = Message()
78        msg['MIME-Version'] = '2.0'
79        msg['Content-Type'] = 'text/x-weird'
80        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
81        msg.set_charset(charset)
82        eq(msg['mime-version'], '2.0')
83        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
84        eq(msg['content-transfer-encoding'], 'quinted-puntable')
85
86    def test_set_charset_from_string(self):
87        eq = self.assertEqual
88        msg = Message()
89        msg.set_charset('us-ascii')
90        eq(msg.get_charset().input_charset, 'us-ascii')
91        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
92
93    def test_set_payload_with_charset(self):
94        msg = Message()
95        charset = Charset('iso-8859-1')
96        msg.set_payload('This is a string payload', charset)
97        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')
98
99    def test_set_payload_with_8bit_data_and_charset(self):
100        data = b'\xd0\x90\xd0\x91\xd0\x92'
101        charset = Charset('utf-8')
102        msg = Message()
103        msg.set_payload(data, charset)
104        self.assertEqual(msg['content-transfer-encoding'], 'base64')
105        self.assertEqual(msg.get_payload(decode=True), data)
106        self.assertEqual(msg.get_payload(), '0JDQkdCS\n')
107
108    def test_set_payload_with_non_ascii_and_charset_body_encoding_none(self):
109        data = b'\xd0\x90\xd0\x91\xd0\x92'
110        charset = Charset('utf-8')
111        charset.body_encoding = None # Disable base64 encoding
112        msg = Message()
113        msg.set_payload(data.decode('utf-8'), charset)
114        self.assertEqual(msg['content-transfer-encoding'], '8bit')
115        self.assertEqual(msg.get_payload(decode=True), data)
116
117    def test_set_payload_with_8bit_data_and_charset_body_encoding_none(self):
118        data = b'\xd0\x90\xd0\x91\xd0\x92'
119        charset = Charset('utf-8')
120        charset.body_encoding = None # Disable base64 encoding
121        msg = Message()
122        msg.set_payload(data, charset)
123        self.assertEqual(msg['content-transfer-encoding'], '8bit')
124        self.assertEqual(msg.get_payload(decode=True), data)
125
126    def test_set_payload_to_list(self):
127        msg = Message()
128        msg.set_payload([])
129        self.assertEqual(msg.get_payload(), [])
130
131    def test_attach_when_payload_is_string(self):
132        msg = Message()
133        msg['Content-Type'] = 'multipart/mixed'
134        msg.set_payload('string payload')
135        sub_msg = MIMEMessage(Message())
136        self.assertRaisesRegex(TypeError, "[Aa]ttach.*non-multipart",
137                               msg.attach, sub_msg)
138
139    def test_get_charsets(self):
140        eq = self.assertEqual
141
142        msg = self._msgobj('msg_08.txt')
143        charsets = msg.get_charsets()
144        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])
145
146        msg = self._msgobj('msg_09.txt')
147        charsets = msg.get_charsets('dingbat')
148        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
149                      'koi8-r'])
150
151        msg = self._msgobj('msg_12.txt')
152        charsets = msg.get_charsets()
153        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
154                      'iso-8859-3', 'us-ascii', 'koi8-r'])
155
156    def test_get_filename(self):
157        eq = self.assertEqual
158
159        msg = self._msgobj('msg_04.txt')
160        filenames = [p.get_filename() for p in msg.get_payload()]
161        eq(filenames, ['msg.txt', 'msg.txt'])
162
163        msg = self._msgobj('msg_07.txt')
164        subpart = msg.get_payload(1)
165        eq(subpart.get_filename(), 'dingusfish.gif')
166
167    def test_get_filename_with_name_parameter(self):
168        eq = self.assertEqual
169
170        msg = self._msgobj('msg_44.txt')
171        filenames = [p.get_filename() for p in msg.get_payload()]
172        eq(filenames, ['msg.txt', 'msg.txt'])
173
174    def test_get_boundary(self):
175        eq = self.assertEqual
176        msg = self._msgobj('msg_07.txt')
177        # No quotes!
178        eq(msg.get_boundary(), 'BOUNDARY')
179
180    def test_set_boundary(self):
181        eq = self.assertEqual
182        # This one has no existing boundary parameter, but the Content-Type:
183        # header appears fifth.
184        msg = self._msgobj('msg_01.txt')
185        msg.set_boundary('BOUNDARY')
186        header, value = msg.items()[4]
187        eq(header.lower(), 'content-type')
188        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
189        # This one has a Content-Type: header, with a boundary, stuck in the
190        # middle of its headers.  Make sure the order is preserved; it should
191        # be fifth.
192        msg = self._msgobj('msg_04.txt')
193        msg.set_boundary('BOUNDARY')
194        header, value = msg.items()[4]
195        eq(header.lower(), 'content-type')
196        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
197        # And this one has no Content-Type: header at all.
198        msg = self._msgobj('msg_03.txt')
199        self.assertRaises(errors.HeaderParseError,
200                          msg.set_boundary, 'BOUNDARY')
201
202    def test_make_boundary(self):
203        msg = MIMEMultipart('form-data')
204        # Note that when the boundary gets created is an implementation
205        # detail and might change.
206        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
207        # Trigger creation of boundary
208        msg.as_string()
209        self.assertEqual(msg.items()[0][1][:33],
210                        'multipart/form-data; boundary="==')
211        # XXX: there ought to be tests of the uniqueness of the boundary, too.
212
213    def test_message_rfc822_only(self):
214        # Issue 7970: message/rfc822 not in multipart parsed by
215        # HeaderParser caused an exception when flattened.
216        with openfile('msg_46.txt') as fp:
217            msgdata = fp.read()
218        parser = HeaderParser()
219        msg = parser.parsestr(msgdata)
220        out = StringIO()
221        gen = Generator(out, True, 0)
222        gen.flatten(msg, False)
223        self.assertEqual(out.getvalue(), msgdata)
224
225    def test_byte_message_rfc822_only(self):
226        # Make sure new bytes header parser also passes this.
227        with openfile('msg_46.txt') as fp:
228            msgdata = fp.read().encode('ascii')
229        parser = email.parser.BytesHeaderParser()
230        msg = parser.parsebytes(msgdata)
231        out = BytesIO()
232        gen = email.generator.BytesGenerator(out)
233        gen.flatten(msg)
234        self.assertEqual(out.getvalue(), msgdata)
235
236    def test_get_decoded_payload(self):
237        eq = self.assertEqual
238        msg = self._msgobj('msg_10.txt')
239        # The outer message is a multipart
240        eq(msg.get_payload(decode=True), None)
241        # Subpart 1 is 7bit encoded
242        eq(msg.get_payload(0).get_payload(decode=True),
243           b'This is a 7bit encoded message.\n')
244        # Subpart 2 is quopri
245        eq(msg.get_payload(1).get_payload(decode=True),
246           b'\xa1This is a Quoted Printable encoded message!\n')
247        # Subpart 3 is base64
248        eq(msg.get_payload(2).get_payload(decode=True),
249           b'This is a Base64 encoded message.')
250        # Subpart 4 is base64 with a trailing newline, which
251        # used to be stripped (issue 7143).
252        eq(msg.get_payload(3).get_payload(decode=True),
253           b'This is a Base64 encoded message.\n')
254        # Subpart 5 has no Content-Transfer-Encoding: header.
255        eq(msg.get_payload(4).get_payload(decode=True),
256           b'This has no Content-Transfer-Encoding: header.\n')
257
258    def test_get_decoded_uu_payload(self):
259        eq = self.assertEqual
260        msg = Message()
261        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
262        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
263            msg['content-transfer-encoding'] = cte
264            eq(msg.get_payload(decode=True), b'hello world')
265        # Now try some bogus data
266        msg.set_payload('foo')
267        eq(msg.get_payload(decode=True), b'foo')
268
269    def test_get_payload_n_raises_on_non_multipart(self):
270        msg = Message()
271        self.assertRaises(TypeError, msg.get_payload, 1)
272
273    def test_decoded_generator(self):
274        eq = self.assertEqual
275        msg = self._msgobj('msg_07.txt')
276        with openfile('msg_17.txt') as fp:
277            text = fp.read()
278        s = StringIO()
279        g = DecodedGenerator(s)
280        g.flatten(msg)
281        eq(s.getvalue(), text)
282
283    def test__contains__(self):
284        msg = Message()
285        msg['From'] = 'Me'
286        msg['to'] = 'You'
287        # Check for case insensitivity
288        self.assertIn('from', msg)
289        self.assertIn('From', msg)
290        self.assertIn('FROM', msg)
291        self.assertIn('to', msg)
292        self.assertIn('To', msg)
293        self.assertIn('TO', msg)
294
295    def test_as_string(self):
296        msg = self._msgobj('msg_01.txt')
297        with openfile('msg_01.txt') as fp:
298            text = fp.read()
299        self.assertEqual(text, str(msg))
300        fullrepr = msg.as_string(unixfrom=True)
301        lines = fullrepr.split('\n')
302        self.assertTrue(lines[0].startswith('From '))
303        self.assertEqual(text, NL.join(lines[1:]))
304
305    def test_as_string_policy(self):
306        msg = self._msgobj('msg_01.txt')
307        newpolicy = msg.policy.clone(linesep='\r\n')
308        fullrepr = msg.as_string(policy=newpolicy)
309        s = StringIO()
310        g = Generator(s, policy=newpolicy)
311        g.flatten(msg)
312        self.assertEqual(fullrepr, s.getvalue())
313
314    def test_nonascii_as_string_without_cte(self):
315        m = textwrap.dedent("""\
316            MIME-Version: 1.0
317            Content-type: text/plain; charset="iso-8859-1"
318
319            Test if non-ascii messages with no Content-Transfer-Encoding set
320            can be as_string'd:
321            Föö bär
322            """)
323        source = m.encode('iso-8859-1')
324        expected = textwrap.dedent("""\
325            MIME-Version: 1.0
326            Content-type: text/plain; charset="iso-8859-1"
327            Content-Transfer-Encoding: quoted-printable
328
329            Test if non-ascii messages with no Content-Transfer-Encoding set
330            can be as_string'd:
331            F=F6=F6 b=E4r
332            """)
333        msg = email.message_from_bytes(source)
334        self.assertEqual(msg.as_string(), expected)
335
336    def test_nonascii_as_string_without_content_type_and_cte(self):
337        m = textwrap.dedent("""\
338            MIME-Version: 1.0
339
340            Test if non-ascii messages with no Content-Type nor
341            Content-Transfer-Encoding set can be as_string'd:
342            Föö bär
343            """)
344        source = m.encode('iso-8859-1')
345        expected = source.decode('ascii', 'replace')
346        msg = email.message_from_bytes(source)
347        self.assertEqual(msg.as_string(), expected)
348
349    def test_as_bytes(self):
350        msg = self._msgobj('msg_01.txt')
351        with openfile('msg_01.txt') as fp:
352            data = fp.read().encode('ascii')
353        self.assertEqual(data, bytes(msg))
354        fullrepr = msg.as_bytes(unixfrom=True)
355        lines = fullrepr.split(b'\n')
356        self.assertTrue(lines[0].startswith(b'From '))
357        self.assertEqual(data, b'\n'.join(lines[1:]))
358
359    def test_as_bytes_policy(self):
360        msg = self._msgobj('msg_01.txt')
361        newpolicy = msg.policy.clone(linesep='\r\n')
362        fullrepr = msg.as_bytes(policy=newpolicy)
363        s = BytesIO()
364        g = BytesGenerator(s,policy=newpolicy)
365        g.flatten(msg)
366        self.assertEqual(fullrepr, s.getvalue())
367
368    # test_headerregistry.TestContentTypeHeader.bad_params
369    def test_bad_param(self):
370        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
371        self.assertEqual(msg.get_param('baz'), '')
372
373    def test_missing_filename(self):
374        msg = email.message_from_string("From: foo\n")
375        self.assertEqual(msg.get_filename(), None)
376
377    def test_bogus_filename(self):
378        msg = email.message_from_string(
379        "Content-Disposition: blarg; filename\n")
380        self.assertEqual(msg.get_filename(), '')
381
382    def test_missing_boundary(self):
383        msg = email.message_from_string("From: foo\n")
384        self.assertEqual(msg.get_boundary(), None)
385
386    def test_get_params(self):
387        eq = self.assertEqual
388        msg = email.message_from_string(
389            'X-Header: foo=one; bar=two; baz=three\n')
390        eq(msg.get_params(header='x-header'),
391           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
392        msg = email.message_from_string(
393            'X-Header: foo; bar=one; baz=two\n')
394        eq(msg.get_params(header='x-header'),
395           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
396        eq(msg.get_params(), None)
397        msg = email.message_from_string(
398            'X-Header: foo; bar="one"; baz=two\n')
399        eq(msg.get_params(header='x-header'),
400           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
401
402    # test_headerregistry.TestContentTypeHeader.spaces_around_param_equals
403    def test_get_param_liberal(self):
404        msg = Message()
405        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
406        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')
407
408    def test_get_param(self):
409        eq = self.assertEqual
410        msg = email.message_from_string(
411            "X-Header: foo=one; bar=two; baz=three\n")
412        eq(msg.get_param('bar', header='x-header'), 'two')
413        eq(msg.get_param('quuz', header='x-header'), None)
414        eq(msg.get_param('quuz'), None)
415        msg = email.message_from_string(
416            'X-Header: foo; bar="one"; baz=two\n')
417        eq(msg.get_param('foo', header='x-header'), '')
418        eq(msg.get_param('bar', header='x-header'), 'one')
419        eq(msg.get_param('baz', header='x-header'), 'two')
420        # XXX: We are not RFC-2045 compliant!  We cannot parse:
421        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
422        # msg.get_param("weird")
423        # yet.
424
425    # test_headerregistry.TestContentTypeHeader.spaces_around_semis
426    def test_get_param_funky_continuation_lines(self):
427        msg = self._msgobj('msg_22.txt')
428        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')
429
430    # test_headerregistry.TestContentTypeHeader.semis_inside_quotes
431    def test_get_param_with_semis_in_quotes(self):
432        msg = email.message_from_string(
433            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
434        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
435        self.assertEqual(msg.get_param('name', unquote=False),
436                         '"Jim&amp;&amp;Jill"')
437
438    # test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value
439    def test_get_param_with_quotes(self):
440        msg = email.message_from_string(
441            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
442        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
443        msg = email.message_from_string(
444            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
445        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
446
447    def test_field_containment(self):
448        msg = email.message_from_string('Header: exists')
449        self.assertIn('header', msg)
450        self.assertIn('Header', msg)
451        self.assertIn('HEADER', msg)
452        self.assertNotIn('headerx', msg)
453
454    def test_set_param(self):
455        eq = self.assertEqual
456        msg = Message()
457        msg.set_param('charset', 'iso-2022-jp')
458        eq(msg.get_param('charset'), 'iso-2022-jp')
459        msg.set_param('importance', 'high value')
460        eq(msg.get_param('importance'), 'high value')
461        eq(msg.get_param('importance', unquote=False), '"high value"')
462        eq(msg.get_params(), [('text/plain', ''),
463                              ('charset', 'iso-2022-jp'),
464                              ('importance', 'high value')])
465        eq(msg.get_params(unquote=False), [('text/plain', ''),
466                                       ('charset', '"iso-2022-jp"'),
467                                       ('importance', '"high value"')])
468        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
469        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')
470
471    def test_del_param(self):
472        eq = self.assertEqual
473        msg = self._msgobj('msg_05.txt')
474        eq(msg.get_params(),
475           [('multipart/report', ''), ('report-type', 'delivery-status'),
476            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
477        old_val = msg.get_param("report-type")
478        msg.del_param("report-type")
479        eq(msg.get_params(),
480           [('multipart/report', ''),
481            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
482        msg.set_param("report-type", old_val)
483        eq(msg.get_params(),
484           [('multipart/report', ''),
485            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
486            ('report-type', old_val)])
487
488    def test_del_param_on_other_header(self):
489        msg = Message()
490        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
491        msg.del_param('filename', 'content-disposition')
492        self.assertEqual(msg['content-disposition'], 'attachment')
493
494    def test_del_param_on_nonexistent_header(self):
495        msg = Message()
496        # Deleting param on empty msg should not raise exception.
497        msg.del_param('filename', 'content-disposition')
498
499    def test_del_nonexistent_param(self):
500        msg = Message()
501        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
502        existing_header = msg['Content-Type']
503        msg.del_param('foobar', header='Content-Type')
504        self.assertEqual(msg['Content-Type'], existing_header)
505
506    def test_set_type(self):
507        eq = self.assertEqual
508        msg = Message()
509        self.assertRaises(ValueError, msg.set_type, 'text')
510        msg.set_type('text/plain')
511        eq(msg['content-type'], 'text/plain')
512        msg.set_param('charset', 'us-ascii')
513        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
514        msg.set_type('text/html')
515        eq(msg['content-type'], 'text/html; charset="us-ascii"')
516
517    def test_set_type_on_other_header(self):
518        msg = Message()
519        msg['X-Content-Type'] = 'text/plain'
520        msg.set_type('application/octet-stream', 'X-Content-Type')
521        self.assertEqual(msg['x-content-type'], 'application/octet-stream')
522
523    def test_get_content_type_missing(self):
524        msg = Message()
525        self.assertEqual(msg.get_content_type(), 'text/plain')
526
527    def test_get_content_type_missing_with_default_type(self):
528        msg = Message()
529        msg.set_default_type('message/rfc822')
530        self.assertEqual(msg.get_content_type(), 'message/rfc822')
531
532    def test_get_content_type_from_message_implicit(self):
533        msg = self._msgobj('msg_30.txt')
534        self.assertEqual(msg.get_payload(0).get_content_type(),
535                         'message/rfc822')
536
537    def test_get_content_type_from_message_explicit(self):
538        msg = self._msgobj('msg_28.txt')
539        self.assertEqual(msg.get_payload(0).get_content_type(),
540                         'message/rfc822')
541
542    def test_get_content_type_from_message_text_plain_implicit(self):
543        msg = self._msgobj('msg_03.txt')
544        self.assertEqual(msg.get_content_type(), 'text/plain')
545
546    def test_get_content_type_from_message_text_plain_explicit(self):
547        msg = self._msgobj('msg_01.txt')
548        self.assertEqual(msg.get_content_type(), 'text/plain')
549
550    def test_get_content_maintype_missing(self):
551        msg = Message()
552        self.assertEqual(msg.get_content_maintype(), 'text')
553
554    def test_get_content_maintype_missing_with_default_type(self):
555        msg = Message()
556        msg.set_default_type('message/rfc822')
557        self.assertEqual(msg.get_content_maintype(), 'message')
558
559    def test_get_content_maintype_from_message_implicit(self):
560        msg = self._msgobj('msg_30.txt')
561        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')
562
563    def test_get_content_maintype_from_message_explicit(self):
564        msg = self._msgobj('msg_28.txt')
565        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')
566
567    def test_get_content_maintype_from_message_text_plain_implicit(self):
568        msg = self._msgobj('msg_03.txt')
569        self.assertEqual(msg.get_content_maintype(), 'text')
570
571    def test_get_content_maintype_from_message_text_plain_explicit(self):
572        msg = self._msgobj('msg_01.txt')
573        self.assertEqual(msg.get_content_maintype(), 'text')
574
575    def test_get_content_subtype_missing(self):
576        msg = Message()
577        self.assertEqual(msg.get_content_subtype(), 'plain')
578
579    def test_get_content_subtype_missing_with_default_type(self):
580        msg = Message()
581        msg.set_default_type('message/rfc822')
582        self.assertEqual(msg.get_content_subtype(), 'rfc822')
583
584    def test_get_content_subtype_from_message_implicit(self):
585        msg = self._msgobj('msg_30.txt')
586        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')
587
588    def test_get_content_subtype_from_message_explicit(self):
589        msg = self._msgobj('msg_28.txt')
590        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')
591
592    def test_get_content_subtype_from_message_text_plain_implicit(self):
593        msg = self._msgobj('msg_03.txt')
594        self.assertEqual(msg.get_content_subtype(), 'plain')
595
596    def test_get_content_subtype_from_message_text_plain_explicit(self):
597        msg = self._msgobj('msg_01.txt')
598        self.assertEqual(msg.get_content_subtype(), 'plain')
599
600    def test_get_content_maintype_error(self):
601        msg = Message()
602        msg['Content-Type'] = 'no-slash-in-this-string'
603        self.assertEqual(msg.get_content_maintype(), 'text')
604
605    def test_get_content_subtype_error(self):
606        msg = Message()
607        msg['Content-Type'] = 'no-slash-in-this-string'
608        self.assertEqual(msg.get_content_subtype(), 'plain')
609
610    def test_replace_header(self):
611        eq = self.assertEqual
612        msg = Message()
613        msg.add_header('First', 'One')
614        msg.add_header('Second', 'Two')
615        msg.add_header('Third', 'Three')
616        eq(msg.keys(), ['First', 'Second', 'Third'])
617        eq(msg.values(), ['One', 'Two', 'Three'])
618        msg.replace_header('Second', 'Twenty')
619        eq(msg.keys(), ['First', 'Second', 'Third'])
620        eq(msg.values(), ['One', 'Twenty', 'Three'])
621        msg.add_header('First', 'Eleven')
622        msg.replace_header('First', 'One Hundred')
623        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
624        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
625        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')
626
627    def test_get_content_disposition(self):
628        msg = Message()
629        self.assertIsNone(msg.get_content_disposition())
630        msg.add_header('Content-Disposition', 'attachment',
631                       filename='random.avi')
632        self.assertEqual(msg.get_content_disposition(), 'attachment')
633        msg.replace_header('Content-Disposition', 'inline')
634        self.assertEqual(msg.get_content_disposition(), 'inline')
635        msg.replace_header('Content-Disposition', 'InlinE')
636        self.assertEqual(msg.get_content_disposition(), 'inline')
637
638    # test_defect_handling:test_invalid_chars_in_base64_payload
639    def test_broken_base64_payload(self):
640        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
641        msg = Message()
642        msg['content-type'] = 'audio/x-midi'
643        msg['content-transfer-encoding'] = 'base64'
644        msg.set_payload(x)
645        self.assertEqual(msg.get_payload(decode=True),
646                         (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
647                          b'\xa1\x00p\xf6\xbf\xe9\x0f'))
648        self.assertIsInstance(msg.defects[0],
649                              errors.InvalidBase64CharactersDefect)
650
651    def test_broken_unicode_payload(self):
652        # This test improves coverage but is not a compliance test.
653        # The behavior in this situation is currently undefined by the API.
654        x = 'this is a br\xf6ken thing to do'
655        msg = Message()
656        msg['content-type'] = 'text/plain'
657        msg['content-transfer-encoding'] = '8bit'
658        msg.set_payload(x)
659        self.assertEqual(msg.get_payload(decode=True),
660                         bytes(x, 'raw-unicode-escape'))
661
662    def test_questionable_bytes_payload(self):
663        # This test improves coverage but is not a compliance test,
664        # since it involves poking inside the black box.
665        x = 'this is a quéstionable thing to do'.encode('utf-8')
666        msg = Message()
667        msg['content-type'] = 'text/plain; charset="utf-8"'
668        msg['content-transfer-encoding'] = '8bit'
669        msg._payload = x
670        self.assertEqual(msg.get_payload(decode=True), x)
671
672    # Issue 1078919
673    def test_ascii_add_header(self):
674        msg = Message()
675        msg.add_header('Content-Disposition', 'attachment',
676                       filename='bud.gif')
677        self.assertEqual('attachment; filename="bud.gif"',
678            msg['Content-Disposition'])
679
680    def test_noascii_add_header(self):
681        msg = Message()
682        msg.add_header('Content-Disposition', 'attachment',
683            filename="Fußballer.ppt")
684        self.assertEqual(
685            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
686            msg['Content-Disposition'])
687
688    def test_nonascii_add_header_via_triple(self):
689        msg = Message()
690        msg.add_header('Content-Disposition', 'attachment',
691            filename=('iso-8859-1', '', 'Fußballer.ppt'))
692        self.assertEqual(
693            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
694            msg['Content-Disposition'])
695
696    def test_ascii_add_header_with_tspecial(self):
697        msg = Message()
698        msg.add_header('Content-Disposition', 'attachment',
699            filename="windows [filename].ppt")
700        self.assertEqual(
701            'attachment; filename="windows [filename].ppt"',
702            msg['Content-Disposition'])
703
704    def test_nonascii_add_header_with_tspecial(self):
705        msg = Message()
706        msg.add_header('Content-Disposition', 'attachment',
707            filename="Fußballer [filename].ppt")
708        self.assertEqual(
709            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
710            msg['Content-Disposition'])
711
712    def test_binary_quopri_payload(self):
713        for charset in ('latin-1', 'ascii'):
714            msg = Message()
715            msg['content-type'] = 'text/plain; charset=%s' % charset
716            msg['content-transfer-encoding'] = 'quoted-printable'
717            msg.set_payload(b'foo=e6=96=87bar')
718            self.assertEqual(
719                msg.get_payload(decode=True),
720                b'foo\xe6\x96\x87bar',
721                'get_payload returns wrong result with charset %s.' % charset)
722
723    def test_binary_base64_payload(self):
724        for charset in ('latin-1', 'ascii'):
725            msg = Message()
726            msg['content-type'] = 'text/plain; charset=%s' % charset
727            msg['content-transfer-encoding'] = 'base64'
728            msg.set_payload(b'Zm9v5paHYmFy')
729            self.assertEqual(
730                msg.get_payload(decode=True),
731                b'foo\xe6\x96\x87bar',
732                'get_payload returns wrong result with charset %s.' % charset)
733
734    def test_binary_uuencode_payload(self):
735        for charset in ('latin-1', 'ascii'):
736            for encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
737                msg = Message()
738                msg['content-type'] = 'text/plain; charset=%s' % charset
739                msg['content-transfer-encoding'] = encoding
740                msg.set_payload(b"begin 666 -\n)9F]OYI:'8F%R\n \nend\n")
741                self.assertEqual(
742                    msg.get_payload(decode=True),
743                    b'foo\xe6\x96\x87bar',
744                    str(('get_payload returns wrong result ',
745                         'with charset {0} and encoding {1}.')).\
746                        format(charset, encoding))
747
748    def test_add_header_with_name_only_param(self):
749        msg = Message()
750        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
751        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])
752
753    def test_add_header_with_no_value(self):
754        msg = Message()
755        msg.add_header('X-Status', None)
756        self.assertEqual('', msg['X-Status'])
757
758    # Issue 5871: reject an attempt to embed a header inside a header value
759    # (header injection attack).
760    def test_embedded_header_via_Header_rejected(self):
761        msg = Message()
762        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
763        self.assertRaises(errors.HeaderParseError, msg.as_string)
764
765    def test_embedded_header_via_string_rejected(self):
766        msg = Message()
767        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
768        self.assertRaises(errors.HeaderParseError, msg.as_string)
769
770    def test_unicode_header_defaults_to_utf8_encoding(self):
771        # Issue 14291
772        m = MIMEText('abc\n')
773        m['Subject'] = 'É test'
774        self.assertEqual(str(m),textwrap.dedent("""\
775            Content-Type: text/plain; charset="us-ascii"
776            MIME-Version: 1.0
777            Content-Transfer-Encoding: 7bit
778            Subject: =?utf-8?q?=C3=89_test?=
779
780            abc
781            """))
782
783    def test_unicode_body_defaults_to_utf8_encoding(self):
784        # Issue 14291
785        m = MIMEText('É testabc\n')
786        self.assertEqual(str(m),textwrap.dedent("""\
787            Content-Type: text/plain; charset="utf-8"
788            MIME-Version: 1.0
789            Content-Transfer-Encoding: base64
790
791            w4kgdGVzdGFiYwo=
792            """))
793
794
795# Test the email.encoders module
class TestEncoders(unittest.TestCase):

    def test_EncodersEncode_base64(self):
        # Build an image part from a binary fixture and inspect its
        # transfer-encoded payload.
        with openfile('PyBanner048.gif', 'rb') as image_file:
            image_bytes = image_file.read()
        image_part = email.mime.image.MIMEImage(image_bytes)
        encoded_body = image_part.get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        longest = max(len(line) for line in encoded_body.split('\n'))
        self.assertLessEqual(longest, 76)

    def test_encode_empty_payload(self):
        # Setting a charset on a message with no payload is expected to
        # add a 7bit Content-Transfer-Encoding header.
        message = Message()
        message.set_charset('us-ascii')
        self.assertEqual(message['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        # 7bit data and the default us-ascii _charset
        ascii_part = MIMEText('hello world')
        self.assertEqual(ascii_part['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        eight_bit_part = MIMEText('hello \xf8 world')
        self.assertEqual(
            eight_bit_part['content-transfer-encoding'], 'base64')
        # And now with a different charset
        latin1_part = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        self.assertEqual(
            latin1_part['content-transfer-encoding'], 'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        message = MIMEText('文\n', _charset='euc-jp')
        self.assertEqual(message['content-transfer-encoding'], '7bit')
        expected = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/plain; charset="iso-2022-jp"
            Content-Transfer-Encoding: 7bit

            \x1b$BJ8\x1b(B
            """)
        self.assertEqual(message.as_string(), expected)

    def test_qp_encode_latin1(self):
        # Latin-1 text is expected to be serialized as quoted-printable.
        message = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1')
        expected = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-1"
            Content-Transfer-Encoding: quoted-printable

            =E1=F6
            """)
        self.assertEqual(str(message), expected)

    def test_qp_encode_non_latin1(self):
        # Issue 16948
        message = MIMEText('\u017c\n', 'text', 'ISO-8859-2')
        expected = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-2"
            Content-Transfer-Encoding: quoted-printable

            =BF
            """)
        self.assertEqual(str(message), expected)
860
861
862# Test long header wrapping
class TestLongHeaders(TestEmailBase):

    # Do not truncate failure diffs; the expected values below are long.
    maxDiff = None

    def test_split_long_continuation(self):
        # A parsed message whose Subject has an over-long continuation line
        # is expected to be flattened back out unchanged.
        message = email.message_from_string("""\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
""")
        buffer = StringIO()
        Generator(buffer).flatten(message)
        self.ndiffAssertEqual(buffer.getvalue(), """\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
""")

    def test_another_long_almost_unsplittable_header(self):
        # The same multi-line value is encoded once with tab continuations
        # and once with the tabs replaced by spaces; both must come back
        # unchanged.
        source = """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text"""
        tabbed = Header(source, continuation_ws='\t')
        self.ndiffAssertEqual(tabbed.encode(), """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text""")
        spaced = Header(source.replace('\t', ' '))
        self.ndiffAssertEqual(spaced.encode(), """\
bug demonstration
 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
 more text""")

    def test_long_nonstring(self):
        # One Subject header is assembled from three chunks, each in its
        # own charset, and checked both when flattened inside a message and
        # when encoded directly with an explicit line length.
        latin1 = Charset("iso-8859-1")
        latin2 = Charset("iso-8859-2")
        unicode_charset = Charset("utf-8")
        german_chunk = (
            b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
            b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xf6rdert. ')
        czech_chunk = (
            b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
            b'd\xf9vtipu.. ')
        unicode_chunk = (
            '\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
            '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
            '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
            '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
            '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
            'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
            'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
            '\u3044\u307e\u3059\u3002')
        subject = Header(german_chunk, latin1, header_name='Subject')
        subject.append(czech_chunk, latin2)
        subject.append(unicode_chunk, unicode_charset)
        message = Message()
        message['Subject'] = subject
        buffer = StringIO()
        Generator(buffer).flatten(message)
        self.ndiffAssertEqual(buffer.getvalue(), """\
Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
 =?utf-8?b?44CC?=

""")
        self.ndiffAssertEqual(subject.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")

    def test_long_header_encode(self):
        # A long parameter list is expected to fold after a semicolon.
        header = Header(
            'wasnipoop; giraffes="very-long-necked-animals"; '
            'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
            header_name='X-Foobar-Spoink-Defrobnit')
        self.ndiffAssertEqual(header.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')

    def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
        # With continuation_ws='\t' but only spaces in the value, the fold
        # is still expected to start with the space from the value.
        header = Header(
            'wasnipoop; giraffes="very-long-necked-animals"; '
            'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
            header_name='X-Foobar-Spoink-Defrobnit',
            continuation_ws='\t')
        self.ndiffAssertEqual(header.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')

    def test_long_header_encode_with_tab_continuation(self):
        # When the value itself has a tab at the fold point, the folded
        # line is expected to start with that tab.
        header = Header(
            'wasnipoop; giraffes="very-long-necked-animals";\t'
            'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
            header_name='X-Foobar-Spoink-Defrobnit',
            continuation_ws='\t')
        self.ndiffAssertEqual(header.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')

    def test_header_encode_with_different_output_charset(self):
        # A header created with euc-jp is expected to be emitted as
        # iso-2022-jp.
        header = Header('文', 'euc-jp')
        encoded = header.encode()
        self.assertEqual(encoded, "=?iso-2022-jp?b?GyRCSjgbKEI=?=")

    def test_long_header_encode_with_different_output_charset(self):
        # Same charset conversion as the short case, with a value long
        # enough to need two encoded words.
        text = (
            b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
            b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
            b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
            b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'))
        header = Header(text, 'euc-jp')
        expected = """\
=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
        self.assertEqual(header.encode(), expected)

    def test_header_splitter(self):
        message = MIMEText('')
        # It'd be great if we could use add_header() here, but that doesn't
        # guarantee an order of the parameters.
        message['X-Foobar-Spoink-Defrobnit'] = (
            'wasnipoop; giraffes="very-long-necked-animals"; '
            'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
        buffer = StringIO()
        Generator(buffer).flatten(message)
        self.ndiffAssertEqual(buffer.getvalue(), '''\
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"

''')

    def test_no_semis_header_splitter(self):
        # A long References value without semicolons is expected to fold
        # at a space.
        message = Message()
        message['From'] = 'test@dom.ain'
        message['References'] = SPACE.join(
            '<%d@dom.ain>' % number for number in range(10))
        message.set_payload('Test')
        buffer = StringIO()
        Generator(buffer).flatten(message)
        self.ndiffAssertEqual(buffer.getvalue(), """\
From: test@dom.ain
References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>

Test""")

    def test_last_split_chunk_does_not_fit(self):
        # The over-long final chunk is expected on a line of its own.
        header = Header(
            'Subject: the first part of this is short, but_the_second'
            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
            '_all_by_itself')
        self.ndiffAssertEqual(header.encode(), """\
Subject: the first part of this is short,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")

    def test_splittable_leading_char_followed_by_overlong_unsplittable(self):
        # A leading comma is expected to stay on the first line, with the
        # over-long chunk folded onto the next.
        header = Header(
            ', but_the_second'
            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
            '_all_by_itself')
        self.ndiffAssertEqual(header.encode(), """\
,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")

    def test_multiple_splittable_leading_char_followed_by_overlong_unsplittable(self):
        # Two leading commas are expected to share the first line.
        header = Header(
            ', , but_the_second'
            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
            '_all_by_itself')
        self.ndiffAssertEqual(header.encode(), """\
, ,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")

    def test_trailing_splittable_on_overlong_unsplittable(self):
        # A trailing semicolon with no whitespace after it is expected to
        # leave the over-long value on a single line.
        header = Header(
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself;')
        self.ndiffAssertEqual(
            header.encode(),
            "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
            "be_on_a_line_all_by_itself;")

    def test_trailing_splittable_on_overlong_unsplittable_with_leading_splittable(self):
        # The leading semicolon is expected on its own line; the trailing
        # "; " stays attached to the over-long chunk.
        header = Header(
            '; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself; ')
        self.ndiffAssertEqual(header.encode(), """\
;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)

    def test_long_header_with_multiple_sequential_split_chars(self):
        # Runs of two spaces in the value must be kept intact when the
        # header is folded.
        header = Header(
            'This is a long line that has two whitespaces  in a row.  '
            'This used to cause truncation of the header when folded')
        self.ndiffAssertEqual(header.encode(), """\
This is a long line that has two whitespaces  in a row.  This used to cause
 truncation of the header when folded""")

    def test_splitter_split_on_punctuation_only_if_fws_with_header(self):
        # Semicolons and commas without whitespace after them are not
        # expected to be used as fold points.
        header = Header(
            'thisverylongheaderhas;semicolons;and,commas,but'
            'they;arenotlegal;fold,points')
        self.ndiffAssertEqual(
            header.encode(),
            "thisverylongheaderhas;semicolons;and,commas,butthey;"
            "arenotlegal;fold,points")

    def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
        # The value is expected to fold after "before;" and after "fit;;",
        # leaving the over-long last part on its own line.
        header = Header(
            'this is a  test where we need to have more than one line '
            'before; our final line that is just too big to fit;; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself;')
        self.ndiffAssertEqual(header.encode(), """\
this is a  test where we need to have more than one line before;
 our final line that is just too big to fit;;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")

    def test_overlong_last_part_followed_by_split_point(self):
        # A trailing space after the over-long value is expected to be
        # kept, with no fold introduced.
        header = Header(
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself ')
        self.ndiffAssertEqual(
            header.encode(),
            "this_part_does_not_fit_within_maxlinelen_and_"
            "thus_should_be_on_a_line_all_by_itself ")

    def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
        # Two over-long parts separated by "; ; " are expected on separate
        # lines, with the lone semicolon on a line between them.
        header = Header(
            'this_is_a__test_where_we_need_to_have_more_than_one_line_'
            'before_our_final_line_; ; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself; ')
        self.ndiffAssertEqual(header.encode(), """\
this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
 ;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)

    def test_multiline_with_overlong_last_part_followed_by_split_point(self):
        # Same layout as the previous case, but the first part is made of
        # ordinary space-separated words.
        header = Header(
            'this is a test where we need to have more than one line '
            'before our final line; ; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself; ')
        self.ndiffAssertEqual(header.encode(), """\
this is a test where we need to have more than one line before our final line;
 ;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)

    def test_long_header_with_whitespace_runs(self):
        # Each entry ends in two spaces and entries are joined by one more,
        # so the value contains runs of three spaces; the runs must be
        # preserved across folds.
        message = Message()
        message['From'] = 'test@dom.ain'
        message['References'] = SPACE.join(['<foo@dom.ain>  '] * 10)
        message.set_payload('Test')
        buffer = StringIO()
        Generator(buffer).flatten(message)
        self.ndiffAssertEqual(buffer.getvalue(), """\
From: test@dom.ain
References: <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
   <foo@dom.ain>   <foo@dom.ain>\x20\x20

Test""")

    def test_long_run_with_semi_header_splitter(self):
        # A single "; abc" at the end of a long space-separated run is
        # expected to stay attached to the last entry.
        message = Message()
        message['From'] = 'test@dom.ain'
        message['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
        message.set_payload('Test')
        buffer = StringIO()
        Generator(buffer).flatten(message)
        self.ndiffAssertEqual(buffer.getvalue(), """\
From: test@dom.ain
References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain>; abc

Test""")

    def test_splitter_split_on_punctuation_only_if_fws(self):
        # Message-level variant of the punctuation-without-whitespace case:
        # the whole value is expected on one continuation line.
        message = Message()
        message['From'] = 'test@dom.ain'
        message['References'] = (
            'thisverylongheaderhas;semicolons;and,commas,but'
            'they;arenotlegal;fold,points')
        message.set_payload('Test')
        buffer = StringIO()
        Generator(buffer).flatten(message)
        # XXX the space after the header should not be there.
        self.ndiffAssertEqual(buffer.getvalue(), """\
From: test@dom.ain
References:\x20
 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points

Test""")

    def test_no_split_long_header(self):
        prefixed = Header('References: ' + 'x' * 80)
        # These come on two lines because Headers are really field value
        # classes and don't really know about their field names.
        self.ndiffAssertEqual(prefixed.encode(), """\
References:
 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
        bare = Header('x' * 80)
        self.ndiffAssertEqual(
            bare.encode(),
            'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')

    def test_splitting_multiple_long_lines(self):
        # Three long lines separated by tab continuations: each one is
        # expected to fold at its semicolons, and each original line
        # boundary keeps its tab.
        source = """\
from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
"""
        header = Header(source, continuation_ws='\t')
        self.ndiffAssertEqual(header.encode(), """\
from babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")

    def test_splitting_first_line_only_is_long(self):
        # Only the first line exceeds the limit, so it is the only one
        # expected to gain a fold; the tab-led lines stay as written.
        source = """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
        header = Header(
            source, maxlinelen=78, header_name='Received',
            continuation_ws='\t')
        self.ndiffAssertEqual(header.encode(), """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
 helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")

    def test_long_8bit_header(self):
        # The same iso-8859-1 Subject is checked three ways: encoded
        # directly, serialized with a 76-column limit, and serialized with
        # maxheaderlen=0.
        message = Message()
        subject = Header(
            'Britische Regierung gibt', 'iso-8859-1', header_name='Subject')
        subject.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
        self.ndiffAssertEqual(subject.encode(maxlinelen=76), """\
=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
        message['Subject'] = subject
        self.ndiffAssertEqual(message.as_string(maxheaderlen=76), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=

""")
        self.ndiffAssertEqual(message.as_string(maxheaderlen=0), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=

""")

    def test_long_8bit_header_no_charset(self):
        # With no charset given, a non-ASCII value is expected to be
        # emitted as utf-8 encoded words, whether it is assigned as a plain
        # string or wrapped in a Header.
        text = (
            'Britische Regierung gibt gr\xfcnes Licht '
            'f\xfcr Offshore-Windkraftprojekte '
            '<a-very-long-address@example.com>')
        from_string = Message()
        from_string['Reply-To'] = text
        self.ndiffAssertEqual(from_string.as_string(maxheaderlen=78), """\
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=

""")
        from_header = Message()
        from_header['Reply-To'] = Header(text, header_name='Reply-To')
        self.ndiffAssertEqual(from_header.as_string(maxheaderlen=78), """\
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=

""")

    def test_long_to_header(self):
        # The address list is expected to fold only after commas that are
        # followed by a space; the first comma has none and is not used.
        recipients = (
            '"Someone Test #A" <someone@eecs.umich.edu>,'
            '<someone@eecs.umich.edu>, '
            '"Someone Test #B" <someone@umich.edu>, '
            '"Someone Test #C" <someone@eecs.umich.edu>, '
            '"Someone Test #D" <someone@eecs.umich.edu>')
        message = Message()
        message['To'] = recipients
        self.ndiffAssertEqual(message.as_string(maxheaderlen=78), '''\
To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
 "Someone Test #B" <someone@umich.edu>,
 "Someone Test #C" <someone@eecs.umich.edu>,
 "Someone Test #D" <someone@eecs.umich.edu>

''')

    def test_long_line_after_append(self):
        # Text appended to a nearly full line is expected to land on a
        # continuation line.
        first_part = 'This is an example of string which has almost the limit of header length.'
        header = Header(first_part)
        header.append('Add another line.')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
This is an example of string which has almost the limit of header length.
 Add another line.""")

    def test_shorter_line_with_append(self):
        # Text appended to a short line is expected to be joined with a
        # single space and stay on the same line.
        header = Header('This is a shorter line.')
        header.append('Add another sentence. (Surprise?)')
        self.ndiffAssertEqual(
            header.encode(),
            'This is a shorter line. Add another sentence. (Surprise?)')

    def test_long_field_name(self):
        field_name = 'X-Very-Very-Very-Long-Header-Name'
        german_text = (
            'Die Mieter treten hier ein werden mit einem Foerderband '
            'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
            'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
            'bef\xf6rdert. ')
        header = Header(german_text, 'iso-8859-1', header_name=field_name)
        # BAW: this seems broken because the first line is too long
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")

    def test_long_received_header(self):
        # The same value is stored once as a Header with tab continuation
        # and once as a plain string; both are expected to fold alike.
        received = (
            'from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
            'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
            'Wed, 05 Mar 2003 18:10:18 -0700')
        message = Message()
        message['Received-1'] = Header(received, continuation_ws='\t')
        message['Received-2'] = received
        # This should be splitting on spaces not semicolons.
        self.ndiffAssertEqual(message.as_string(maxheaderlen=78), """\
Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700
Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700

""")

    def test_string_headerinst_eq(self):
        # A Header instance and the equivalent plain string are expected
        # to serialize identically.
        value = (
            '<15975.17901.207240.414604@sgigritzmann1.mathematik.'
            'tu-muenchen.de> (David Bremner\'s message of '
            '"Thu, 6 Mar 2003 13:58:21 +0100")')
        message = Message()
        message['Received-1'] = Header(
            value, header_name='Received-1', continuation_ws='\t')
        message['Received-2'] = value
        # XXX The space after the ':' should not be there.
        self.ndiffAssertEqual(message.as_string(maxheaderlen=78), """\
Received-1:\x20
 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Received-2:\x20
 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")

""")

    def test_long_unbreakable_lines_with_continuation(self):
        # The same two-line unbreakable value is stored as a plain string,
        # as a Header, and as a string with an extra leading space; all
        # three are expected to serialize the same way.
        message = Message()
        face = """\
iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
        message['Face-1'] = face
        message['Face-2'] = Header(face, header_name='Face-2')
        message['Face-3'] = ' ' + face
        # XXX This splitting is all wrong.  Either the first value line
        # should be snug against the field name, or the space after the
        # header should not be there.
        self.ndiffAssertEqual(message.as_string(maxheaderlen=78), """\
Face-1:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-2:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-3:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp

""")

    def test_another_long_multiline_header(self):
        # A long Received line parsed from text is expected to fold at a
        # space when serialized with a 78-column limit.
        raw = (
            'Received: from siimage.com '
            '([172.25.1.3]) by zima.siliconimage.com with '
            'Microsoft SMTPSVC(5.0.2195.4905); '
            'Wed, 16 Oct 2002 07:41:11 -0700')
        message = email.message_from_string(raw)
        self.ndiffAssertEqual(message.as_string(maxheaderlen=78), '''\
Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700

''')

    def test_long_lines_with_different_header(self):
        # The value is added under 'List' twice, once as a plain string and
        # once as a Header; both copies are expected to serialize alike.
        value = (
            'List-Unsubscribe: '
            '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
            '        <mailto:spamassassin-talk-request@lists.sourceforge.net'
            '?subject=unsubscribe>')
        message = Message()
        message['List'] = value
        message['List'] = Header(value, header_name='List')
        self.ndiffAssertEqual(message.as_string(maxheaderlen=78), """\
List: List-Unsubscribe:
 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
        <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
List: List-Unsubscribe:
 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
        <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>

""")

    def test_long_rfc2047_header_with_embedded_fws(self):
        # An ASCII value declared as utf-8 and containing embedded line
        # breaks is expected to be emitted as a series of encoded words.
        header = Header(
            textwrap.dedent("""\
            We're going to pretend this header is in a non-ascii character set
            \tto see if line wrapping with encoded words and embedded
               folding white space works"""),
            charset='utf-8',
            header_name='Test')
        expected = textwrap.dedent("""\
            =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
             =?utf-8?q?cter_set?=
             =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
             =?utf-8?q?_folding_white_space_works?=""")+'\n'
        self.assertEqual(header.encode()+'\n', expected)
1432
1433
1434
1435# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # A minimal message whose body starts with a "From " line.
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def _flatten_to_text(self, msg, mangle):
        # Flatten msg with a text Generator using the given mangle_from_
        # setting and return everything that was written.
        sink = StringIO()
        Generator(sink, mangle_from_=mangle).flatten(msg)
        return sink.getvalue()

    def test_mangled_from(self):
        # With mangling on, the body's leading "From " gains a '>' prefix.
        self.assertEqual(self._flatten_to_text(self.msg, True), """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        # With mangling off, the body is written unchanged.
        self.assertEqual(self._flatten_to_text(self.msg, False), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")

    def test_mangle_from_in_preamble_and_epilog(self):
        # Both the preamble and the epilogue of this multipart contain a
        # line starting with "From "; two mangled lines are expected.
        parsed = email.message_from_string(textwrap.dedent("""\
            From: foo@bar.com
            Mime-Version: 1.0
            Content-Type: multipart/mixed; boundary=XXX

            From somewhere unknown

            --XXX
            Content-Type: text/plain

            foo

            --XXX--

            From somewhere unknowable
            """))
        output_lines = self._flatten_to_text(parsed, True).split('\n')
        mangled_count = sum(
            1 for line in output_lines if line.startswith('>From '))
        self.assertEqual(mangled_count, 2)

    def test_mangled_from_with_bad_bytes(self):
        # The 8bit body line contains non-ASCII bytes; BytesGenerator must
        # still prefix it with '>' and leave the bytes untouched.
        headers = textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 8bit
            From: aaa@bbb.org

        """).encode('utf-8')
        parsed = email.message_from_bytes(headers + b'From R\xc3\xb6lli\n')
        sink = BytesIO()
        BytesGenerator(sink, mangle_from_=True).flatten(parsed)
        self.assertEqual(sink.getvalue(), headers + b'>From R\xc3\xb6lli\n')

    def test_multipart_with_bad_bytes_in_cte(self):
        # bpo30835
        # There are no assertions here: the test passes as long as parsing
        # a Content-Transfer-Encoding value with non-ASCII bytes does not
        # raise.
        raw = textwrap.dedent("""\
            From: aperson@example.com
            Content-Type: multipart/mixed; boundary="1"
            Content-Transfer-Encoding: \xc8
        """).encode('utf-8')
        email.message_from_bytes(raw)
1512
1513
1514# Test the basic MIMEAudio class
1515class TestMIMEAudio(unittest.TestCase):
1516    def setUp(self):
1517        with openfile('audiotest.au', 'rb') as fp:
1518            self._audiodata = fp.read()
1519        self._au = MIMEAudio(self._audiodata)
1520
1521    def test_guess_minor_type(self):
1522        self.assertEqual(self._au.get_content_type(), 'audio/basic')
1523
1524    def test_encoding(self):
1525        payload = self._au.get_payload()
1526        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
1527                self._audiodata)
1528
1529    def test_checkSetMinor(self):
1530        au = MIMEAudio(self._audiodata, 'fish')
1531        self.assertEqual(au.get_content_type(), 'audio/fish')
1532
1533    def test_add_header(self):
1534        eq = self.assertEqual
1535        self._au.add_header('Content-Disposition', 'attachment',
1536                            filename='audiotest.au')
1537        eq(self._au['content-disposition'],
1538           'attachment; filename="audiotest.au"')
1539        eq(self._au.get_params(header='content-disposition'),
1540           [('attachment', ''), ('filename', 'audiotest.au')])
1541        eq(self._au.get_param('filename', header='content-disposition'),
1542           'audiotest.au')
1543        missing = []
1544        eq(self._au.get_param('attachment', header='content-disposition'), '')
1545        self.assertIs(self._au.get_param('foo', failobj=missing,
1546                                         header='content-disposition'), missing)
1547        # Try some missing stuff
1548        self.assertIs(self._au.get_param('foobar', missing), missing)
1549        self.assertIs(self._au.get_param('attachment', missing,
1550                                         header='foobar'), missing)
1551
1552
1553
1554# Test the basic MIMEImage class
1555class TestMIMEImage(unittest.TestCase):
1556    def setUp(self):
1557        with openfile('PyBanner048.gif', 'rb') as fp:
1558            self._imgdata = fp.read()
1559        self._im = MIMEImage(self._imgdata)
1560
1561    def test_guess_minor_type(self):
1562        self.assertEqual(self._im.get_content_type(), 'image/gif')
1563
1564    def test_encoding(self):
1565        payload = self._im.get_payload()
1566        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
1567                self._imgdata)
1568
1569    def test_checkSetMinor(self):
1570        im = MIMEImage(self._imgdata, 'fish')
1571        self.assertEqual(im.get_content_type(), 'image/fish')
1572
1573    def test_add_header(self):
1574        eq = self.assertEqual
1575        self._im.add_header('Content-Disposition', 'attachment',
1576                            filename='dingusfish.gif')
1577        eq(self._im['content-disposition'],
1578           'attachment; filename="dingusfish.gif"')
1579        eq(self._im.get_params(header='content-disposition'),
1580           [('attachment', ''), ('filename', 'dingusfish.gif')])
1581        eq(self._im.get_param('filename', header='content-disposition'),
1582           'dingusfish.gif')
1583        missing = []
1584        eq(self._im.get_param('attachment', header='content-disposition'), '')
1585        self.assertIs(self._im.get_param('foo', failobj=missing,
1586                                         header='content-disposition'), missing)
1587        # Try some missing stuff
1588        self.assertIs(self._im.get_param('foobar', missing), missing)
1589        self.assertIs(self._im.get_param('attachment', missing,
1590                                         header='foobar'), missing)
1591
1592
1593
1594# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def _roundtrip(self, msg):
        # Serialize msg to its wire form with BytesGenerator and parse the
        # resulting bytes back into a new message object.
        sink = BytesIO()
        BytesGenerator(sink).flatten(msg)
        return email.message_from_bytes(sink.getvalue())

    def test_headers(self):
        part = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(part.get_content_type(), 'application/octet-stream')
        self.assertEqual(part['content-transfer-encoding'], 'base64')

    def test_body(self):
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        part = MIMEApplication(raw)
        # whitespace in the cte encoded block is RFC-irrelevant.
        self.assertEqual(part.get_payload().strip(), '+vv8/f7/')
        self.assertEqual(part.get_payload(decode=True), raw)

    def test_binary_body_with_encode_7or8bit(self):
        # Issue 17171.
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        as_text = '\uFFFD' * len(raw)
        part = MIMEApplication(raw, _encoder=encoders.encode_7or8bit)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(part.get_payload(), as_text)
        self.assertEqual(part.get_payload(decode=True), raw)
        self.assertEqual(part['Content-Transfer-Encoding'], '8bit')
        reparsed = self._roundtrip(part)
        # The original part is checked again after it has been flattened.
        self.assertEqual(part.get_payload(), as_text)
        self.assertEqual(reparsed.get_payload(decode=True), raw)
        self.assertEqual(reparsed['Content-Transfer-Encoding'], '8bit')

    def test_binary_body_with_encode_noop(self):
        # Issue 16564: This does not produce an RFC valid message, since to be
        # valid it should have a CTE of binary.  But the below works in
        # Python2, and is documented as working this way.
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        as_text = '\uFFFD' * len(raw)
        part = MIMEApplication(raw, _encoder=encoders.encode_noop)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(part.get_payload(), as_text)
        self.assertEqual(part.get_payload(decode=True), raw)
        reparsed = self._roundtrip(part)
        # The original part is checked again after it has been flattened.
        self.assertEqual(part.get_payload(), as_text)
        self.assertEqual(reparsed.get_payload(decode=True), raw)

    def test_binary_body_with_unicode_linend_encode_noop(self):
        # Issue 19003: This is a variation on #16564.
        raw = b'\x0b\xfa\xfb\xfc\xfd\xfe\xff'
        part = MIMEApplication(raw, _encoder=encoders.encode_noop)
        self.assertEqual(part.get_payload(decode=True), raw)
        reparsed = self._roundtrip(part)
        self.assertEqual(reparsed.get_payload(decode=True), raw)

    def test_binary_body_with_encode_quopri(self):
        # Issue 14360.
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff '
        encoded = '=FA=FB=FC=FD=FE=FF=20'
        part = MIMEApplication(raw, _encoder=encoders.encode_quopri)
        self.assertEqual(part.get_payload(), encoded)
        self.assertEqual(part.get_payload(decode=True), raw)
        self.assertEqual(part['Content-Transfer-Encoding'],
                         'quoted-printable')
        reparsed = self._roundtrip(part)
        # The original part is checked again after it has been flattened.
        self.assertEqual(part.get_payload(), encoded)
        self.assertEqual(reparsed.get_payload(decode=True), raw)
        self.assertEqual(reparsed['Content-Transfer-Encoding'],
                         'quoted-printable')

    def test_binary_body_with_encode_base64(self):
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        encoded = '+vv8/f7/\n'
        part = MIMEApplication(raw, _encoder=encoders.encode_base64)
        self.assertEqual(part.get_payload(), encoded)
        self.assertEqual(part.get_payload(decode=True), raw)
        reparsed = self._roundtrip(part)
        # The original part is checked again after it has been flattened.
        self.assertEqual(part.get_payload(), encoded)
        self.assertEqual(reparsed.get_payload(decode=True), raw)
1684
1685
1686# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        # A plain text part created with default arguments only.
        self._msg = MIMEText('hello there')

    def test_types(self):
        part = self._msg
        self.assertEqual(part.get_content_type(), 'text/plain')
        self.assertEqual(part.get_param('charset'), 'us-ascii')
        # A fresh list serves as a unique failobj so assertIs can tell
        # "not found" apart from any real value.
        sentinel = []
        self.assertIs(part.get_param('foobar', sentinel), sentinel)
        self.assertIs(part.get_param('charset', sentinel, header='foobar'),
                      sentinel)

    def test_payload(self):
        part = self._msg
        self.assertEqual(part.get_payload(), 'hello there')
        self.assertFalse(part.is_multipart())

    def test_charset(self):
        by_name = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(by_name.get_charset().input_charset, 'us-ascii')
        self.assertEqual(by_name['content-type'],
                         'text/plain; charset="us-ascii"')
        # Also accept a Charset instance
        utf8 = Charset('utf-8')
        utf8.body_encoding = None
        by_instance = MIMEText('hello there', _charset=utf8)
        self.assertEqual(by_instance.get_charset().input_charset, 'utf-8')
        self.assertEqual(by_instance['content-type'],
                         'text/plain; charset="utf-8"')
        self.assertEqual(by_instance.get_payload(), 'hello there')

    def test_7bit_input(self):
        part = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(part.get_charset().input_charset, 'us-ascii')
        self.assertEqual(part['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        part = MIMEText('hello there')
        # The charset object is compared directly with a plain string here.
        self.assertEqual(part.get_charset(), 'us-ascii')
        self.assertEqual(part['content-type'],
                         'text/plain; charset="us-ascii"')
        self.assertIn('hello there', part.as_string())

    def test_utf8_input(self):
        cyrillic = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        part = MIMEText(cyrillic, _charset='utf-8')
        self.assertEqual(part.get_charset().output_charset, 'utf-8')
        self.assertEqual(part['content-type'], 'text/plain; charset="utf-8"')
        self.assertEqual(part.get_payload(decode=True),
                         cyrillic.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        cyrillic = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        with self.assertRaises(UnicodeEncodeError):
            MIMEText(cyrillic)
1743
1744
1745
1746# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    def setUp(self):
        # Build a multipart/mixed container holding a short text part and a
        # GIF attachment, and keep references to the container and to both
        # parts for the tests below.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        # The last struct_time field is the DST flag; use the non-DST offset
        # when it is 0 and the DST offset otherwise.
        if timetuple[-1] == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        # time.timezone/time.altzone count seconds *west* of UTC, so a
        # positive value corresponds to a negative zone offset.
        if tzsecs > 0:
            sign = '-'
        else:
            sign = '+'
        # Render the zone as " +HHMM" / " -HHMM".  The digits are derived
        # from the absolute value so that zones east of UTC do not produce a
        # second sign character, and zones that are not a whole number of
        # hours from UTC get correct minutes.
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S',
            timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def test_hierarchy(self):
        # convenience
        eq = self.assertEqual
        raises = self.assertRaises
        # tests
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        raises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        # The container must hand back the very objects attached in setUp.
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        # Parsing this text and flattening the result must reproduce the
        # text exactly.
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        # Neither preamble nor epilogue is assigned and no part is attached.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--
''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        # Preamble and epilogue are both set to the empty string; the
        # expected output has one more blank line before the first boundary
        # than in the test above.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        # An empty-string preamble: the expected output has two blank lines
        # between the headers and the first boundary.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')


    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        # A None preamble: the expected output has a single blank line
        # between the headers and the first boundary.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')


    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        # A None epilogue: the expected output ends right after the closing
        # boundary line.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')


    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        # An empty-string epilogue: the expected output is the same as for a
        # None epilogue.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')


    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        # A newline-only epilogue: the expected output has one extra blank
        # line after the closing boundary.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        # The second top-level part of msg_36.txt must be a
        # multipart/alternative with two message/external-body subparts,
        # each wrapping a single text/plain message.
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        # The expected output declares text/html with a boundary parameter;
        # the boundary-looking lines are part of the flattened body text.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        # The boundary parameter starts with four spaces, which must be
        # kept and must still split the body into two parts.
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="    XXXX"

--    XXXX
Content-Type: text/plain


--    XXXX
Content-Type: text/plain

--    XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), '    XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        # The input ends directly after the closing boundary, with no final
        # newline.
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')

    def test_mimebase_default_policy(self):
        # Without a policy argument MIMEBase uses compat32.
        m = MIMEBase('multipart', 'mixed')
        self.assertIs(m.policy, email.policy.compat32)

    def test_mimebase_custom_policy(self):
        # An explicit policy argument is stored on the message unchanged.
        m = MIMEBase('multipart', 'mixed', policy=email.policy.default)
        self.assertIs(m.policy, email.policy.default)
2130
2131# Test some badly formatted messages
class TestNonConformant(TestEmailBase):

    def test_parse_missing_minor_type(self):
        # Whatever msg_14.txt declares, the reported type must be text/plain.
        parsed = self._msgobj('msg_14.txt')
        self.assertEqual(parsed.get_content_type(), 'text/plain')
        self.assertEqual(parsed.get_content_maintype(), 'text')
        self.assertEqual(parsed.get_content_subtype(), 'plain')

    # test_defect_handling
    def test_same_boundary_inner_outer(self):
        outer = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        first_part = outer.get_payload(0)
        self.assertTrue(hasattr(first_part, 'defects'))
        found = first_part.defects
        self.assertEqual(len(found), 1)
        self.assertIsInstance(found[0], errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_multipart_no_boundary(self):
        parsed = self._msgobj('msg_25.txt')
        # The payload is a plain string here, not a list of parts.
        self.assertIsInstance(parsed.get_payload(), str)
        found = parsed.defects
        self.assertEqual(len(found), 2)
        self.assertIsInstance(found[0], errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(found[1],
                              errors.MultipartInvariantViolationDefect)

    # Template shared by the three Content-Transfer-Encoding tests below;
    # the '{}' placeholder sits at the end of the Content-Type header so
    # that one more header line can be appended there.
    multipart_msg = textwrap.dedent("""\
        Date: Wed, 14 Nov 2007 12:56:23 GMT
        From: foo@bar.invalid
        To: foo@bar.invalid
        Subject: Content-Transfer-Encoding: base64 and multipart
        MIME-Version: 1.0
        Content-Type: multipart/mixed;
            boundary="===============3344438784458119861=="{}

        --===============3344438784458119861==
        Content-Type: text/plain

        Test message

        --===============3344438784458119861==
        Content-Type: application/octet-stream
        Content-Transfer-Encoding: base64

        YWJj

        --===============3344438784458119861==--
        """)

    # test_defect_handling
    def test_multipart_invalid_cte(self):
        source = self.multipart_msg.format(
            "\nContent-Transfer-Encoding: base64")
        parsed = self._str_msg(source)
        self.assertEqual(len(parsed.defects), 1)
        self.assertIsInstance(
            parsed.defects[0],
            errors.InvalidMultipartContentTransferEncodingDefect)

    # test_defect_handling
    def test_multipart_no_cte_no_defect(self):
        source = self.multipart_msg.format('')
        parsed = self._str_msg(source)
        self.assertEqual(len(parsed.defects), 0)

    # test_defect_handling
    def test_multipart_valid_cte_no_defect(self):
        # 'BINary' is deliberately given in mixed case.
        for encoding in ('7bit', '8bit', 'BINary'):
            extra_header = "\nContent-Transfer-Encoding: {}".format(encoding)
            parsed = self._str_msg(self.multipart_msg.format(extra_header))
            self.assertEqual(len(parsed.defects), 0)

    # test_headerregistry.TestContentTypeHeader invalid_1 and invalid_2.
    def test_invalid_content_type(self):
        message = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        message['Content-Type'] = 'text'
        self.assertEqual(message.get_content_maintype(), 'text')
        self.assertEqual(message.get_content_subtype(), 'plain')
        self.assertEqual(message.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del message['content-type']
        message['Content-Type'] = 'foo'
        self.assertEqual(message.get_content_maintype(), 'text')
        self.assertEqual(message.get_content_subtype(), 'plain')
        self.assertEqual(message.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        sink = StringIO()
        Generator(sink).flatten(message)
        self.ndiffAssertEqual(sink.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        # The payload is expected to be the raw text, boundary lines
        # included, rather than a list of parts.
        parsed = self._msgobj('msg_31.txt')
        self.ndiffAssertEqual(parsed.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        parsed = self._msgobj('msg_35.txt')
        self.ndiffAssertEqual(parsed.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    # test_defect_handling
    def test_lying_multipart(self):
        parsed = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(parsed, 'defects'))
        found = parsed.defects
        self.assertEqual(len(found), 2)
        self.assertIsInstance(found[0], errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(found[1],
                              errors.MultipartInvariantViolationDefect)

    # test_defect_handling
    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        wrapper = outer.get_payload(1)
        bad = wrapper.get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_first_line_is_continuation_header(self):
        source = ' Line 1\nSubject: test\n\nbody'
        parsed = email.message_from_string(source)
        # The leading continuation line is dropped from the headers and
        # recorded as a defect that carries the offending line.
        self.assertEqual(parsed.keys(), ['Subject'])
        self.assertEqual(parsed.get_payload(), 'body')
        self.assertEqual(len(parsed.defects), 1)
        self.assertDefectsEqual(parsed.defects,
                                [errors.FirstHeaderLineIsContinuationDefect])
        self.assertEqual(parsed.defects[0].line, ' Line 1\n')

    # test_defect_handling
    def test_missing_header_body_separator(self):
        # Our heuristic if we see a line that doesn't look like a header (no
        # leading whitespace but no ':') is to assume that the blank line that
        # separates the header from the body is missing, and to stop parsing
        # headers and start parsing the body.
        parsed = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n')
        self.assertEqual(parsed.keys(), ['Subject'])
        self.assertEqual(parsed.get_payload(),
                         'not a header\nTo: abc\n\nb\n')
        self.assertDefectsEqual(parsed.defects,
                                [errors.MissingHeaderBodySeparatorDefect])
2304
2305
2306# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    # Each test feeds a raw header value to decode_header() and checks the
    # resulting (bytes, charset) chunks; several also rebuild the header
    # with make_header() and check its str() and/or encode() output.

    def test_rfc2047_multiline(self):
        eq = self.assertEqual
        # Two encoded words separated by plain text that spans a folded line
        s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        dh = decode_header(s)
        eq(dh, [
            (b'Re: ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b' baz foo bar ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(dh)
        eq(str(header),
           'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        # Re-encoding with a 76 column limit splits the second encoded word
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_keeper_unicode(self):
        eq = self.assertEqual
        s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        dh = decode_header(s)
        eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
                (b' Pirard <pirard@dom.ain>', None)])
        # The space between the encoded word and the plain text survives
        header = str(make_header(dh))
        eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_keeper_unicode_2(self):
        eq = self.assertEqual
        s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        dh = decode_header(s)
        eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'),
                (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')])
        hu = str(make_header(dh))
        eq(hu, 'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words that directly touch the surrounding text are still
        # decoded, and no whitespace is invented around them.
        s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
                              (b'rg', None), (b'\xe5', 'iso-8859-1'),
                              (b'sbord', None)])

    def test_rfc2047_with_whitespace(self):
        # Whitespace between plain text and encoded words stays attached to
        # the plain text chunks.
        s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'),
                              (b' rg ', None), (b'\xe5', 'iso-8859-1'),
                              (b' sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        s = '=?iso-8859-1?B?%s?='
        data = [                                # only test complete bytes
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi'),
        ]
        for q, a in data:
            # One sub-test per payload, so that a failure names the
            # offending input and the remaining payloads are still checked.
            with self.subTest(payload=q):
                dh = decode_header(s % q)
                self.assertEqual(dh, [(a, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        # '=zz' is not a valid hex escape and is passed through unchanged.
        s = '=?iso-8859-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(s),
                         [(b'andr\xe9=zz', 'iso-8859-1')])

    def test_rfc2047_rfc2047_1(self):
        # 1st testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_2(self):
        # 2nd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= b)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)])

    def test_rfc2047_rfc2047_3(self):
        # 3rd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_4(self):
        # 4th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5a(self):
        # 5th testcase at end of rfc2047 newline is \r\n
        s = '(=?ISO-8859-1?Q?a?=\r\n    =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5b(self):
        # 5th testcase at end of rfc2047 newline is \n
        s = '(=?ISO-8859-1?Q?a?=\n    =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_6(self):
        # 6th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_7(self):
        # 7th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'),
             (b')', None)])
        # The round trip reproduces the input apart from letter case
        self.assertEqual(make_header(decode_header(s)).encode(), s.lower())
        self.assertEqual(str(make_header(decode_header(s))), '(a b)')

    def test_multiline_header(self):
        s = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller@xxx.com>'
        self.assertEqual(decode_header(s),
            [(b'"M\xfcller T"', 'windows-1252'),
             (b'<T.Mueller@xxx.com>', None)])
        # Re-encoding gives the original value with the line break removed
        self.assertEqual(make_header(decode_header(s)).encode(),
                         ''.join(s.splitlines()))
        self.assertEqual(str(make_header(decode_header(s))),
                         '"Müller T" <T.Mueller@xxx.com>')
2433
2434
2435# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    def setUp(self):
        # Keep the raw text of msg_11.txt on the instance for every test
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def test_type_error(self):
        # A plain string is rejected with TypeError
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        # The payload is a one-element list holding the very same object
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        # Attaching a second message to a MIMEMessage must be refused
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <henryi@oxy.edu>
  To: SoCal Raves <scr@socal-raves.org>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: jangel1@cougar.noc.ucla.edu
  Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        eq = self.ndiffAssertEqual
        # Build by hand the message whose flattened form is msg_21.txt
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        # The preamble has no trailing newline; the expected output below
        # still puts the first boundary on a line of its own.
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def _check_digest_default_types(self, filename):
        # Shared body of the two default-type tests: parse the named fixture
        # and require that its first two parts report message/rfc822, and
        # the message nested in each reports text/plain, for both the
        # default type and the content type.
        eq = self.assertEqual
        with openfile(filename) as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type(self):
        self._check_digest_default_types('msg_30.txt')

    def test_default_type_with_explicit_container_type(self):
        self._check_digest_default_types('msg_28.txt')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(unixfrom=False), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # With the explicit headers removed, the types reported for the
        # subparts must not change.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(unixfrom=False), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        self.assertEqual(len(msg.get_payload()), 2)
        # The parts handed to the constructor must be attached as-is, so
        # check object identity rather than equality.
        self.assertIs(msg.get_payload(0), text1)
        self.assertIs(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())

    def test_multipart_default_policy(self):
        # Without an explicit policy, assigning To twice keeps both values
        msg = MIMEMultipart()
        msg['To'] = 'a@b.com'
        msg['To'] = 'c@d.com'
        self.assertEqual(msg.get_all('to'), ['a@b.com', 'c@d.com'])

    def test_multipart_custom_policy(self):
        # With email.policy.default a second To header is refused
        msg = MIMEMultipart(policy=email.policy.default)
        msg['To'] = 'a@b.com'
        with self.assertRaises(ValueError) as cm:
            msg['To'] = 'c@d.com'
        self.assertEqual(str(cm.exception),
                         'There may be at most 1 To headers in a message')
2744
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
# contain a changed date.
class TestIdempotent(TestEmailBase):

    # Line ending appended to the expected preamble, epilogue and payload
    # text in the structural tests below.
    linesep = '\n'

    def _msgobj(self, filename):
        # Return the parsed message together with the raw text it came from
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg with header wrapping disabled (maxheaderlen=0) and
        # require the result to be identical to text.
        eq = self.ndiffAssertEqual
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        eq(text, s.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        eq(msg.preamble, None)
        eq(msg.epilogue, None)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        # The content type falls back to text/plain and no parameters exist
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_params(), None)
        eq(msg.get_param('charset'), None)
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_message_delivery_status(self):
        # The only round trip here that also emits the Unix-From line
        msg, text = self._msgobj('msg_43.txt')
        self._idempotent(msg, text, unixfrom=True)

    def test_message_signed_idempotent(self):
        msg, text = self._msgobj('msg_45.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        eq = self.assertEqual
        # Only the parsed message is inspected; the raw text is not needed
        msg, _ = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        # Only the parsed message is inspected; the raw text is not needed
        msg, _ = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
2905
2906
2907
2908# Test various other bits of the package's functionality
2909class TestMiscellaneous(TestEmailBase):
2910    def test_message_from_string(self):
2911        with openfile('msg_01.txt') as fp:
2912            text = fp.read()
2913        msg = email.message_from_string(text)
2914        s = StringIO()
2915        # Don't wrap/continue long headers since we're trying to test
2916        # idempotency.
2917        g = Generator(s, maxheaderlen=0)
2918        g.flatten(msg)
2919        self.assertEqual(text, s.getvalue())
2920
2921    def test_message_from_file(self):
2922        with openfile('msg_01.txt') as fp:
2923            text = fp.read()
2924            fp.seek(0)
2925            msg = email.message_from_file(fp)
2926            s = StringIO()
2927            # Don't wrap/continue long headers since we're trying to test
2928            # idempotency.
2929            g = Generator(s, maxheaderlen=0)
2930            g.flatten(msg)
2931            self.assertEqual(text, s.getvalue())
2932
2933    def test_message_from_string_with_class(self):
2934        with openfile('msg_01.txt') as fp:
2935            text = fp.read()
2936
2937        # Create a subclass
2938        class MyMessage(Message):
2939            pass
2940
2941        msg = email.message_from_string(text, MyMessage)
2942        self.assertIsInstance(msg, MyMessage)
2943        # Try something more complicated
2944        with openfile('msg_02.txt') as fp:
2945            text = fp.read()
2946        msg = email.message_from_string(text, MyMessage)
2947        for subpart in msg.walk():
2948            self.assertIsInstance(subpart, MyMessage)
2949
2950    def test_message_from_file_with_class(self):
2951        # Create a subclass
2952        class MyMessage(Message):
2953            pass
2954
2955        with openfile('msg_01.txt') as fp:
2956            msg = email.message_from_file(fp, MyMessage)
2957        self.assertIsInstance(msg, MyMessage)
2958        # Try something more complicated
2959        with openfile('msg_02.txt') as fp:
2960            msg = email.message_from_file(fp, MyMessage)
2961        for subpart in msg.walk():
2962            self.assertIsInstance(subpart, MyMessage)
2963
2964    def test_custom_message_does_not_require_arguments(self):
2965        class MyMessage(Message):
2966            def __init__(self):
2967                super().__init__()
2968        msg = self._str_msg("Subject: test\n\ntest", MyMessage)
2969        self.assertIsInstance(msg, MyMessage)
2970
2971    def test__all__(self):
2972        module = __import__('email')
2973        self.assertEqual(sorted(module.__all__), [
2974            'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
2975            'generator', 'header', 'iterators', 'message',
2976            'message_from_binary_file', 'message_from_bytes',
2977            'message_from_file', 'message_from_string', 'mime', 'parser',
2978            'quoprimime', 'utils',
2979            ])
2980
2981    def test_formatdate(self):
2982        now = time.time()
2983        self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2984                         time.gmtime(now)[:6])
2985
2986    def test_formatdate_localtime(self):
2987        now = time.time()
2988        self.assertEqual(
2989            utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2990            time.localtime(now)[:6])
2991
2992    def test_formatdate_usegmt(self):
2993        now = time.time()
2994        self.assertEqual(
2995            utils.formatdate(now, localtime=False),
2996            time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2997        self.assertEqual(
2998            utils.formatdate(now, localtime=False, usegmt=True),
2999            time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
3000
3001    # parsedate and parsedate_tz will become deprecated interfaces someday
3002    def test_parsedate_returns_None_for_invalid_strings(self):
3003        self.assertIsNone(utils.parsedate(''))
3004        self.assertIsNone(utils.parsedate_tz(''))
3005        self.assertIsNone(utils.parsedate(' '))
3006        self.assertIsNone(utils.parsedate_tz(' '))
3007        self.assertIsNone(utils.parsedate('0'))
3008        self.assertIsNone(utils.parsedate_tz('0'))
3009        self.assertIsNone(utils.parsedate('A Complete Waste of Time'))
3010        self.assertIsNone(utils.parsedate_tz('A Complete Waste of Time'))
3011        self.assertIsNone(utils.parsedate_tz('Wed, 3 Apr 2002 12.34.56.78+0800'))
3012        # Not a part of the spec but, but this has historically worked:
3013        self.assertIsNone(utils.parsedate(None))
3014        self.assertIsNone(utils.parsedate_tz(None))
3015
3016    def test_parsedate_compact(self):
3017        # The FWS after the comma is optional
3018        self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
3019                         utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
3020
3021    def test_parsedate_no_dayofweek(self):
3022        eq = self.assertEqual
3023        eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
3024           (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
3025
3026    def test_parsedate_compact_no_dayofweek(self):
3027        eq = self.assertEqual
3028        eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
3029           (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
3030
3031    def test_parsedate_no_space_before_positive_offset(self):
3032        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
3033           (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
3034
3035    def test_parsedate_no_space_before_negative_offset(self):
3036        # Issue 1155362: we already handled '+' for this case.
3037        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
3038           (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
3039
3040
3041    def test_parsedate_accepts_time_with_dots(self):
3042        eq = self.assertEqual
3043        eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
3044           (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
3045        eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
3046           (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
3047
3048    def test_parsedate_acceptable_to_time_functions(self):
3049        eq = self.assertEqual
3050        timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
3051        t = int(time.mktime(timetup))
3052        eq(time.localtime(t)[:6], timetup[:6])
3053        eq(int(time.strftime('%Y', timetup)), 2003)
3054        timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
3055        t = int(time.mktime(timetup[:9]))
3056        eq(time.localtime(t)[:6], timetup[:6])
3057        eq(int(time.strftime('%Y', timetup[:9])), 2003)
3058
3059    def test_mktime_tz(self):
3060        self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
3061                                          -1, -1, -1, 0)), 0)
3062        self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
3063                                          -1, -1, -1, 1234)), -1234)
3064
3065    def test_parsedate_y2k(self):
3066        """Test for parsing a date with a two-digit year.
3067
3068        Parsing a date with a two-digit year should return the correct
3069        four-digit year. RFC822 allows two-digit years, but RFC2822 (which
3070        obsoletes RFC822) requires four-digit years.
3071
3072        """
3073        self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
3074                         utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
3075        self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
3076                         utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
3077
3078    def test_parseaddr_empty(self):
3079        self.assertEqual(utils.parseaddr('<>'), ('', ''))
3080        self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
3081
3082    def test_parseaddr_multiple_domains(self):
3083        self.assertEqual(
3084            utils.parseaddr('a@b@c'),
3085            ('', '')
3086        )
3087        self.assertEqual(
3088            utils.parseaddr('a@b.c@c'),
3089            ('', '')
3090        )
3091        self.assertEqual(
3092            utils.parseaddr('a@172.17.0.1@c'),
3093            ('', '')
3094        )
3095
3096    def test_noquote_dump(self):
3097        self.assertEqual(
3098            utils.formataddr(('A Silly Person', 'person@dom.ain')),
3099            'A Silly Person <person@dom.ain>')
3100
3101    def test_escape_dump(self):
3102        self.assertEqual(
3103            utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
3104            r'"A (Very) Silly Person" <person@dom.ain>')
3105        self.assertEqual(
3106            utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'),
3107            ('A (Very) Silly Person', 'person@dom.ain'))
3108        a = r'A \(Special\) Person'
3109        b = 'person@dom.ain'
3110        self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
3111
3112    def test_escape_backslashes(self):
3113        self.assertEqual(
3114            utils.formataddr((r'Arthur \Backslash\ Foobar', 'person@dom.ain')),
3115            r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
3116        a = r'Arthur \Backslash\ Foobar'
3117        b = 'person@dom.ain'
3118        self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
3119
3120    def test_quotes_unicode_names(self):
3121        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3122        name = "H\u00e4ns W\u00fcrst"
3123        addr = 'person@dom.ain'
3124        utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
3125        latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
3126        self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
3127        self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
3128            latin1_quopri)
3129
3130    def test_accepts_any_charset_like_object(self):
3131        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3132        name = "H\u00e4ns W\u00fcrst"
3133        addr = 'person@dom.ain'
3134        utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
3135        foobar = "FOOBAR"
3136        class CharsetMock:
3137            def header_encode(self, string):
3138                return foobar
3139        mock = CharsetMock()
3140        mock_expected = "%s <%s>" % (foobar, addr)
3141        self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
3142        self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
3143            utf8_base64)
3144
3145    def test_invalid_charset_like_object_raises_error(self):
3146        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3147        name = "H\u00e4ns W\u00fcrst"
3148        addr = 'person@dom.ain'
3149        # An object without a header_encode method:
3150        bad_charset = object()
3151        self.assertRaises(AttributeError, utils.formataddr, (name, addr),
3152            bad_charset)
3153
3154    def test_unicode_address_raises_error(self):
3155        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3156        addr = 'pers\u00f6n@dom.in'
3157        self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
3158        self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
3159
3160    def test_name_with_dot(self):
3161        x = 'John X. Doe <jxd@example.com>'
3162        y = '"John X. Doe" <jxd@example.com>'
3163        a, b = ('John X. Doe', 'jxd@example.com')
3164        self.assertEqual(utils.parseaddr(x), (a, b))
3165        self.assertEqual(utils.parseaddr(y), (a, b))
3166        # formataddr() quotes the name if there's a dot in it
3167        self.assertEqual(utils.formataddr((a, b)), y)
3168
3169    def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
3170        # issue 10005.  Note that in the third test the second pair of
3171        # backslashes is not actually a quoted pair because it is not inside a
3172        # comment or quoted string: the address being parsed has a quoted
3173        # string containing a quoted backslash, followed by 'example' and two
3174        # backslashes, followed by another quoted string containing a space and
3175        # the word 'example'.  parseaddr copies those two backslashes
3176        # literally.  Per rfc5322 this is not technically correct since a \ may
3177        # not appear in an address outside of a quoted string.  It is probably
3178        # a sensible Postel interpretation, though.
3179        eq = self.assertEqual
3180        eq(utils.parseaddr('""example" example"@example.com'),
3181          ('', '""example" example"@example.com'))
3182        eq(utils.parseaddr('"\\"example\\" example"@example.com'),
3183          ('', '"\\"example\\" example"@example.com'))
3184        eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
3185          ('', '"\\\\"example\\\\" example"@example.com'))
3186
3187    def test_parseaddr_preserves_spaces_in_local_part(self):
3188        # issue 9286.  A normal RFC5322 local part should not contain any
3189        # folding white space, but legacy local parts can (they are a sequence
3190        # of atoms, not dotatoms).  On the other hand we strip whitespace from
3191        # before the @ and around dots, on the assumption that the whitespace
3192        # around the punctuation is a mistake in what would otherwise be
3193        # an RFC5322 local part.  Leading whitespace is, usual, stripped as well.
3194        self.assertEqual(('', "merwok wok@xample.com"),
3195            utils.parseaddr("merwok wok@xample.com"))
3196        self.assertEqual(('', "merwok  wok@xample.com"),
3197            utils.parseaddr("merwok  wok@xample.com"))
3198        self.assertEqual(('', "merwok  wok@xample.com"),
3199            utils.parseaddr(" merwok  wok  @xample.com"))
3200        self.assertEqual(('', 'merwok"wok"  wok@xample.com'),
3201            utils.parseaddr('merwok"wok"  wok@xample.com'))
3202        self.assertEqual(('', 'merwok.wok.wok@xample.com'),
3203            utils.parseaddr('merwok. wok .  wok@xample.com'))
3204
3205    def test_formataddr_does_not_quote_parens_in_quoted_string(self):
3206        addr = ("'foo@example.com' (foo@example.com)",
3207                'foo@example.com')
3208        addrstr = ('"\'foo@example.com\' '
3209                            '(foo@example.com)" <foo@example.com>')
3210        self.assertEqual(utils.parseaddr(addrstr), addr)
3211        self.assertEqual(utils.formataddr(addr), addrstr)
3212
3213
3214    def test_multiline_from_comment(self):
3215        x = """\
3216Foo
3217\tBar <foo@example.com>"""
3218        self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
3219
3220    def test_quote_dump(self):
3221        self.assertEqual(
3222            utils.formataddr(('A Silly; Person', 'person@dom.ain')),
3223            r'"A Silly; Person" <person@dom.ain>')
3224
3225    def test_charset_richcomparisons(self):
3226        eq = self.assertEqual
3227        ne = self.assertNotEqual
3228        cset1 = Charset()
3229        cset2 = Charset()
3230        eq(cset1, 'us-ascii')
3231        eq(cset1, 'US-ASCII')
3232        eq(cset1, 'Us-AsCiI')
3233        eq('us-ascii', cset1)
3234        eq('US-ASCII', cset1)
3235        eq('Us-AsCiI', cset1)
3236        ne(cset1, 'usascii')
3237        ne(cset1, 'USASCII')
3238        ne(cset1, 'UsAsCiI')
3239        ne('usascii', cset1)
3240        ne('USASCII', cset1)
3241        ne('UsAsCiI', cset1)
3242        eq(cset1, cset2)
3243        eq(cset2, cset1)
3244
3245    def test_getaddresses(self):
3246        eq = self.assertEqual
3247        eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
3248                               'Bud Person <bperson@dom.ain>']),
3249           [('Al Person', 'aperson@dom.ain'),
3250            ('Bud Person', 'bperson@dom.ain')])
3251
3252    def test_getaddresses_nasty(self):
3253        eq = self.assertEqual
3254        eq(utils.getaddresses(['foo: ;']), [('', '')])
3255        eq(utils.getaddresses(
3256           ['[]*-- =~$']),
3257           [('', ''), ('', ''), ('', '*--')])
3258        eq(utils.getaddresses(
3259           ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
3260           [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
3261
3262    def test_getaddresses_embedded_comment(self):
3263        """Test proper handling of a nested comment"""
3264        eq = self.assertEqual
3265        addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
3266        eq(addrs[0][1], 'foo@bar.com')
3267
3268    def test_getaddresses_header_obj(self):
3269        """Test the handling of a Header object."""
3270        addrs = utils.getaddresses([Header('Al Person <aperson@dom.ain>')])
3271        self.assertEqual(addrs[0][1], 'aperson@dom.ain')
3272
3273    def test_make_msgid_collisions(self):
3274        # Test make_msgid uniqueness, even with multiple threads
3275        class MsgidsThread(Thread):
3276            def run(self):
3277                # generate msgids for 3 seconds
3278                self.msgids = []
3279                append = self.msgids.append
3280                make_msgid = utils.make_msgid
3281                clock = time.monotonic
3282                tfin = clock() + 3.0
3283                while clock() < tfin:
3284                    append(make_msgid(domain='testdomain-string'))
3285
3286        threads = [MsgidsThread() for i in range(5)]
3287        with start_threads(threads):
3288            pass
3289        all_ids = sum([t.msgids for t in threads], [])
3290        self.assertEqual(len(set(all_ids)), len(all_ids))
3291
3292    def test_utils_quote_unquote(self):
3293        eq = self.assertEqual
3294        msg = Message()
3295        msg.add_header('content-disposition', 'attachment',
3296                       filename='foo\\wacky"name')
3297        eq(msg.get_filename(), 'foo\\wacky"name')
3298
3299    def test_get_body_encoding_with_bogus_charset(self):
3300        charset = Charset('not a charset')
3301        self.assertEqual(charset.get_body_encoding(), 'base64')
3302
3303    def test_get_body_encoding_with_uppercase_charset(self):
3304        eq = self.assertEqual
3305        msg = Message()
3306        msg['Content-Type'] = 'text/plain; charset=UTF-8'
3307        eq(msg['content-type'], 'text/plain; charset=UTF-8')
3308        charsets = msg.get_charsets()
3309        eq(len(charsets), 1)
3310        eq(charsets[0], 'utf-8')
3311        charset = Charset(charsets[0])
3312        eq(charset.get_body_encoding(), 'base64')
3313        msg.set_payload(b'hello world', charset=charset)
3314        eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
3315        eq(msg.get_payload(decode=True), b'hello world')
3316        eq(msg['content-transfer-encoding'], 'base64')
3317        # Try another one
3318        msg = Message()
3319        msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
3320        charsets = msg.get_charsets()
3321        eq(len(charsets), 1)
3322        eq(charsets[0], 'us-ascii')
3323        charset = Charset(charsets[0])
3324        eq(charset.get_body_encoding(), encoders.encode_7or8bit)
3325        msg.set_payload('hello world', charset=charset)
3326        eq(msg.get_payload(), 'hello world')
3327        eq(msg['content-transfer-encoding'], '7bit')
3328
3329    def test_charsets_case_insensitive(self):
3330        lc = Charset('us-ascii')
3331        uc = Charset('US-ASCII')
3332        self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
3333
3334    def test_partial_falls_inside_message_delivery_status(self):
3335        eq = self.ndiffAssertEqual
3336        # The Parser interface provides chunks of data to FeedParser in 8192
3337        # byte gulps.  SF bug #1076485 found one of those chunks inside
3338        # message/delivery-status header block, which triggered an
3339        # unreadline() of NeedMoreData.
3340        msg = self._msgobj('msg_43.txt')
3341        sfp = StringIO()
3342        iterators._structure(msg, sfp)
3343        eq(sfp.getvalue(), """\
3344multipart/report
3345    text/plain
3346    message/delivery-status
3347        text/plain
3348        text/plain
3349        text/plain
3350        text/plain
3351        text/plain
3352        text/plain
3353        text/plain
3354        text/plain
3355        text/plain
3356        text/plain
3357        text/plain
3358        text/plain
3359        text/plain
3360        text/plain
3361        text/plain
3362        text/plain
3363        text/plain
3364        text/plain
3365        text/plain
3366        text/plain
3367        text/plain
3368        text/plain
3369        text/plain
3370        text/plain
3371        text/plain
3372        text/plain
3373    text/rfc822-headers
3374""")
3375
3376    def test_make_msgid_domain(self):
3377        self.assertEqual(
3378            email.utils.make_msgid(domain='testdomain-string')[-19:],
3379            '@testdomain-string>')
3380
3381    def test_make_msgid_idstring(self):
3382        self.assertEqual(
3383            email.utils.make_msgid(idstring='test-idstring',
3384                domain='testdomain-string')[-33:],
3385            '.test-idstring@testdomain-string>')
3386
3387    def test_make_msgid_default_domain(self):
3388        with patch('socket.getfqdn') as mock_getfqdn:
3389            mock_getfqdn.return_value = domain = 'pythontest.example.com'
3390            self.assertTrue(
3391                email.utils.make_msgid().endswith(
3392                    '@' + domain + '>'))
3393
3394    def test_Generator_linend(self):
3395        # Issue 14645.
3396        with openfile('msg_26.txt', newline='\n') as f:
3397            msgtxt = f.read()
3398        msgtxt_nl = msgtxt.replace('\r\n', '\n')
3399        msg = email.message_from_string(msgtxt)
3400        s = StringIO()
3401        g = email.generator.Generator(s)
3402        g.flatten(msg)
3403        self.assertEqual(s.getvalue(), msgtxt_nl)
3404
3405    def test_BytesGenerator_linend(self):
3406        # Issue 14645.
3407        with openfile('msg_26.txt', newline='\n') as f:
3408            msgtxt = f.read()
3409        msgtxt_nl = msgtxt.replace('\r\n', '\n')
3410        msg = email.message_from_string(msgtxt_nl)
3411        s = BytesIO()
3412        g = email.generator.BytesGenerator(s)
3413        g.flatten(msg, linesep='\r\n')
3414        self.assertEqual(s.getvalue().decode('ascii'), msgtxt)
3415
3416    def test_BytesGenerator_linend_with_non_ascii(self):
3417        # Issue 14645.
3418        with openfile('msg_26.txt', 'rb') as f:
3419            msgtxt = f.read()
3420        msgtxt = msgtxt.replace(b'with attachment', b'fo\xf6')
3421        msgtxt_nl = msgtxt.replace(b'\r\n', b'\n')
3422        msg = email.message_from_bytes(msgtxt_nl)
3423        s = BytesIO()
3424        g = email.generator.BytesGenerator(s)
3425        g.flatten(msg, linesep='\r\n')
3426        self.assertEqual(s.getvalue(), msgtxt)
3427
3428    def test_mime_classes_policy_argument(self):
3429        with openfile('audiotest.au', 'rb') as fp:
3430            audiodata = fp.read()
3431        with openfile('PyBanner048.gif', 'rb') as fp:
3432            bindata = fp.read()
3433        classes = [
3434            (MIMEApplication, ('',)),
3435            (MIMEAudio, (audiodata,)),
3436            (MIMEImage, (bindata,)),
3437            (MIMEMessage, (Message(),)),
3438            (MIMENonMultipart, ('multipart', 'mixed')),
3439            (MIMEText, ('',)),
3440        ]
3441        for cls, constructor in classes:
3442            with self.subTest(cls=cls.__name__, policy='compat32'):
3443                m = cls(*constructor)
3444                self.assertIs(m.policy, email.policy.compat32)
3445            with self.subTest(cls=cls.__name__, policy='default'):
3446                m = cls(*constructor, policy=email.policy.default)
3447                self.assertIs(m.policy, email.policy.default)
3448
3449
3450# Test the iterator/generators
class TestIterators(TestEmailBase):
    def test_body_line_iterator(self):
        # First a simple non-multipart message
        simple = self._msgobj('msg_01.txt')
        simple_lines = list(iterators.body_line_iterator(simple))
        self.assertEqual(len(simple_lines), 6)
        self.ndiffAssertEqual(EMPTYSTRING.join(simple_lines),
                              simple.get_payload())
        # Now a more complicated multipart
        multipart = self._msgobj('msg_02.txt')
        multipart_lines = list(iterators.body_line_iterator(multipart))
        self.assertEqual(len(multipart_lines), 43)
        with openfile('msg_19.txt') as fp:
            self.ndiffAssertEqual(EMPTYSTRING.join(multipart_lines),
                                  fp.read())

    def test_typed_subpart_iterator(self):
        msg = self._msgobj('msg_04.txt')
        payloads = [part.get_payload()
                    for part in iterators.typed_subpart_iterator(msg, 'text')]
        self.assertEqual(len(payloads), 2)
        # The joined payloads are the same two lines, twice over.
        stanza = 'a simple kind of mirror\nto reflect upon our own\n'
        self.assertEqual(EMPTYSTRING.join(payloads), stanza + stanza)

    def test_typed_subpart_iterator_default_type(self):
        msg = self._msgobj('msg_03.txt')
        matches = iterators.typed_subpart_iterator(msg, 'text', 'plain')
        payloads = [part.get_payload() for part in matches]
        self.assertEqual(len(payloads), 1)
        expected = '\nHi,\n\nDo you like this message?\n\n-Me\n'
        self.assertEqual(EMPTYSTRING.join(payloads), expected)

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        from email.feedparser import BufferedSubFile, NeedMoreData
        # (text to push, number of lines readable right after that push)
        pushes = [
            ("a\r \n",  2),
            ("b",       0),
            ("c\n",     1),
            ("",        0),
            ("d\r\n",   1),
            ("e\r",     0),
            ("\nf",     1),
            ("\r\n",    1),
          ]
        subfile = BufferedSubFile()
        collected = []
        for text, expected_count in pushes:
            subfile.push(text)
            # Drain every line that is available now.
            ready = list(iter(subfile.readline, NeedMoreData))
            collected.extend(ready)
            self.assertEqual(expected_count, len(ready))
        self.assertEqual(len(collected), sum(count for _, count in pushes))
        # Nothing may be lost or invented: the output joins to the input.
        self.assertEqual(''.join([text for text, _ in pushes]),
                         ''.join(collected))

    def test_push_random(self):
        from email.feedparser import BufferedSubFile, NeedMoreData

        total_chars = 10000
        step = 5
        alphabet = 'abcd \t\r\n'

        # Random text, pushed in small slices, must come back split exactly
        # as str.splitlines(True) splits it.
        text = ''.join(choice(alphabet) for _ in range(total_chars)) + '\n'
        expected = text.splitlines(True)

        subfile = BufferedSubFile()
        got = []
        for start in range(0, len(text), step):
            subfile.push(text[start:start + step])
            got.extend(iter(subfile.readline, NeedMoreData))
        self.assertEqual(got, expected)
3552
3553
class TestFeedParsers(TestEmailBase):

    def parse(self, chunks):
        # Feed every element of *chunks*, in order, to a fresh FeedParser
        # and return what its close() returns.
        parser = FeedParser()
        for piece in chunks:
            parser.feed(piece)
        return parser.close()

    def test_empty_header_name_handled(self):
        # Issue 19996
        # NOTE: a plain str is passed here, so parse() iterates over it and
        # feeds the parser one character at a time.
        text = "First: val\n: bad\nSecond: val"
        msg = self.parse(text)
        for name in ('First', 'Second'):
            self.assertEqual(msg[name], 'val')

    def test_newlines(self):
        # (chunks to feed, expected header names)
        key_cases = (
            (['a:\nb:\rc:\r\nd:\n'], ['a', 'b', 'c', 'd']),
            (['a:\nb:\rc:\r\nd:'], ['a', 'b', 'c', 'd']),
            (['a:\rb', 'c:\n'], ['a', 'bc']),
            (['a:\r', 'b:\n'], ['a', 'b']),
            (['a:\r', '\nb:\n'], ['a', 'b']),
        )
        for chunks, names in key_cases:
            self.assertEqual(self.parse(chunks).keys(), names)

        # Only CR and LF should break header fields
        item_cases = (
            (['a:\x85b:\u2028c:\n'], [('a', '\x85b:\u2028c:')]),
            (['a:\r', 'b:\x85', 'c:\n'], [('a', ''), ('b', '\x85c:')]),
        )
        for chunks, pairs in item_cases:
            self.assertEqual(self.parse(chunks).items(), pairs)

    def test_long_lines(self):
        # Expected peak memory use on 32-bit platform: 6*N*M bytes.
        M, N = 1000, 20000
        run = 'x' * M
        headers = [('a', 'b')]

        # A body of N chunks with no line ending at all, after either kind
        # of blank line.
        for head in ('a:b\n\n', 'a:b\r\r'):
            msg = self.parse([head] + [run] * N)
            self.assertEqual(msg.items(), headers)
            self.assertEqual(msg.get_payload(), run * N)

        # The same with '\x85' closing every chunk.
        msg = self.parse(['a:b\r\r'] + [run + '\x85'] * N)
        self.assertEqual(msg.items(), headers)
        self.assertEqual(msg.get_payload(), (run + '\x85') * N)

        # The long run as a header value rather than as the body.
        msg = self.parse(['a:\r', 'b: '] + [run] * N)
        self.assertEqual(msg.items(), [('a', ''), ('b', run * N)])
3600
3601
class TestParsers(TestEmailBase):

    # No limit on the length of the diffs shown for failed assertions.
    maxDiff = None

    def test_header_parser(self):
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        for name, value in (('from', 'ppp-request@zzz.org'),
                            ('to', 'ppp@zzz.org')):
            self.assertEqual(msg[name], value)
        self.assertEqual(msg.get_content_type(), 'multipart/mixed')
        # The body is not parsed: it stays a single string.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_bytes_header_parser(self):
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt', 'rb') as fp:
            msg = email.parser.BytesHeaderParser().parse(fp)
        for name, value in (('from', 'ppp-request@zzz.org'),
                            ('to', 'ppp@zzz.org')):
            self.assertEqual(msg[name], value)
        self.assertEqual(msg.get_content_type(), 'multipart/mixed')
        # The body is not parsed: str normally, bytes with decode=True.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)
        self.assertIsInstance(msg.get_payload(decode=True), bytes)

    def test_bytes_parser_does_not_close_file(self):
        with openfile('msg_02.txt', 'rb') as fp:
            parser = email.parser.BytesParser()
            parser.parse(fp)
            self.assertFalse(fp.closed)

    def test_bytes_parser_on_exception_does_not_close_file(self):
        with openfile('msg_15.txt', 'rb') as fp:
            strict_parser = email.parser.BytesParser(
                policy=email.policy.strict)
            with self.assertRaises(email.errors.StartBoundaryNotFoundDefect):
                strict_parser.parse(fp)
            self.assertFalse(fp.closed)

    def test_parser_does_not_close_file(self):
        with openfile('msg_02.txt', 'r') as fp:
            parser = email.parser.Parser()
            parser.parse(fp)
            self.assertFalse(fp.closed)

    def test_parser_on_exception_does_not_close_file(self):
        with openfile('msg_15.txt', 'r') as fp:
            strict_parser = email.parser.Parser(policy=email.policy.strict)
            with self.assertRaises(email.errors.StartBoundaryNotFoundDefect):
                strict_parser.parse(fp)
            self.assertFalse(fp.closed)

    def test_whitespace_continuation(self):
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        text = (
            'From: aperson@dom.ain\n'
            'To: bperson@dom.ain\n'
            'Subject: the next line has a space on it\n'
            '\x20\n'
            'Date: Mon, 8 Apr 2002 15:09:19 -0400\n'
            'Message-ID: spam\n'
            '\n'
            "Here's the message body\n"
        )
        msg = email.message_from_string(text)
        self.assertEqual(msg['subject'],
                         'the next line has a space on it\n ')
        self.assertEqual(msg['message-id'], 'spam')
        self.assertEqual(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        # A whitespace-only line follows the Subject: header, which is the
        # last header of the message.
        text = (
            'From: aperson@dom.ain\n'
            'To: bperson@dom.ain\n'
            'Date: Mon, 8 Apr 2002 15:09:19 -0400\n'
            'Message-ID: spam\n'
            'Subject: the next line has a space on it\n'
            '\x20\n'
            '\n'
            "Here's the message body\n"
        )
        msg = email.message_from_string(text)
        self.assertEqual(msg['subject'],
                         'the next line has a space on it\n ')
        self.assertEqual(msg['message-id'], 'spam')
        self.assertEqual(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        self.assertEqual(len(msg.get_payload()), 2)
        text_part = msg.get_payload(0)
        self.assertEqual(text_part.get_content_type(), 'text/plain')
        self.assertEqual(text_part.get_payload(),
                         'Simple email with attachment.\r\n\r\n')
        attachment = msg.get_payload(1)
        self.assertEqual(attachment.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            original_text = fp.read()
        parsed = email.message_from_string(original_text)
        out = StringIO()
        Generator(out).flatten(parsed, linesep='\r\n')
        self.assertEqual(out.getvalue(), original_text)

    def test_multipart_digest_with_extra_mime_headers(self):
        with openfile('msg_28.txt') as fp:
            digest = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        self.assertEqual(digest.is_multipart(), 1)
        self.assertEqual(len(digest.get_payload()), 2)
        # Both message/rfc822 parts have the same shape and differ only in
        # the body text.
        for index, body in enumerate(('message 1\n', 'message 2\n')):
            wrapper = digest.get_payload(index)
            self.assertEqual(wrapper.get_content_type(), 'message/rfc822')
            self.assertEqual(wrapper.is_multipart(), 1)
            self.assertEqual(len(wrapper.get_payload()), 1)
            inner = wrapper.get_payload(0)
            self.assertEqual(inner.is_multipart(), 0)
            self.assertEqual(inner.get_content_type(), 'text/plain')
            self.ndiffAssertEqual(inner.get_payload(), body)

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        text = NL.join([
            'From: Andrew Person <aperson@dom.ain',
            'Subject: Test',
            'Date: Tue, 20 Aug 2002 16:43:45 +1000',
        ])
        msg = email.message_from_string(text)
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        first_value, second_value = 'text', 'more text'
        text = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            first_value, second_value)
        msg = email.message_from_string(text)
        self.assertEqual(msg.get('Header'), first_value)
        self.assertEqual(msg.get('Next-Header'), second_value)

    def test_rfc2822_header_syntax(self):
        # Unusual punctuation in header names; all three are expected back.
        text = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(text)
        self.assertEqual(len(msg), 3)
        self.assertEqual(sorted(msg), ['!"#QUX;~', '>From', 'From'])
        self.assertEqual(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        # The first line has a space before any colon; no headers at all are
        # expected from this text.
        text = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(text)
        self.assertEqual(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        text = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(text)
        self.assertEqual(sorted(msg.keys()), ['A', 'B', 'CC'])
        self.assertEqual(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        text = ''.join([
            "From: foo@bar.com\n",
            "To: baz\n",
            "Mime-Version: 1.0\n",
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n",
            "\n",
            "--BOUNDARY\n",
            "Content-Type: text/plain\n",
            "\n",
            "body ending with CRLF newline\r\n",
            "\n",
            "--BOUNDARY--\n",
        ])
        msg = email.message_from_string(text)
        part_body = msg.get_payload(0).get_payload()
        self.assertTrue(part_body.endswith('\r\n'))
3802
3803
class Test8BitBytesHandling(TestEmailBase):
    # In Python3 all input is string, but that doesn't work if the actual input
    # uses an 8bit transfer encoding.  To hack around that, in email 5.1 we
    # decode byte streams using the surrogateescape error handler, and
    # reconvert to binary at appropriate places if we detect surrogates.  This
    # doesn't allow us to transform headers with 8bit bytes (they get munged),
    # but it does allow us to parse and preserve them, and to decode body
    # parts that use an 8bit CTE.

    # Template for a single-part text message.  The body tests below fill in
    # the charset, the Content-Transfer-Encoding and one line of body text.
    bodytest_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: baz
        Mime-Version: 1.0
        Content-Type: text/plain; charset={charset}
        Content-Transfer-Encoding: {cte}

        {bodyline}
        """)

    def test_known_8bit_CTE(self):
        # Known charset + 8bit CTE: get_payload() gives the decoded text and
        # decode=True gives back the original utf-8 bytes.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='8bit',
                                     bodyline='pöstal').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), "pöstal\n")
        self.assertEqual(msg.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_unknown_8bit_CTE(self):
        # Unrecognised charset: the two non-ASCII bytes appear as U+FFFD in
        # the str payload, while decode=True still returns the original bytes.
        m = self.bodytest_msg.format(charset='notavalidcharset',
                                     cte='8bit',
                                     bodyline='pöstal').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
        self.assertEqual(msg.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_8bit_in_quopri_body(self):
        # This is non-RFC compliant data...without 'decode' the library code
        # decodes the body using the charset from the headers, and because the
        # source byte really is utf-8 this works.  This is likely to fail
        # against real dirty data (ie: produce mojibake), but the data is
        # invalid anyway so it is as good a guess as any.  But this means that
        # this test just confirms the current behavior; that behavior is not
        # necessarily the best possible behavior.  With 'decode' it is
        # returning the raw bytes, so that test should be of correct behavior,
        # or at least produce the same result that email4 did.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='quoted-printable',
                                     bodyline='p=C3=B6stál').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
        self.assertEqual(msg.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
        # This is similar to the previous test, but proves that if the 8bit
        # byte is undecodeable in the specified charset, it gets replaced
        # by the unicode 'unknown' character.  Again, this may or may not
        # be the ideal behavior.  Note that if decode=False none of the
        # decoders will get involved, so this is the only test we need
        # for this behavior.
        m = self.bodytest_msg.format(charset='ascii',
                                     cte='quoted-printable',
                                     bodyline='p=C3=B6stál').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
        self.assertEqual(msg.get_payload(decode=True),
                        'pöstál\n'.encode('utf-8'))

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_8bit_in_base64_body(self):
        # If we get 8bit bytes in a base64 body, we can just ignore them
        # as being outside the base64 alphabet and decode anyway.  But
        # we register a defect.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='base64',
                                     bodyline='cMO2c3RhbAá=').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(decode=True),
                         'pöstal'.encode('utf-8'))
        self.assertIsInstance(msg.defects[0],
                              errors.InvalidBase64CharactersDefect)

    def test_8bit_in_uuencode_body(self):
        # Sticking an 8bit byte in a uuencode block makes it undecodable by
        # normal means, so the block is returned undecoded, but as bytes.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='uuencode',
                                     bodyline='<,.V<W1A; á ').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(decode=True),
                         '<,.V<W1A; á \n'.encode('utf-8'))


    # Each entry pairs a raw header line (as it appears in the input) with
    # (name, value), where value is how str(msg) is expected to render it.
    headertest_headers = (
        ('From: foo@bar.com', ('From', 'foo@bar.com')),
        ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
        ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
            '\tJean de Baddie',
            ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
                'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
                ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
        ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
        )
    # The raw header lines above followed by one body line, as utf-8 bytes.
    headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
        '\nYes, they are flying.\n').encode('utf-8')

    def test_get_8bit_header(self):
        # Both get() and item access render the undecodable bytes as U+FFFD.
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
        self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')

    def test_print_8bit_headers(self):
        # str(msg) must render every header with the expected value listed in
        # headertest_headers, followed by a blank line and the body.
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertEqual(str(msg),
                         textwrap.dedent("""\
                            From: {}
                            To: {}
                            Subject: {}
                            From: {}

                            Yes, they are flying.
                            """).format(*[expected[1] for (_, expected) in
                                        self.headertest_headers]))

    def test_values_with_8bit_headers(self):
        # values() yields objects whose str() has U+FFFD for each 8bit byte.
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual([str(x) for x in msg.values()],
                              ['foo@bar.com',
                               'b\uFFFD\uFFFDz',
                               'Maintenant je vous pr\uFFFD\uFFFDsente mon '
                                   'coll\uFFFD\uFFFDgue, le pouf '
                                   'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
                                   '\tJean de Baddie',
                               "g\uFFFD\uFFFDst"])

    def test_items_with_8bit_headers(self):
        # Same expectation as test_values_with_8bit_headers, via items().
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
                              [('From', 'foo@bar.com'),
                               ('To', 'b\uFFFD\uFFFDz'),
                               ('Subject', 'Maintenant je vous '
                                  'pr\uFFFD\uFFFDsente '
                                  'mon coll\uFFFD\uFFFDgue, le pouf '
                                  'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
                                  '\tJean de Baddie'),
                               ('From', 'g\uFFFD\uFFFDst')])

    def test_get_all_with_8bit_headers(self):
        # get_all() returns both From headers, the 8bit one with U+FFFD.
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual([str(x) for x in msg.get_all('from')],
                              ['foo@bar.com',
                               'g\uFFFD\uFFFDst'])

    def test_get_content_type_with_8bit(self):
        # An 8bit byte inside the subtype shows up as U+FFFD in the content
        # type and subtype; the maintype is still reported as 'text'.
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/pl\xA7in; charset=utf-8
            """).encode('latin-1'))
        self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
        self.assertEqual(msg.get_content_maintype(), "text")
        self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")

    # test_headerregistry.TestContentTypeHeader.non_ascii_in_params
    def test_get_params_with_8bit(self):
        # 8bit bytes in parameter names and values come back as U+FFFD.
        msg = email.message_from_bytes(
            'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
        self.assertEqual(msg.get_params(header='x-header'),
           [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
        self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
        # XXX: someday you might be able to get 'b\xa7r', for now you can't.
        self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)

    # test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value
    def test_get_rfc2231_params_with_8bit(self):
        # An 8bit byte inside an RFC 2231 encoded value decodes to U+FFFD.
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/plain; charset=us-ascii;
             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
             ).encode('latin-1'))
        self.assertEqual(msg.get_param('title'),
            ('us-ascii', 'en', 'This is not f\uFFFDn'))

    def test_set_rfc2231_params_with_8bit(self):
        # set_param() must be able to replace a parameter whose existing
        # value contains an 8bit byte.
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/plain; charset=us-ascii;
             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
             ).encode('latin-1'))
        msg.set_param('title', 'test')
        self.assertEqual(msg.get_param('title'), 'test')

    def test_del_rfc2231_params_with_8bit(self):
        # del_param() must be able to remove such a parameter and leave the
        # rest of the Content-Type usable.
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/plain; charset=us-ascii;
             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
             ).encode('latin-1'))
        msg.del_param('title')
        self.assertEqual(msg.get_param('title'), None)
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_payload_with_8bit_cte_header(self):
        # The Content-Transfer-Encoding value itself contains an 8bit byte;
        # the payload must come back unchanged, as str and as bytes.
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Transfer-Encoding: b\xa7se64
            Content-Type: text/plain; charset=latin-1

            payload
            """).encode('latin-1'))
        self.assertEqual(msg.get_payload(), 'payload\n')
        self.assertEqual(msg.get_payload(decode=True), b'payload\n')

    # Message with 8bit header values and a Cyrillic body declared as
    # utf-8 / 8bit, stored as the raw utf-8 bytes.
    non_latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: báz
        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
        \tJean de Baddie
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        Да, они летят.
        """).encode('utf-8')

    def test_bytes_generator(self):
        # BytesGenerator must reproduce the input bytes exactly.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg)
        self.assertEqual(out.getvalue(), self.non_latin_bin_msg)

    def test_bytes_generator_handles_None_body(self):
        # Issue 11019: a message with no payload flattens to a lone newline.
        msg = email.message.Message()
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg)
        self.assertEqual(out.getvalue(), b"\n")

    # Expected str Generator output for non_latin_bin_msg: the 8bit headers
    # become folded unknown-8bit encoded words and the body becomes base64.
    non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
        From: foo@bar.com
        To: =?unknown-8bit?q?b=C3=A1z?=
        Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
         =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
         =?unknown-8bit?q?_Jean_de_Baddie?=
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: base64

        0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
        """)

    def test_generator_handles_8bit(self):
        # The str Generator must emit the 7bit-safe form of the message.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = StringIO()
        email.generator.Generator(out).flatten(msg)
        self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)

    def test_str_generator_should_not_mutate_msg_when_handling_8bit(self):
        # Flatten to bytes, run the str Generator, then flatten to bytes
        # again: both byte outputs must be identical.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = BytesIO()
        BytesGenerator(out).flatten(msg)
        orig_value = out.getvalue()
        Generator(StringIO()).flatten(msg) # Should not mutate msg!
        out = BytesIO()
        BytesGenerator(out).flatten(msg)
        self.assertEqual(out.getvalue(), orig_value)

    def test_bytes_generator_with_unix_from(self):
        # The unixfrom contains a current date, so we can't check it
        # literally.  Just make sure the first word is 'From' and the
        # rest of the message matches the input.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
        lines = out.getvalue().split(b'\n')
        self.assertEqual(lines[0].split()[0], b'From')
        self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)

    # Variant of the wrapped fixture used for str(msg): the Subject line and
    # its first continuation line are replaced by one longer encoded word;
    # the last continuation line (' ..._Jean_de_Baddie?=') is kept.
    non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
    non_latin_bin_msg_as7bit[2:4] = [
        'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
         'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
    non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)

    def test_message_from_binary_file(self):
        # Write the fixture to a file, parse it back with BytesParser from a
        # binary file object and compare the str() rendering.  The file is
        # removed again by the cleanup registered before it is created.
        fn = 'test.msg'
        self.addCleanup(unlink, fn)
        with open(fn, 'wb') as testfile:
            testfile.write(self.non_latin_bin_msg)
        with open(fn, 'rb') as testfile:
            m = email.parser.BytesParser().parse(testfile)
        self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)

    # A latin-1 encoded message with an 8bit body...
    latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="latin-1"
        Content-Transfer-Encoding: 8bit

        oh là là, know what I mean, know what I mean?
        """).encode('latin-1')

    # ...and its expected str rendering: charset spelled iso-8859-1 and the
    # body re-encoded as quoted-printable.
    latin_bin_msg_as7bit = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="iso-8859-1"
        Content-Transfer-Encoding: quoted-printable

        oh l=E0 l=E0, know what I mean, know what I mean?
        """)

    def test_string_generator_reencodes_to_quopri_when_appropriate(self):
        # str() of the latin-1 message must use quoted-printable for the body.
        m = email.message_from_bytes(self.latin_bin_msg)
        self.assertEqual(str(m), self.latin_bin_msg_as7bit)

    def test_decoded_generator_emits_unicode_body(self):
        m = email.message_from_bytes(self.latin_bin_msg)
        out = StringIO()
        email.generator.DecodedGenerator(out).flatten(m)
        # DecodedGenerator output contains an extra blank line compared
        # to the input message.  RDM: not sure if this is a bug or not,
        # but it is not specific to the 8bit->7bit conversion.
        self.assertEqual(out.getvalue(),
            self.latin_bin_msg.decode('latin-1')+'\n')

    def test_bytes_feedparser(self):
        # Feed the message to BytesFeedParser in 10-byte slices; the result
        # must render the same as when the message is parsed in one piece.
        bfp = email.feedparser.BytesFeedParser()
        for i in range(0, len(self.latin_bin_msg), 10):
            bfp.feed(self.latin_bin_msg[i:i+10])
        m = bfp.close()
        self.assertEqual(str(m), self.latin_bin_msg_as7bit)

    def test_crlf_flatten(self):
        # Flattening with linesep='\r\n' must reproduce the bytes of
        # msg_26.txt exactly.
        with openfile('msg_26.txt', 'rb') as fp:
            text = fp.read()
        msg = email.message_from_bytes(text)
        s = BytesIO()
        g = email.generator.BytesGenerator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_8bit_multipart(self):
        # Issue 11605: a multipart message whose parts use an 8bit CTE must
        # survive a parse/flatten round trip unchanged.
        source = textwrap.dedent("""\
            Date: Fri, 18 Mar 2011 17:15:43 +0100
            To: foo@example.com
            From: foodwatch-Newsletter <bar@example.com>
            Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
            Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
            MIME-Version: 1.0
            Content-Type: multipart/alternative;
                    boundary="b1_76a486bee62b0d200f33dc2ca08220ad"

            --b1_76a486bee62b0d200f33dc2ca08220ad
            Content-Type: text/plain; charset="utf-8"
            Content-Transfer-Encoding: 8bit

            Guten Tag, ,

            mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
            Nachrichten aus Japan.


            --b1_76a486bee62b0d200f33dc2ca08220ad
            Content-Type: text/html; charset="utf-8"
            Content-Transfer-Encoding: 8bit

            <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
                "http://www.w3.org/TR/html4/loose.dtd">
            <html lang="de">
            <head>
                    <title>foodwatch - Newsletter</title>
            </head>
            <body>
              <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
                 die Nachrichten aus Japan.</p>
            </body>
            </html>
            --b1_76a486bee62b0d200f33dc2ca08220ad--

            """).encode('utf-8')
        msg = email.message_from_bytes(source)
        s = BytesIO()
        g = email.generator.BytesGenerator(s)
        g.flatten(msg)
        self.assertEqual(s.getvalue(), source)

    def test_bytes_generator_b_encoding_linesep(self):
        # Issue 14062: b encoding was tacking on an extra \n.
        m = Message()
        # This has enough non-ascii that it should always end up b encoded.
        m['Subject'] = Header('žluťoučký kůň')
        s = BytesIO()
        g = email.generator.BytesGenerator(s)
        g.flatten(m, linesep='\r\n')
        self.assertEqual(
            s.getvalue(),
            b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')

    def test_generator_b_encoding_linesep(self):
        # Since this broke in BytesGenerator, test Generator for completeness.
        m = Message()
        # This has enough non-ascii that it should always end up b encoded.
        m['Subject'] = Header('žluťoučký kůň')
        s = StringIO()
        g = email.generator.Generator(s)
        g.flatten(m, linesep='\r\n')
        self.assertEqual(
            s.getvalue(),
            'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')

    # Do not truncate diffs in assertion failure messages.
    maxDiff = None
4216
4217
class BaseTestBytesGeneratorIdempotent:
    # Mixin that makes round-trip ("idempotent") tests work on bytes.
    # Subclasses supply ``linesep``, ``blinesep`` and
    # ``normalize_linesep_regex`` as class attributes.

    maxDiff = None

    def _msgobj(self, filename):
        # Load a sample file as bytes, rewrite its line endings to the
        # subclass's ``blinesep`` and return (parsed message, those bytes).
        with openfile(filename, 'rb') as sample:
            raw = sample.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        parsed = email.message_from_bytes(normalized)
        return parsed, normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten ``msg`` without header re-wrapping (maxheaderlen=0) and
        # require the output to equal ``data`` byte for byte.
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, sink.getvalue())
4234
4235
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                    TestIdempotent):
    # Runs the inherited TestIdempotent tests through BytesGenerator with
    # bare LF line endings: every CRLF in the sample data is first
    # rewritten to LF.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
4241
4242
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # Runs the inherited TestIdempotent tests through BytesGenerator with
    # CRLF line endings: every LF not already preceded by CR is first
    # rewritten to CRLF.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
4248
4249
class TestBase64(unittest.TestCase):
    # Tests for the helper functions in email.base64mime.

    def test_len(self):
        eq = self.assertEqual
        # header_length() must agree with the size of a real encoding.
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        # Expected encoded length for inputs of 0..14 characters: four
        # output characters for every started group of three.
        expected = [0] + [4] * 3 + [8] * 3 + [12] * 3 + [16] * 3 + [20] * 2
        for size, bsize in enumerate(expected):
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        eq = self.assertEqual
        # Empty input gives an empty result.  Only emptiness is pinned: every
        # non-empty result below is a str, so insisting on bytes here would
        # merely assert that the falsy argument is handed back unchanged.
        eq(len(base64mime.body_encode(b'')), 0)
        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # Input that itself ends with a newline
        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        # Test the maxlinelen arg
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # Test the eol argument
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        # Line breaks are encoded like any other character.
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option, with and without an embedded newline.
        # (The second check used to repeat the default-charset assertion
        # above and so did not exercise the option at all.)
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
        eq(he('hello\nworld', charset='iso-8859-2'),
           '=?iso-8859-2?b?aGVsbG8Kd29ybGQ=?=')
4300
4301
4302
4303class TestQuopri(unittest.TestCase):
4304    def setUp(self):
4305        # Set of characters (as byte integers) that don't need to be encoded
4306        # in headers.
4307        self.hlit = list(chain(
4308            range(ord('a'), ord('z') + 1),
4309            range(ord('A'), ord('Z') + 1),
4310            range(ord('0'), ord('9') + 1),
4311            (c for c in b'!*+-/')))
4312        # Set of characters (as byte integers) that do need to be encoded in
4313        # headers.
4314        self.hnon = [c for c in range(256) if c not in self.hlit]
4315        assert len(self.hlit) + len(self.hnon) == 256
4316        # Set of characters (as byte integers) that don't need to be encoded
4317        # in bodies.
4318        self.blit = list(range(ord(' '), ord('~') + 1))
4319        self.blit.append(ord('\t'))
4320        self.blit.remove(ord('='))
4321        # Set of characters (as byte integers) that do need to be encoded in
4322        # bodies.
4323        self.bnon = [c for c in range(256) if c not in self.blit]
4324        assert len(self.blit) + len(self.bnon) == 256
4325
4326    def test_quopri_header_check(self):
4327        for c in self.hlit:
4328            self.assertFalse(quoprimime.header_check(c),
4329                        'Should not be header quopri encoded: %s' % chr(c))
4330        for c in self.hnon:
4331            self.assertTrue(quoprimime.header_check(c),
4332                            'Should be header quopri encoded: %s' % chr(c))
4333
4334    def test_quopri_body_check(self):
4335        for c in self.blit:
4336            self.assertFalse(quoprimime.body_check(c),
4337                        'Should not be body quopri encoded: %s' % chr(c))
4338        for c in self.bnon:
4339            self.assertTrue(quoprimime.body_check(c),
4340                            'Should be body quopri encoded: %s' % chr(c))
4341
4342    def test_header_quopri_len(self):
4343        eq = self.assertEqual
4344        eq(quoprimime.header_length(b'hello'), 5)
4345        # RFC 2047 chrome is not included in header_length().
4346        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
4347           quoprimime.header_length(b'hello') +
4348           # =?xxx?q?...?= means 10 extra characters
4349           10)
4350        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
4351        # RFC 2047 chrome is not included in header_length().
4352        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
4353           quoprimime.header_length(b'h@e@l@l@o@') +
4354           # =?xxx?q?...?= means 10 extra characters
4355           10)
4356        for c in self.hlit:
4357            eq(quoprimime.header_length(bytes([c])), 1,
4358               'expected length 1 for %r' % chr(c))
4359        for c in self.hnon:
4360            # Space is special; it's encoded to _
4361            if c == ord(' '):
4362                continue
4363            eq(quoprimime.header_length(bytes([c])), 3,
4364               'expected length 3 for %r' % chr(c))
4365        eq(quoprimime.header_length(b' '), 1)
4366
4367    def test_body_quopri_len(self):
4368        eq = self.assertEqual
4369        for c in self.blit:
4370            eq(quoprimime.body_length(bytes([c])), 1)
4371        for c in self.bnon:
4372            eq(quoprimime.body_length(bytes([c])), 3)
4373
4374    def test_quote_unquote_idempotent(self):
4375        for x in range(256):
4376            c = chr(x)
4377            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
4378
4379    def _test_header_encode(self, header, expected_encoded_header, charset=None):
4380        if charset is None:
4381            encoded_header = quoprimime.header_encode(header)
4382        else:
4383            encoded_header = quoprimime.header_encode(header, charset)
4384        self.assertEqual(encoded_header, expected_encoded_header)
4385
4386    def test_header_encode_null(self):
4387        self._test_header_encode(b'', '')
4388
4389    def test_header_encode_one_word(self):
4390        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')
4391
4392    def test_header_encode_two_lines(self):
4393        self._test_header_encode(b'hello\nworld',
4394                                '=?iso-8859-1?q?hello=0Aworld?=')
4395
4396    def test_header_encode_non_ascii(self):
4397        self._test_header_encode(b'hello\xc7there',
4398                                '=?iso-8859-1?q?hello=C7there?=')
4399
4400    def test_header_encode_alt_charset(self):
4401        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
4402                charset='iso-8859-2')
4403
4404    def _test_header_decode(self, encoded_header, expected_decoded_header):
4405        decoded_header = quoprimime.header_decode(encoded_header)
4406        self.assertEqual(decoded_header, expected_decoded_header)
4407
4408    def test_header_decode_null(self):
4409        self._test_header_decode('', '')
4410
4411    def test_header_decode_one_word(self):
4412        self._test_header_decode('hello', 'hello')
4413
4414    def test_header_decode_two_lines(self):
4415        self._test_header_decode('hello=0Aworld', 'hello\nworld')
4416
4417    def test_header_decode_non_ascii(self):
4418        self._test_header_decode('hello=C7there', 'hello\xc7there')
4419
4420    def test_header_decode_re_bug_18380(self):
4421        # Issue 18380: Call re.sub with a positional argument for flags in the wrong position
4422        self.assertEqual(quoprimime.header_decode('=30' * 257), '0' * 257)
4423
4424    def _test_decode(self, encoded, expected_decoded, eol=None):
4425        if eol is None:
4426            decoded = quoprimime.decode(encoded)
4427        else:
4428            decoded = quoprimime.decode(encoded, eol=eol)
4429        self.assertEqual(decoded, expected_decoded)
4430
4431    def test_decode_null_word(self):
4432        self._test_decode('', '')
4433
4434    def test_decode_null_line_null_word(self):
4435        self._test_decode('\r\n', '\n')
4436
4437    def test_decode_one_word(self):
4438        self._test_decode('hello', 'hello')
4439
4440    def test_decode_one_word_eol(self):
4441        self._test_decode('hello', 'hello', eol='X')
4442
4443    def test_decode_one_line(self):
4444        self._test_decode('hello\r\n', 'hello\n')
4445
4446    def test_decode_one_line_lf(self):
4447        self._test_decode('hello\n', 'hello\n')
4448
4449    def test_decode_one_line_cr(self):
4450        self._test_decode('hello\r', 'hello\n')
4451
4452    def test_decode_one_line_nl(self):
4453        self._test_decode('hello\n', 'helloX', eol='X')
4454
4455    def test_decode_one_line_crnl(self):
4456        self._test_decode('hello\r\n', 'helloX', eol='X')
4457
4458    def test_decode_one_line_one_word(self):
4459        self._test_decode('hello\r\nworld', 'hello\nworld')
4460
4461    def test_decode_one_line_one_word_eol(self):
4462        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')
4463
4464    def test_decode_two_lines(self):
4465        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')
4466
4467    def test_decode_two_lines_eol(self):
4468        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')
4469
4470    def test_decode_one_long_line(self):
4471        self._test_decode('Spam' * 250, 'Spam' * 250)
4472
4473    def test_decode_one_space(self):
4474        self._test_decode(' ', '')
4475
4476    def test_decode_multiple_spaces(self):
4477        self._test_decode(' ' * 5, '')
4478
4479    def test_decode_one_line_trailing_spaces(self):
4480        self._test_decode('hello    \r\n', 'hello\n')
4481
4482    def test_decode_two_lines_trailing_spaces(self):
4483        self._test_decode('hello    \r\nworld   \r\n', 'hello\nworld\n')
4484
4485    def test_decode_quoted_word(self):
4486        self._test_decode('=22quoted=20words=22', '"quoted words"')
4487
4488    def test_decode_uppercase_quoting(self):
4489        self._test_decode('ab=CD=EF', 'ab\xcd\xef')
4490
4491    def test_decode_lowercase_quoting(self):
4492        self._test_decode('ab=cd=ef', 'ab\xcd\xef')
4493
4494    def test_decode_soft_line_break(self):
4495        self._test_decode('soft line=\r\nbreak', 'soft linebreak')
4496
4497    def test_decode_false_quoting(self):
4498        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')
4499
4500    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
4501        kwargs = {}
4502        if maxlinelen is None:
4503            # Use body_encode's default.
4504            maxlinelen = 76
4505        else:
4506            kwargs['maxlinelen'] = maxlinelen
4507        if eol is None:
4508            # Use body_encode's default.
4509            eol = '\n'
4510        else:
4511            kwargs['eol'] = eol
4512        encoded_body = quoprimime.body_encode(body, **kwargs)
4513        self.assertEqual(encoded_body, expected_encoded_body)
4514        if eol == '\n' or eol == '\r\n':
4515            # We know how to split the result back into lines, so maxlinelen
4516            # can be checked.
4517            for line in encoded_body.splitlines():
4518                self.assertLessEqual(len(line), maxlinelen)
4519
4520    def test_encode_null(self):
4521        self._test_encode('', '')
4522
4523    def test_encode_null_lines(self):
4524        self._test_encode('\n\n', '\n\n')
4525
4526    def test_encode_one_line(self):
4527        self._test_encode('hello\n', 'hello\n')
4528
4529    def test_encode_one_line_crlf(self):
4530        self._test_encode('hello\r\n', 'hello\n')
4531
4532    def test_encode_one_line_eol(self):
4533        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')
4534
4535    def test_encode_one_line_eol_after_non_ascii(self):
4536        # issue 20206; see changeset 0cf700464177 for why the encode/decode.
4537        self._test_encode('hello\u03c5\n'.encode('utf-8').decode('latin1'),
4538                          'hello=CF=85\r\n', eol='\r\n')
4539
4540    def test_encode_one_space(self):
4541        self._test_encode(' ', '=20')
4542
4543    def test_encode_one_line_one_space(self):
4544        self._test_encode(' \n', '=20\n')
4545
# XXX: body_encode() expects strings, but uses ord(char) from these strings
# to index into a 256-entry list.  For code points above 255, this will fail.
# Should there be a check for 8-bit only ord() values in body, or at least
# a comment about the expected input?
4550
4551    def test_encode_two_lines_one_space(self):
4552        self._test_encode(' \n \n', '=20\n=20\n')
4553
4554    def test_encode_one_word_trailing_spaces(self):
4555        self._test_encode('hello   ', 'hello  =20')
4556
4557    def test_encode_one_line_trailing_spaces(self):
4558        self._test_encode('hello   \n', 'hello  =20\n')
4559
4560    def test_encode_one_word_trailing_tab(self):
4561        self._test_encode('hello  \t', 'hello  =09')
4562
4563    def test_encode_one_line_trailing_tab(self):
4564        self._test_encode('hello  \t\n', 'hello  =09\n')
4565
4566    def test_encode_trailing_space_before_maxlinelen(self):
4567        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)
4568
4569    def test_encode_trailing_space_at_maxlinelen(self):
4570        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)
4571
4572    def test_encode_trailing_space_beyond_maxlinelen(self):
4573        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)
4574
4575    def test_encode_whitespace_lines(self):
4576        self._test_encode(' \n' * 5, '=20\n' * 5)
4577
4578    def test_encode_quoted_equals(self):
4579        self._test_encode('a = b', 'a =3D b')
4580
4581    def test_encode_one_long_string(self):
4582        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)
4583
4584    def test_encode_one_long_line(self):
4585        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
4586
4587    def test_encode_one_very_long_line(self):
4588        self._test_encode('x' * 200 + '\n',
4589                2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')
4590
4591    def test_encode_shortest_maxlinelen(self):
4592        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)
4593
4594    def test_encode_maxlinelen_too_small(self):
4595        self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)
4596
4597    def test_encode(self):
4598        eq = self.assertEqual
4599        eq(quoprimime.body_encode(''), '')
4600        eq(quoprimime.body_encode('hello'), 'hello')
4601        # Test the binary flag
4602        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
4603        # Test the maxlinelen arg
4604        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
4605xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
4606 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
4607x xxxx xxxx xxxx xxxx=20""")
4608        # Test the eol argument
4609        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
4610           """\
4611xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
4612 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
4613x xxxx xxxx xxxx xxxx=20""")
4614        eq(quoprimime.body_encode("""\
4615one line
4616
4617two line"""), """\
4618one line
4619
4620two line""")
4621
4622
4623
4624# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # test_body_encode() registers a throwaway 'fake' charset; remove it
        # again (if it was added) so the registry is left as it was found.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        ascii_cs = Charset('us-ascii')
        # Make sure us-ascii = no Unicode conversion
        self.assertEqual(ascii_cs.header_encode('Hello World!'),
                         'Hello World!')
        # Characters outside ASCII cannot be encoded with us-ascii ...
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        with self.assertRaises(UnicodeError):
            ascii_cs.header_encode(eight_bit)
        # ... but utf-8 takes them and produces a base64 encoded word.
        utf8_cs = Charset('utf-8')
        self.assertEqual(utf8_cs.header_encode(eight_bit),
                         '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        check = self.assertEqual
        # A charset with QP body encoding
        latin1 = Charset('iso-8859-1')
        check('hello w=F6rld', latin1.body_encode('hello w\xf6rld'))
        # A charset with Base64 body encoding
        utf8_cs = Charset('utf-8')
        check('aGVsbG8gd29ybGQ=\n', utf8_cs.body_encode(b'hello world'))
        # A charset with None body encoding
        ascii_cs = Charset('us-ascii')
        check('hello world', ascii_cs.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake_cs = Charset('fake')
        check('hello world', fake_cs.body_encode('hello world'))

    def test_unicode_charset_name(self):
        # A valid name is reported back by str(); a name containing a
        # non-ASCII character is rejected with CharsetError.
        ascii_cs = Charset('us-ascii')
        self.assertEqual(str(ascii_cs), 'us-ascii')
        with self.assertRaises(errors.CharsetError):
            Charset('asc\xffii')
4679
4680
4681
4682# Test multilingual MIME headers.
4683class TestHeader(TestEmailBase):
4684    def test_simple(self):
4685        eq = self.ndiffAssertEqual
4686        h = Header('Hello World!')
4687        eq(h.encode(), 'Hello World!')
4688        h.append(' Goodbye World!')
4689        eq(h.encode(), 'Hello World!  Goodbye World!')
4690
4691    def test_simple_surprise(self):
4692        eq = self.ndiffAssertEqual
4693        h = Header('Hello World!')
4694        eq(h.encode(), 'Hello World!')
4695        h.append('Goodbye World!')
4696        eq(h.encode(), 'Hello World! Goodbye World!')
4697
4698    def test_header_needs_no_decoding(self):
4699        h = 'no decoding needed'
4700        self.assertEqual(decode_header(h), [(h, None)])
4701
4702    def test_long(self):
4703        h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
4704                   maxlinelen=76)
4705        for l in h.encode(splitchars=' ').split('\n '):
4706            self.assertLessEqual(len(l), 76)
4707
4708    def test_multilingual(self):
4709        eq = self.ndiffAssertEqual
4710        g = Charset("iso-8859-1")
4711        cz = Charset("iso-8859-2")
4712        utf8 = Charset("utf-8")
4713        g_head = (b'Die Mieter treten hier ein werden mit einem '
4714                  b'Foerderband komfortabel den Korridor entlang, '
4715                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
4716                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
4717        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
4718                   b'd\xf9vtipu.. ')
4719        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
4720                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
4721                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
4722                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
4723                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
4724                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
4725                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
4726                     '\u3044\u307e\u3059\u3002')
4727        h = Header(g_head, g)
4728        h.append(cz_head, cz)
4729        h.append(utf8_head, utf8)
4730        enc = h.encode(maxlinelen=76)
4731        eq(enc, """\
4732=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
4733 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
4734 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
4735 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
4736 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
4737 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
4738 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
4739 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
4740 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
4741 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
4742 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
4743        decoded = decode_header(enc)
4744        eq(len(decoded), 3)
4745        eq(decoded[0], (g_head, 'iso-8859-1'))
4746        eq(decoded[1], (cz_head, 'iso-8859-2'))
4747        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
4748        ustr = str(h)
4749        eq(ustr,
4750           (b'Die Mieter treten hier ein werden mit einem Foerderband '
4751            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
4752            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
4753            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
4754            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
4755            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
4756            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
4757            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
4758            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
4759            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
4760            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
4761            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
4762            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
4763            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
4764            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
4765            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
4766            ).decode('utf-8'))
4767        # Test make_header()
4768        newh = make_header(decode_header(enc))
4769        eq(newh, h)
4770
4771    def test_empty_header_encode(self):
4772        h = Header()
4773        self.assertEqual(h.encode(), '')
4774
4775    def test_header_ctor_default_args(self):
4776        eq = self.ndiffAssertEqual
4777        h = Header()
4778        eq(h, '')
4779        h.append('foo', Charset('iso-8859-1'))
4780        eq(h, 'foo')
4781
4782    def test_explicit_maxlinelen(self):
4783        eq = self.ndiffAssertEqual
4784        hstr = ('A very long line that must get split to something other '
4785                'than at the 76th character boundary to test the non-default '
4786                'behavior')
4787        h = Header(hstr)
4788        eq(h.encode(), '''\
4789A very long line that must get split to something other than at the 76th
4790 character boundary to test the non-default behavior''')
4791        eq(str(h), hstr)
4792        h = Header(hstr, header_name='Subject')
4793        eq(h.encode(), '''\
4794A very long line that must get split to something other than at the
4795 76th character boundary to test the non-default behavior''')
4796        eq(str(h), hstr)
4797        h = Header(hstr, maxlinelen=1024, header_name='Subject')
4798        eq(h.encode(), hstr)
4799        eq(str(h), hstr)
4800
4801    def test_quopri_splittable(self):
4802        eq = self.ndiffAssertEqual
4803        h = Header(charset='iso-8859-1', maxlinelen=20)
4804        x = 'xxxx ' * 20
4805        h.append(x)
4806        s = h.encode()
4807        eq(s, """\
4808=?iso-8859-1?q?xxx?=
4809 =?iso-8859-1?q?x_?=
4810 =?iso-8859-1?q?xx?=
4811 =?iso-8859-1?q?xx?=
4812 =?iso-8859-1?q?_x?=
4813 =?iso-8859-1?q?xx?=
4814 =?iso-8859-1?q?x_?=
4815 =?iso-8859-1?q?xx?=
4816 =?iso-8859-1?q?xx?=
4817 =?iso-8859-1?q?_x?=
4818 =?iso-8859-1?q?xx?=
4819 =?iso-8859-1?q?x_?=
4820 =?iso-8859-1?q?xx?=
4821 =?iso-8859-1?q?xx?=
4822 =?iso-8859-1?q?_x?=
4823 =?iso-8859-1?q?xx?=
4824 =?iso-8859-1?q?x_?=
4825 =?iso-8859-1?q?xx?=
4826 =?iso-8859-1?q?xx?=
4827 =?iso-8859-1?q?_x?=
4828 =?iso-8859-1?q?xx?=
4829 =?iso-8859-1?q?x_?=
4830 =?iso-8859-1?q?xx?=
4831 =?iso-8859-1?q?xx?=
4832 =?iso-8859-1?q?_x?=
4833 =?iso-8859-1?q?xx?=
4834 =?iso-8859-1?q?x_?=
4835 =?iso-8859-1?q?xx?=
4836 =?iso-8859-1?q?xx?=
4837 =?iso-8859-1?q?_x?=
4838 =?iso-8859-1?q?xx?=
4839 =?iso-8859-1?q?x_?=
4840 =?iso-8859-1?q?xx?=
4841 =?iso-8859-1?q?xx?=
4842 =?iso-8859-1?q?_x?=
4843 =?iso-8859-1?q?xx?=
4844 =?iso-8859-1?q?x_?=
4845 =?iso-8859-1?q?xx?=
4846 =?iso-8859-1?q?xx?=
4847 =?iso-8859-1?q?_x?=
4848 =?iso-8859-1?q?xx?=
4849 =?iso-8859-1?q?x_?=
4850 =?iso-8859-1?q?xx?=
4851 =?iso-8859-1?q?xx?=
4852 =?iso-8859-1?q?_x?=
4853 =?iso-8859-1?q?xx?=
4854 =?iso-8859-1?q?x_?=
4855 =?iso-8859-1?q?xx?=
4856 =?iso-8859-1?q?xx?=
4857 =?iso-8859-1?q?_?=""")
4858        eq(x, str(make_header(decode_header(s))))
4859        h = Header(charset='iso-8859-1', maxlinelen=40)
4860        h.append('xxxx ' * 20)
4861        s = h.encode()
4862        eq(s, """\
4863=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
4864 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
4865 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
4866 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
4867 =?iso-8859-1?q?_xxxx_xxxx_?=""")
4868        eq(x, str(make_header(decode_header(s))))
4869
4870    def test_base64_splittable(self):
4871        eq = self.ndiffAssertEqual
4872        h = Header(charset='koi8-r', maxlinelen=20)
4873        x = 'xxxx ' * 20
4874        h.append(x)
4875        s = h.encode()
4876        eq(s, """\
4877=?koi8-r?b?eHh4?=
4878 =?koi8-r?b?eCB4?=
4879 =?koi8-r?b?eHh4?=
4880 =?koi8-r?b?IHh4?=
4881 =?koi8-r?b?eHgg?=
4882 =?koi8-r?b?eHh4?=
4883 =?koi8-r?b?eCB4?=
4884 =?koi8-r?b?eHh4?=
4885 =?koi8-r?b?IHh4?=
4886 =?koi8-r?b?eHgg?=
4887 =?koi8-r?b?eHh4?=
4888 =?koi8-r?b?eCB4?=
4889 =?koi8-r?b?eHh4?=
4890 =?koi8-r?b?IHh4?=
4891 =?koi8-r?b?eHgg?=
4892 =?koi8-r?b?eHh4?=
4893 =?koi8-r?b?eCB4?=
4894 =?koi8-r?b?eHh4?=
4895 =?koi8-r?b?IHh4?=
4896 =?koi8-r?b?eHgg?=
4897 =?koi8-r?b?eHh4?=
4898 =?koi8-r?b?eCB4?=
4899 =?koi8-r?b?eHh4?=
4900 =?koi8-r?b?IHh4?=
4901 =?koi8-r?b?eHgg?=
4902 =?koi8-r?b?eHh4?=
4903 =?koi8-r?b?eCB4?=
4904 =?koi8-r?b?eHh4?=
4905 =?koi8-r?b?IHh4?=
4906 =?koi8-r?b?eHgg?=
4907 =?koi8-r?b?eHh4?=
4908 =?koi8-r?b?eCB4?=
4909 =?koi8-r?b?eHh4?=
4910 =?koi8-r?b?IA==?=""")
4911        eq(x, str(make_header(decode_header(s))))
4912        h = Header(charset='koi8-r', maxlinelen=40)
4913        h.append(x)
4914        s = h.encode()
4915        eq(s, """\
4916=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
4917 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
4918 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
4919 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
4920 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
4921 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
4922        eq(x, str(make_header(decode_header(s))))
4923
4924    def test_us_ascii_header(self):
4925        eq = self.assertEqual
4926        s = 'hello'
4927        x = decode_header(s)
4928        eq(x, [('hello', None)])
4929        h = make_header(x)
4930        eq(s, h.encode())
4931
4932    def test_string_charset(self):
4933        eq = self.assertEqual
4934        h = Header()
4935        h.append('hello', 'iso-8859-1')
4936        eq(h, 'hello')
4937
4938##    def test_unicode_error(self):
4939##        raises = self.assertRaises
4940##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
4941##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
4942##        h = Header()
4943##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
4944##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
4945##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
4946
4947    def test_utf8_shortest(self):
4948        eq = self.assertEqual
4949        h = Header('p\xf6stal', 'utf-8')
4950        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
4951        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
4952        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
4953
4954    def test_bad_8bit_header(self):
4955        raises = self.assertRaises
4956        eq = self.assertEqual
4957        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4958        raises(UnicodeError, Header, x)
4959        h = Header()
4960        raises(UnicodeError, h.append, x)
4961        e = x.decode('utf-8', 'replace')
4962        eq(str(Header(x, errors='replace')), e)
4963        h.append(x, errors='replace')
4964        eq(str(h), e)
4965
4966    def test_escaped_8bit_header(self):
4967        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4968        e = x.decode('ascii', 'surrogateescape')
4969        h = Header(e, charset=email.charset.UNKNOWN8BIT)
4970        self.assertEqual(str(h),
4971                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4972        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4973
4974    def test_header_handles_binary_unknown8bit(self):
4975        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4976        h = Header(x, charset=email.charset.UNKNOWN8BIT)
4977        self.assertEqual(str(h),
4978                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4979        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4980
4981    def test_make_header_handles_binary_unknown8bit(self):
4982        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4983        h = Header(x, charset=email.charset.UNKNOWN8BIT)
4984        h2 = email.header.make_header(email.header.decode_header(h))
4985        self.assertEqual(str(h2),
4986                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4987        self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])
4988
4989    def test_modify_returned_list_does_not_change_header(self):
4990        h = Header('test')
4991        chunks = email.header.decode_header(h)
4992        chunks.append(('ascii', 'test2'))
4993        self.assertEqual(str(h), 'test')
4994
4995    def test_encoded_adjacent_nonencoded(self):
4996        eq = self.assertEqual
4997        h = Header()
4998        h.append('hello', 'iso-8859-1')
4999        h.append('world')
5000        s = h.encode()
5001        eq(s, '=?iso-8859-1?q?hello?= world')
5002        h = make_header(decode_header(s))
5003        eq(h.encode(), s)
5004
5005    def test_whitespace_keeper(self):
5006        eq = self.assertEqual
5007        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
5008        parts = decode_header(s)
5009        eq(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)])
5010        hdr = make_header(parts)
5011        eq(hdr.encode(),
5012           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
5013
5014    def test_broken_base64_header(self):
5015        raises = self.assertRaises
5016        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
5017        raises(errors.HeaderParseError, decode_header, s)
5018
5019    def test_shift_jis_charset(self):
5020        h = Header('文', charset='shift_jis')
5021        self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
5022
5023    def test_flatten_header_with_no_value(self):
5024        # Issue 11401 (regression from email 4.x)  Note that the space after
5025        # the header doesn't reflect the input, but this is also the way
5026        # email 4.x behaved.  At some point it would be nice to fix that.
5027        msg = email.message_from_string("EmptyHeader:")
5028        self.assertEqual(str(msg), "EmptyHeader: \n\n")
5029
5030    def test_encode_preserves_leading_ws_on_value(self):
5031        msg = Message()
5032        msg['SomeHeader'] = '   value with leading ws'
5033        self.assertEqual(str(msg), "SomeHeader:    value with leading ws\n\n")
5034
5035    def test_whitespace_header(self):
5036        self.assertEqual(Header(' ').encode(), ' ')
5037
5038
5039
5040# Test RFC 2231 header parameters (en/de)coding
5041class TestRFC2231(TestEmailBase):
5042
5043    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
5044    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
5045    def test_get_param(self):
5046        eq = self.assertEqual
5047        msg = self._msgobj('msg_29.txt')
5048        eq(msg.get_param('title'),
5049           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
5050        eq(msg.get_param('title', unquote=False),
5051           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
5052
5053    def test_set_param(self):
5054        eq = self.ndiffAssertEqual
5055        msg = Message()
5056        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
5057                      charset='us-ascii')
5058        eq(msg.get_param('title'),
5059           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
5060        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
5061                      charset='us-ascii', language='en')
5062        eq(msg.get_param('title'),
5063           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
5064        msg = self._msgobj('msg_01.txt')
5065        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
5066                      charset='us-ascii', language='en')
5067        eq(msg.as_string(maxheaderlen=78), """\
5068Return-Path: <bbb@zzz.org>
5069Delivered-To: bbb@zzz.org
5070Received: by mail.zzz.org (Postfix, from userid 889)
5071\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
5072MIME-Version: 1.0
5073Content-Transfer-Encoding: 7bit
5074Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
5075From: bbb@ddd.com (John X. Doe)
5076To: bbb@zzz.org
5077Subject: This is a test message
5078Date: Fri, 4 May 2001 14:05:44 -0400
5079Content-Type: text/plain; charset=us-ascii;
5080 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
5081
5082
5083Hi,
5084
5085Do you like this message?
5086
5087-Me
5088""")
5089
5090    def test_set_param_requote(self):
5091        msg = Message()
5092        msg.set_param('title', 'foo')
5093        self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
5094        msg.set_param('title', 'bar', requote=False)
5095        self.assertEqual(msg['content-type'], 'text/plain; title=bar')
5096        # tspecial is still quoted.
5097        msg.set_param('title', "(bar)bell", requote=False)
5098        self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')
5099
5100    def test_del_param(self):
5101        eq = self.ndiffAssertEqual
5102        msg = self._msgobj('msg_01.txt')
5103        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
5104        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
5105            charset='us-ascii', language='en')
5106        msg.del_param('foo', header='Content-Type')
5107        eq(msg.as_string(maxheaderlen=78), """\
5108Return-Path: <bbb@zzz.org>
5109Delivered-To: bbb@zzz.org
5110Received: by mail.zzz.org (Postfix, from userid 889)
5111\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
5112MIME-Version: 1.0
5113Content-Transfer-Encoding: 7bit
5114Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
5115From: bbb@ddd.com (John X. Doe)
5116To: bbb@zzz.org
5117Subject: This is a test message
5118Date: Fri, 4 May 2001 14:05:44 -0400
5119Content-Type: text/plain; charset="us-ascii";
5120 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
5121
5122
5123Hi,
5124
5125Do you like this message?
5126
5127-Me
5128""")
5129
5130    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset
5131    # I changed the charset name, though, because the one in the file isn't
5132    # a legal charset name.  Should add a test for an illegal charset.
5133    def test_rfc2231_get_content_charset(self):
5134        eq = self.assertEqual
5135        msg = self._msgobj('msg_32.txt')
5136        eq(msg.get_content_charset(), 'us-ascii')
5137
5138    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes
5139    def test_rfc2231_parse_rfc_quoting(self):
5140        m = textwrap.dedent('''\
5141            Content-Disposition: inline;
5142            \tfilename*0*=''This%20is%20even%20more%20;
5143            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
5144            \tfilename*2="is it not.pdf"
5145
5146            ''')
5147        msg = email.message_from_string(m)
5148        self.assertEqual(msg.get_filename(),
5149                         'This is even more ***fun*** is it not.pdf')
5150        self.assertEqual(m, msg.as_string())
5151
5152    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
5153    def test_rfc2231_parse_extra_quoting(self):
5154        m = textwrap.dedent('''\
5155            Content-Disposition: inline;
5156            \tfilename*0*="''This%20is%20even%20more%20";
5157            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5158            \tfilename*2="is it not.pdf"
5159
5160            ''')
5161        msg = email.message_from_string(m)
5162        self.assertEqual(msg.get_filename(),
5163                         'This is even more ***fun*** is it not.pdf')
5164        self.assertEqual(m, msg.as_string())
5165
5166    # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset
5167    # but new test uses *0* because otherwise lang/charset is not valid.
5168    # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values
5169    def test_rfc2231_no_language_or_charset(self):
5170        m = '''\
5171Content-Transfer-Encoding: 8bit
5172Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
5173Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
5174
5175'''
5176        msg = email.message_from_string(m)
5177        param = msg.get_param('NAME')
5178        self.assertNotIsInstance(param, tuple)
5179        self.assertEqual(
5180            param,
5181            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
5182
5183    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset
5184    def test_rfc2231_no_language_or_charset_in_filename(self):
5185        m = '''\
5186Content-Disposition: inline;
5187\tfilename*0*="''This%20is%20even%20more%20";
5188\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5189\tfilename*2="is it not.pdf"
5190
5191'''
5192        msg = email.message_from_string(m)
5193        self.assertEqual(msg.get_filename(),
5194                         'This is even more ***fun*** is it not.pdf')
5195
5196    # Duplicate of previous test?
5197    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
5198        m = '''\
5199Content-Disposition: inline;
5200\tfilename*0*="''This%20is%20even%20more%20";
5201\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5202\tfilename*2="is it not.pdf"
5203
5204'''
5205        msg = email.message_from_string(m)
5206        self.assertEqual(msg.get_filename(),
5207                         'This is even more ***fun*** is it not.pdf')
5208
5209    # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded,
5210    # but the test below is wrong (the first part should be decoded).
5211    def test_rfc2231_partly_encoded(self):
5212        m = '''\
5213Content-Disposition: inline;
5214\tfilename*0="''This%20is%20even%20more%20";
5215\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5216\tfilename*2="is it not.pdf"
5217
5218'''
5219        msg = email.message_from_string(m)
5220        self.assertEqual(
5221            msg.get_filename(),
5222            'This%20is%20even%20more%20***fun*** is it not.pdf')
5223
5224    def test_rfc2231_partly_nonencoded(self):
5225        m = '''\
5226Content-Disposition: inline;
5227\tfilename*0="This%20is%20even%20more%20";
5228\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
5229\tfilename*2="is it not.pdf"
5230
5231'''
5232        msg = email.message_from_string(m)
5233        self.assertEqual(
5234            msg.get_filename(),
5235            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
5236
5237    def test_rfc2231_no_language_or_charset_in_boundary(self):
5238        m = '''\
5239Content-Type: multipart/alternative;
5240\tboundary*0*="''This%20is%20even%20more%20";
5241\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
5242\tboundary*2="is it not.pdf"
5243
5244'''
5245        msg = email.message_from_string(m)
5246        self.assertEqual(msg.get_boundary(),
5247                         'This is even more ***fun*** is it not.pdf')
5248
5249    def test_rfc2231_no_language_or_charset_in_charset(self):
5250        # This is a nonsensical charset value, but tests the code anyway
5251        m = '''\
5252Content-Type: text/plain;
5253\tcharset*0*="This%20is%20even%20more%20";
5254\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
5255\tcharset*2="is it not.pdf"
5256
5257'''
5258        msg = email.message_from_string(m)
5259        self.assertEqual(msg.get_content_charset(),
5260                         'this is even more ***fun*** is it not.pdf')
5261
5262    # test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii
5263    def test_rfc2231_bad_encoding_in_filename(self):
5264        m = '''\
5265Content-Disposition: inline;
5266\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
5267\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5268\tfilename*2="is it not.pdf"
5269
5270'''
5271        msg = email.message_from_string(m)
5272        self.assertEqual(msg.get_filename(),
5273                         'This is even more ***fun*** is it not.pdf')
5274
5275    def test_rfc2231_bad_encoding_in_charset(self):
5276        m = """\
5277Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
5278
5279"""
5280        msg = email.message_from_string(m)
5281        # This should return None because non-ascii characters in the charset
5282        # are not allowed.
5283        self.assertEqual(msg.get_content_charset(), None)
5284
5285    def test_rfc2231_bad_character_in_charset(self):
5286        m = """\
5287Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
5288
5289"""
5290        msg = email.message_from_string(m)
5291        # This should return None because non-ascii characters in the charset
5292        # are not allowed.
5293        self.assertEqual(msg.get_content_charset(), None)
5294
5295    def test_rfc2231_bad_character_in_filename(self):
5296        m = '''\
5297Content-Disposition: inline;
5298\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
5299\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5300\tfilename*2*="is it not.pdf%E2"
5301
5302'''
5303        msg = email.message_from_string(m)
5304        self.assertEqual(msg.get_filename(),
5305                         'This is even more ***fun*** is it not.pdf\ufffd')
5306
5307    def test_rfc2231_unknown_encoding(self):
5308        m = """\
5309Content-Transfer-Encoding: 8bit
5310Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
5311
5312"""
5313        msg = email.message_from_string(m)
5314        self.assertEqual(msg.get_filename(), 'myfile.txt')
5315
5316    def test_rfc2231_single_tick_in_filename_extended(self):
5317        eq = self.assertEqual
5318        m = """\
5319Content-Type: application/x-foo;
5320\tname*0*=\"Frank's\"; name*1*=\" Document\"
5321
5322"""
5323        msg = email.message_from_string(m)
5324        charset, language, s = msg.get_param('name')
5325        eq(charset, None)
5326        eq(language, None)
5327        eq(s, "Frank's Document")
5328
5329    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
5330    def test_rfc2231_single_tick_in_filename(self):
5331        m = """\
5332Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
5333
5334"""
5335        msg = email.message_from_string(m)
5336        param = msg.get_param('name')
5337        self.assertNotIsInstance(param, tuple)
5338        self.assertEqual(param, "Frank's Document")
5339
5340    def test_rfc2231_missing_tick(self):
5341        m = '''\
5342Content-Disposition: inline;
5343\tfilename*0*="'This%20is%20broken";
5344'''
5345        msg = email.message_from_string(m)
5346        self.assertEqual(
5347            msg.get_filename(),
5348            "'This is broken")
5349
5350    def test_rfc2231_missing_tick_with_encoded_non_ascii(self):
5351        m = '''\
5352Content-Disposition: inline;
5353\tfilename*0*="'This%20is%E2broken";
5354'''
5355        msg = email.message_from_string(m)
5356        self.assertEqual(
5357            msg.get_filename(),
5358            "'This is\ufffdbroken")
5359
5360    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang
5361    def test_rfc2231_tick_attack_extended(self):
5362        eq = self.assertEqual
5363        m = """\
5364Content-Type: application/x-foo;
5365\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
5366
5367"""
5368        msg = email.message_from_string(m)
5369        charset, language, s = msg.get_param('name')
5370        eq(charset, 'us-ascii')
5371        eq(language, 'en-us')
5372        eq(s, "Frank's Document")
5373
5374    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value
5375    def test_rfc2231_tick_attack(self):
5376        m = """\
5377Content-Type: application/x-foo;
5378\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
5379
5380"""
5381        msg = email.message_from_string(m)
5382        param = msg.get_param('name')
5383        self.assertNotIsInstance(param, tuple)
5384        self.assertEqual(param, "us-ascii'en-us'Frank's Document")
5385
5386    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes
5387    def test_rfc2231_no_extended_values(self):
5388        eq = self.assertEqual
5389        m = """\
5390Content-Type: application/x-foo; name=\"Frank's Document\"
5391
5392"""
5393        msg = email.message_from_string(m)
5394        eq(msg.get_param('name'), "Frank's Document")
5395
5396    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments
5397    def test_rfc2231_encoded_then_unencoded_segments(self):
5398        eq = self.assertEqual
5399        m = """\
5400Content-Type: application/x-foo;
5401\tname*0*=\"us-ascii'en-us'My\";
5402\tname*1=\" Document\";
5403\tname*2*=\" For You\"
5404
5405"""
5406        msg = email.message_from_string(m)
5407        charset, language, s = msg.get_param('name')
5408        eq(charset, 'us-ascii')
5409        eq(language, 'en-us')
5410        eq(s, 'My Document For You')
5411
5412    # test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments
5413    # test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments
5414    def test_rfc2231_unencoded_then_encoded_segments(self):
5415        eq = self.assertEqual
5416        m = """\
5417Content-Type: application/x-foo;
5418\tname*0=\"us-ascii'en-us'My\";
5419\tname*1*=\" Document\";
5420\tname*2*=\" For You\"
5421
5422"""
5423        msg = email.message_from_string(m)
5424        charset, language, s = msg.get_param('name')
5425        eq(charset, 'us-ascii')
5426        eq(language, 'en-us')
5427        eq(s, 'My Document For You')
5428
    def test_should_not_hang_on_invalid_ew_messages(self):
        # Each message below is only parsed; nothing is asserted about the
        # result, so a sample passes as soon as message_from_string() returns
        # without raising.
        #
        # The first sample has a folded header made of three =?us-ascii?Q?...?=
        # chunks.  The second has U+FFFD characters in its From, Subject and
        # body text.
        # NOTE(review): the U+FFFD characters look like intentional payload
        # (already-replaced undecodable bytes) -- keep them exactly as they
        # are rather than trying to "repair" the text.
        messages = ["""From: user@host.com
To: user@host.com
Bad-Header:
 =?us-ascii?Q?LCSwrV11+IB0rSbSker+M9vWR7wEDSuGqmHD89Gt=ea0nJFSaiz4vX3XMJPT4vrE?=
 =?us-ascii?Q?xGUZeOnp0o22pLBB7CYLH74Js=wOlK6Tfru2U47qR?=
 =?us-ascii?Q?72OfyEY2p2=2FrA9xNFyvH+fBTCmazxwzF8nGkK6D?=

Hello!
""", """From: ����� �������� <xxx@xxx>
To: "xxx" <xxx@xxx>
Subject:   ��� ���������� ����� ����� � ��������� �� ����
MIME-Version: 1.0
Content-Type: text/plain; charset="windows-1251";
Content-Transfer-Encoding: 8bit

�� ����� � ���� ������ ��� ��������
"""]
        # One subtest per sample, so a failure names the message involved.
        for m in messages:
            with self.subTest(m=m):
                msg = email.message_from_string(m)
5450
5451
5452# Tests to ensure that signed parts of an email are completely preserved, as
5453# required by RFC1847 section 2.1.  Note that these are incomplete, because the
5454# email package does not currently always preserve the body.  See issue 1670765.
class TestSigned(TestEmailBase):

    def _msg_and_obj(self, filename):
        # Read a test data file and hand back both its raw text and the
        # message object parsed from that text.
        with openfile(filename) as fp:
            text = fp.read()
            parsed = email.message_from_string(text)
        return text, parsed

    def _signed_parts_eq(self, original, result):
        # Compare the first MIME part of both texts: everything between the
        # first '--<boundary>' line and the next line that consists of
        # exactly that same '--<boundary>'.
        first_part = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
        expected = first_part.search(original).group(2)
        actual = first_part.search(result).group(2)
        self.assertEqual(actual, expected)

    def test_long_headers_as_string(self):
        # Serialising via as_string() with its defaults must leave the first
        # part untouched.
        source, message = self._msg_and_obj('msg_45.txt')
        self._signed_parts_eq(source, message.as_string())

    def test_long_headers_as_string_maxheaderlen(self):
        # Same check with an explicit maxheaderlen of 60.
        source, message = self._msg_and_obj('msg_45.txt')
        self._signed_parts_eq(source, message.as_string(maxheaderlen=60))

    def test_long_headers_flatten(self):
        # Same check when flattening through a Generator writing to a
        # StringIO buffer.
        source, message = self._msg_and_obj('msg_45.txt')
        buffer = StringIO()
        generator = Generator(buffer)
        generator.flatten(message)
        self._signed_parts_eq(source, buffer.getvalue())
5487
5488
5489
# Run this module's tests when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
5492