1import datetime
2import textwrap
3import unittest
4from email import errors
5from email import policy
6from email.message import Message
7from test.test_email import TestEmailBase, parameterize
8from email import headerregistry
9from email.headerregistry import Address, Group
10
11
12DITTO = object()
13
14
class TestHeaderRegistry(TestEmailBase):
    # Verifies which header class a HeaderRegistry produces for a given
    # header name, and how the base_class, default_class and use_default_map
    # constructor arguments and map_to_type() alter that choice.

    def _check_classes(self, header, *classes):
        # Assert, in the order given, that header is an instance of each class.
        for cls in classes:
            self.assertIsInstance(header, cls)

    def test_arbitrary_name_unstructured(self):
        registry = headerregistry.HeaderRegistry()
        header = registry('foobar', 'test')
        self._check_classes(header,
                            headerregistry.BaseHeader,
                            headerregistry.UnstructuredHeader)

    def test_name_case_ignored(self):
        registry = headerregistry.HeaderRegistry()
        # Whitebox check that test is valid: the capitalized spelling must
        # not itself be a key of the registry mapping.
        self.assertNotIn('Subject', registry.registry)
        header = registry('Subject', 'test')
        self._check_classes(header,
                            headerregistry.BaseHeader,
                            headerregistry.UniqueUnstructuredHeader)

    class FooBase:
        # Stand-in base class whose constructor accepts and ignores anything.
        def __init__(self, *args, **kwargs):
            pass

    def test_override_default_base_class(self):
        registry = headerregistry.HeaderRegistry(base_class=self.FooBase)
        header = registry('foobar', 'test')
        self._check_classes(header,
                            self.FooBase,
                            headerregistry.UnstructuredHeader)

    class FooDefault:
        # Stand-in default class; it borrows UnstructuredHeader's parse.
        parse = headerregistry.UnstructuredHeader.parse

    def test_override_default_class(self):
        registry = headerregistry.HeaderRegistry(
            default_class=self.FooDefault)
        header = registry('foobar', 'test')
        self._check_classes(header,
                            headerregistry.BaseHeader,
                            self.FooDefault)

    def test_override_default_class_only_overrides_default(self):
        # 'subject' is expected to keep its own class even though a
        # different default class was supplied.
        registry = headerregistry.HeaderRegistry(
            default_class=self.FooDefault)
        header = registry('subject', 'test')
        self._check_classes(header,
                            headerregistry.BaseHeader,
                            headerregistry.UniqueUnstructuredHeader)

    def test_dont_use_default_map(self):
        # With use_default_map=False, 'subject' is expected to come back as
        # a plain UnstructuredHeader.
        registry = headerregistry.HeaderRegistry(use_default_map=False)
        header = registry('subject', 'test')
        self._check_classes(header,
                            headerregistry.BaseHeader,
                            headerregistry.UnstructuredHeader)

    def test_map_to_type(self):
        registry = headerregistry.HeaderRegistry()
        # Build one header before and one after registering the mapping.
        before = registry('foobar', 'test')
        registry.map_to_type('foobar',
                             headerregistry.UniqueUnstructuredHeader)
        after = registry('foobar', 'test')
        self._check_classes(before,
                            headerregistry.BaseHeader,
                            headerregistry.UnstructuredHeader)
        self._check_classes(after,
                            headerregistry.BaseHeader,
                            headerregistry.UniqueUnstructuredHeader)
71
72
class TestHeaderBase(TestEmailBase):
    # Common base for header tests: holds one HeaderRegistry, created with
    # default arguments, and a helper that builds headers through it.

    factory = headerregistry.HeaderRegistry()

    def make_header(self, name, value):
        """Return the header object the shared factory builds for name/value."""
        build = self.factory
        return build(name, value)
79
80
class TestBaseHeaderFeatures(TestHeaderBase):
    # Checks features of header objects using 'subject' (and 'date' for the
    # defect case): string behaviour, the name attribute, and read-only
    # name/defects attributes.

    def _subject(self):
        # The sample header most of the tests in this class inspect.
        return self.make_header('subject', 'this is a test')

    def test_str(self):
        header = self._subject()
        self.assertIsInstance(header, str)
        # Compare both the header object itself and its str() conversion.
        for candidate in (header, str(header)):
            self.assertEqual(candidate, 'this is a test')

    def test_substr(self):
        header = self._subject()
        piece = header[5:7]
        self.assertEqual(piece, 'is')

    def test_has_name(self):
        header = self._subject()
        self.assertEqual(header.name, 'subject')

    def _test_attr_ro(self, attr):
        # Assigning to the named attribute must raise AttributeError.
        header = self._subject()
        self.assertRaises(AttributeError, setattr, header, attr, 'foo')

    def test_name_read_only(self):
        self._test_attr_ro('name')

    def test_defects_read_only(self):
        self._test_attr_ro('defects')

    def test_defects_is_tuple(self):
        # First a header without defects, then an empty 'date' value to make
        # sure it is still true when there are defects.
        cases = (
            ('subject', 'this is a test', 0),
            ('date', '', 1),
        )
        for name, value, defect_count in cases:
            header = self.make_header(name, value)
            self.assertEqual(len(header.defects), defect_count)
            self.assertIsInstance(header.defects, tuple)

    # XXX: FIXME
    #def test_CR_in_value(self):
    #    # XXX: this also re-raises the issue of embedded headers,
    #    # need test and solution for that.
    #    value = '\r'.join(['this is', ' a test'])
    #    h = self.make_header('subject', value)
    #    self.assertEqual(h, value)
    #    self.assertDefectsEqual(h.defects, [errors.ObsoleteHeaderDefect])
125
126
@parameterize
class TestUnstructuredHeader(TestHeaderBase):
    # Checks of the Subject header driven by the string_params table;
    # presumably @parameterize expands each entry into a call of
    # string_as_value (the decorator is defined outside this file).

    def string_as_value(self,
                        source,
                        decoded,
                        *args):
        """Build a Subject header from source and check the result.

        decoded is the expected string value of the header.  Two optional
        positional expectations may follow: args[0] is the list of expected
        defect classes (default: []), and args[1] is the expected value part
        of the folded header (default: source).
        """
        expected_defects = args[0] if args else []
        folded_value = args[1] if len(args) > 1 else source
        # No space follows the colon when the source value is empty.
        separator = ' ' if source else ''
        expected_fold = 'Subject:' + separator + folded_value + '\n'
        header = self.make_header('Subject', source)
        self.assertEqual(header, decoded)
        self.assertDefectsEqual(header.defects, expected_defects)
        self.assertEqual(header.fold(policy=policy.default), expected_fold)

    # Each entry: (source, decoded[, defects[, folded value]]).
    string_params = {

        'rfc2047_simple_quopri': (
            '=?utf-8?q?this_is_a_test?=',
            'this is a test',
            [],
            'this is a test',
            ),

        'rfc2047_gb2312_base64': (
            '=?gb2312?b?1eLKx9bQzsSy4srUo6E=?=',
            '\u8fd9\u662f\u4e2d\u6587\u6d4b\u8bd5\uff01',
            [],
            '=?utf-8?b?6L+Z5piv5Lit5paH5rWL6K+V77yB?=',
            ),

        'rfc2047_simple_nonascii_quopri': (
            '=?utf-8?q?=C3=89ric?=',
            'Éric',
            ),

        'rfc2047_quopri_with_regular_text': (
            'The =?utf-8?q?=C3=89ric=2C?= Himself',
            'The Éric, Himself',
            ),

    }
166
167
@parameterize
class TestDateHeader(TestHeaderBase):
    # Checks the 'date' and 'resent-date' headers using one fixed timestamp,
    # available both as a date string and as the equivalent aware datetime.

    datestring = 'Sun, 23 Sep 2001 20:10:55 -0700'
    utcoffset = datetime.timedelta(hours=-7)
    tz = datetime.timezone(utcoffset)
    dt = datetime.datetime(year=2001, month=9, day=23,
                           hour=20, minute=10, second=55,
                           tzinfo=tz)

    def test_parse_date(self):
        header = self.make_header('date', self.datestring)
        self.assertEqual(header, self.datestring)
        parsed = header.datetime
        self.assertEqual(parsed, self.dt)
        self.assertEqual(header.datetime.utcoffset(), self.utcoffset)
        self.assertEqual(header.defects, ())

    def test_set_from_datetime(self):
        # The header is built from a datetime object rather than a string.
        header = self.make_header('date', self.dt)
        self.assertEqual(header, self.datestring)
        self.assertEqual(header.datetime, self.dt)
        self.assertEqual(header.defects, ())

    def test_date_header_properties(self):
        header = self.make_header('date', self.datestring)
        self.assertIsInstance(header, headerregistry.UniqueDateHeader)
        self.assertEqual(header.max_count, 1)
        self.assertEqual(header.defects, ())

    def test_resent_date_header_properties(self):
        header = self.make_header('resent-date', self.datestring)
        self.assertIsInstance(header, headerregistry.DateHeader)
        self.assertEqual(header.max_count, None)
        self.assertEqual(header.defects, ())

    def test_no_value_is_defect(self):
        # An empty value must produce exactly one defect.
        header = self.make_header('date', '')
        self.assertEqual(len(header.defects), 1)
        first_defect = header.defects[0]
        self.assertIsInstance(first_defect, errors.HeaderMissingRequiredValue)

    def test_datetime_read_only(self):
        header = self.make_header('date', self.datestring)
        self.assertRaises(AttributeError, setattr, header, 'datetime', 'foo')

    def test_set_date_header_from_datetime(self):
        # Same as test_set_from_datetime, but going through a Message that
        # uses policy.default.
        msg = Message(policy=policy.default)
        msg['Date'] = self.dt
        self.assertEqual(msg['Date'], self.datestring)
        self.assertEqual(msg['Date'].datetime, self.dt)
216
217
218@parameterize
219class TestContentTypeHeader(TestHeaderBase):
220
221    def content_type_as_value(self,
222                              source,
223                              content_type,
224                              maintype,
225                              subtype,
226                              *args):
227        l = len(args)
228        parmdict = args[0] if l>0 else {}
229        defects =  args[1] if l>1 else []
230        decoded =  args[2] if l>2 and args[2] is not DITTO else source
231        header = 'Content-Type:' + ' ' if source else ''
232        folded = args[3] if l>3 else header + decoded + '\n'
233        h = self.make_header('Content-Type', source)
234        self.assertEqual(h.content_type, content_type)
235        self.assertEqual(h.maintype, maintype)
236        self.assertEqual(h.subtype, subtype)
237        self.assertEqual(h.params, parmdict)
238        with self.assertRaises(TypeError):
239            h.params['abc'] = 'xyz'   # make sure params is read-only.
240        self.assertDefectsEqual(h.defects, defects)
241        self.assertEqual(h, decoded)
242        self.assertEqual(h.fold(policy=policy.default), folded)
243
244    content_type_params = {
245
246        # Examples from RFC 2045.
247
248        'RFC_2045_1': (
249            'text/plain; charset=us-ascii (Plain text)',
250            'text/plain',
251            'text',
252            'plain',
253            {'charset': 'us-ascii'},
254            [],
255            'text/plain; charset="us-ascii"'),
256
257        'RFC_2045_2': (
258            'text/plain; charset=us-ascii',
259            'text/plain',
260            'text',
261            'plain',
262            {'charset': 'us-ascii'},
263            [],
264            'text/plain; charset="us-ascii"'),
265
266        'RFC_2045_3': (
267            'text/plain; charset="us-ascii"',
268            'text/plain',
269            'text',
270            'plain',
271            {'charset': 'us-ascii'}),
272
273        # RFC 2045 5.2 says syntactically invalid values are to be treated as
274        # text/plain.
275
276        'no_subtype_in_content_type': (
277            'text/',
278            'text/plain',
279            'text',
280            'plain',
281            {},
282            [errors.InvalidHeaderDefect]),
283
284        'no_slash_in_content_type': (
285            'foo',
286            'text/plain',
287            'text',
288            'plain',
289            {},
290            [errors.InvalidHeaderDefect]),
291
292        'junk_text_in_content_type': (
293            '<crazy "stuff">',
294            'text/plain',
295            'text',
296            'plain',
297            {},
298            [errors.InvalidHeaderDefect]),
299
300        'too_many_slashes_in_content_type': (
301            'image/jpeg/foo',
302            'text/plain',
303            'text',
304            'plain',
305            {},
306            [errors.InvalidHeaderDefect]),
307
308        # But unknown names are OK.  We could make non-IANA names a defect, but
309        # by not doing so we make ourselves future proof.  The fact that they
310        # are unknown will be detectable by the fact that they don't appear in
311        # the mime_registry...and the application is free to extend that list
312        # to handle them even if the core library doesn't.
313
314        'unknown_content_type': (
315            'bad/names',
316            'bad/names',
317            'bad',
318            'names'),
319
320        # The content type is case insensitive, and CFWS is ignored.
321
322        'mixed_case_content_type': (
323            'ImAge/JPeg',
324            'image/jpeg',
325            'image',
326            'jpeg'),
327
328        'spaces_in_content_type': (
329            '  text  /  plain  ',
330            'text/plain',
331            'text',
332            'plain'),
333
334        'cfws_in_content_type': (
335            '(foo) text (bar)/(baz)plain(stuff)',
336            'text/plain',
337            'text',
338            'plain'),
339
340        # test some parameters (more tests could be added for parameters
341        # associated with other content types, but since parameter parsing is
342        # generic they would be redundant for the current implementation).
343
344        'charset_param': (
345            'text/plain; charset="utf-8"',
346            'text/plain',
347            'text',
348            'plain',
349            {'charset': 'utf-8'}),
350
351        'capitalized_charset': (
352            'text/plain; charset="US-ASCII"',
353            'text/plain',
354            'text',
355            'plain',
356            {'charset': 'US-ASCII'}),
357
358        'unknown_charset': (
359            'text/plain; charset="fOo"',
360            'text/plain',
361            'text',
362            'plain',
363            {'charset': 'fOo'}),
364
365        'capitalized_charset_param_name_and_comment': (
366            'text/plain; (interjection) Charset="utf-8"',
367            'text/plain',
368            'text',
369            'plain',
370            {'charset': 'utf-8'},
371            [],
372            # Should the parameter name be lowercased here?
373            'text/plain; Charset="utf-8"'),
374
375        # Since this is pretty much the ur-mimeheader, we'll put all the tests
376        # that exercise the parameter parsing and formatting here.  Note that
377        # when we refold we may canonicalize, so things like whitespace,
378        # quoting, and rfc2231 encoding may change from what was in the input
379        # header.
380
381        'unquoted_param_value': (
382            'text/plain; title=foo',
383            'text/plain',
384            'text',
385            'plain',
386            {'title': 'foo'},
387            [],
388            'text/plain; title="foo"',
389            ),
390
391        'param_value_with_tspecials': (
392            'text/plain; title="(bar)foo blue"',
393            'text/plain',
394            'text',
395            'plain',
396            {'title': '(bar)foo blue'}),
397
398        'param_with_extra_quoted_whitespace': (
399            'text/plain; title="  a     loong  way \t home   "',
400            'text/plain',
401            'text',
402            'plain',
403            {'title': '  a     loong  way \t home   '}),
404
405        'bad_params': (
406            'blarg; baz; boo',
407            'text/plain',
408            'text',
409            'plain',
410            {'baz': '', 'boo': ''},
411            [errors.InvalidHeaderDefect]*3),
412
413        'spaces_around_param_equals': (
414            'Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"',
415            'multipart/mixed',
416            'multipart',
417            'mixed',
418            {'boundary': 'CPIMSSMTPC06p5f3tG'},
419            [],
420            'Multipart/mixed; boundary="CPIMSSMTPC06p5f3tG"',
421            ),
422
423        'spaces_around_semis': (
424            ('image/jpeg; name="wibble.JPG" ; x-mac-type="4A504547" ; '
425                'x-mac-creator="474B4F4E"'),
426            'image/jpeg',
427            'image',
428            'jpeg',
429            {'name': 'wibble.JPG',
430             'x-mac-type': '4A504547',
431             'x-mac-creator': '474B4F4E'},
432            [],
433            ('image/jpeg; name="wibble.JPG"; x-mac-type="4A504547"; '
434                'x-mac-creator="474B4F4E"'),
435            ('Content-Type: image/jpeg; name="wibble.JPG";'
436                ' x-mac-type="4A504547";\n'
437             ' x-mac-creator="474B4F4E"\n'),
438            ),
439
440        'lots_of_mime_params': (
441            ('image/jpeg; name="wibble.JPG"; x-mac-type="4A504547"; '
442                'x-mac-creator="474B4F4E"; x-extrastuff="make it longer"'),
443            'image/jpeg',
444            'image',
445            'jpeg',
446            {'name': 'wibble.JPG',
447             'x-mac-type': '4A504547',
448             'x-mac-creator': '474B4F4E',
449             'x-extrastuff': 'make it longer'},
450            [],
451            ('image/jpeg; name="wibble.JPG"; x-mac-type="4A504547"; '
452                'x-mac-creator="474B4F4E"; x-extrastuff="make it longer"'),
            # In this case the whole of the MimeParameters does *not* fit
            # on one line, so we break at a lower syntactic level.
455            ('Content-Type: image/jpeg; name="wibble.JPG";'
456                ' x-mac-type="4A504547";\n'
457             ' x-mac-creator="474B4F4E"; x-extrastuff="make it longer"\n'),
458            ),
459
460        'semis_inside_quotes': (
461            'image/jpeg; name="Jim&amp;&amp;Jill"',
462            'image/jpeg',
463            'image',
464            'jpeg',
465            {'name': 'Jim&amp;&amp;Jill'}),
466
467        'single_quotes_inside_quotes': (
468            'image/jpeg; name="Jim \'Bob\' Jill"',
469            'image/jpeg',
470            'image',
471            'jpeg',
472            {'name': "Jim 'Bob' Jill"}),
473
474        'double_quotes_inside_quotes': (
475            r'image/jpeg; name="Jim \"Bob\" Jill"',
476            'image/jpeg',
477            'image',
478            'jpeg',
479            {'name': 'Jim "Bob" Jill'},
480            [],
481            r'image/jpeg; name="Jim \"Bob\" Jill"'),
482
483        'non_ascii_in_params': (
484            ('foo\xa7/bar; b\xa7r=two; '
485                'baz=thr\xa7e'.encode('latin-1').decode('us-ascii',
486                                                        'surrogateescape')),
487            'foo\uFFFD/bar',
488            'foo\uFFFD',
489            'bar',
490            {'b\uFFFDr': 'two', 'baz': 'thr\uFFFDe'},
491            [errors.UndecodableBytesDefect]*3,
492            'foo�/bar; b�r="two"; baz="thr�e"',
493            # XXX Two bugs here: the mime type is not allowed to be an encoded
494            # word, and we shouldn't be emitting surrogates in the parameter
495            # names.  But I don't know what the behavior should be here, so I'm
496            # punting for now.  In practice this is unlikely to be encountered
497            # since headers with binary in them only come from a binary source
498            # and are almost certain to be re-emitted without refolding.
499            'Content-Type: =?unknown-8bit?q?foo=A7?=/bar; b\udca7r="two";\n'
500            " baz*=unknown-8bit''thr%A7e\n",
501            ),
502
503        # RFC 2231 parameter tests.
504
505        'rfc2231_segmented_normal_values': (
506            'image/jpeg; name*0="abc"; name*1=".html"',
507            'image/jpeg',
508            'image',
509            'jpeg',
510            {'name': "abc.html"},
511            [],
512            'image/jpeg; name="abc.html"'),
513
514        'quotes_inside_rfc2231_value': (
515            r'image/jpeg; bar*0="baz\"foobar"; bar*1="\"baz"',
516            'image/jpeg',
517            'image',
518            'jpeg',
519            {'bar': 'baz"foobar"baz'},
520            [],
521            r'image/jpeg; bar="baz\"foobar\"baz"'),
522
523        'non_ascii_rfc2231_value': (
524            ('text/plain; charset=us-ascii; '
525             "title*=us-ascii'en'This%20is%20"
526             'not%20f\xa7n').encode('latin-1').decode('us-ascii',
527                                                     'surrogateescape'),
528            'text/plain',
529            'text',
530            'plain',
531            {'charset': 'us-ascii', 'title': 'This is not f\uFFFDn'},
532             [errors.UndecodableBytesDefect],
533             'text/plain; charset="us-ascii"; title="This is not f�n"',
534            'Content-Type: text/plain; charset="us-ascii";\n'
535            " title*=unknown-8bit''This%20is%20not%20f%A7n\n",
536            ),
537
538        'rfc2231_encoded_charset': (
539            'text/plain; charset*=ansi-x3.4-1968\'\'us-ascii',
540            'text/plain',
541            'text',
542            'plain',
543            {'charset': 'us-ascii'},
544            [],
545            'text/plain; charset="us-ascii"'),
546
547        # This follows the RFC: no double quotes around encoded values.
548        'rfc2231_encoded_no_double_quotes': (
549            ("text/plain;"
550                "\tname*0*=''This%20is%20;"
551                "\tname*1*=%2A%2A%2Afun%2A%2A%2A%20;"
552                '\tname*2="is it not.pdf"'),
553            'text/plain',
554            'text',
555            'plain',
556            {'name': 'This is ***fun*** is it not.pdf'},
557            [],
558            'text/plain; name="This is ***fun*** is it not.pdf"',
559            ),
560
561        # Make sure we also handle it if there are spurious double quotes.
562        'rfc2231_encoded_with_double_quotes': (
563            ("text/plain;"
564                '\tname*0*="us-ascii\'\'This%20is%20even%20more%20";'
565                '\tname*1*="%2A%2A%2Afun%2A%2A%2A%20";'
566                '\tname*2="is it not.pdf"'),
567            'text/plain',
568            'text',
569            'plain',
570            {'name': 'This is even more ***fun*** is it not.pdf'},
571            [errors.InvalidHeaderDefect]*2,
572            'text/plain; name="This is even more ***fun*** is it not.pdf"',
573            ),
574
575        'rfc2231_single_quote_inside_double_quotes': (
576            ('text/plain; charset=us-ascii;'
577               '\ttitle*0*="us-ascii\'en\'This%20is%20really%20";'
578               '\ttitle*1*="%2A%2A%2Afun%2A%2A%2A%20";'
579               '\ttitle*2="isn\'t it!"'),
580            'text/plain',
581            'text',
582            'plain',
583            {'charset': 'us-ascii', 'title': "This is really ***fun*** isn't it!"},
584            [errors.InvalidHeaderDefect]*2,
585            ('text/plain; charset="us-ascii"; '
586               'title="This is really ***fun*** isn\'t it!"'),
587            ('Content-Type: text/plain; charset="us-ascii";\n'
588                ' title="This is really ***fun*** isn\'t it!"\n'),
589            ),
590
591        'rfc2231_single_quote_in_value_with_charset_and_lang': (
592            ('application/x-foo;'
593                "\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\""),
594            'application/x-foo',
595            'application',
596            'x-foo',
597            {'name': "Frank's Document"},
598            [errors.InvalidHeaderDefect]*2,
599            'application/x-foo; name="Frank\'s Document"',
600            ),
601
602        'rfc2231_single_quote_in_non_encoded_value': (
603            ('application/x-foo;'
604                "\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\""),
605            'application/x-foo',
606            'application',
607            'x-foo',
608            {'name': "us-ascii'en-us'Frank's Document"},
609            [],
610            'application/x-foo; name="us-ascii\'en-us\'Frank\'s Document"',
611             ),
612
613        'rfc2231_no_language_or_charset': (
614            'text/plain; NAME*0*=english_is_the_default.html',
615            'text/plain',
616            'text',
617            'plain',
618            {'name': 'english_is_the_default.html'},
619            [errors.InvalidHeaderDefect],
620            'text/plain; NAME="english_is_the_default.html"'),
621
622        'rfc2231_encoded_no_charset': (
623            ("text/plain;"
624                '\tname*0*="\'\'This%20is%20even%20more%20";'
625                '\tname*1*="%2A%2A%2Afun%2A%2A%2A%20";'
626                '\tname*2="is it.pdf"'),
627            'text/plain',
628            'text',
629            'plain',
630            {'name': 'This is even more ***fun*** is it.pdf'},
631            [errors.InvalidHeaderDefect]*2,
632            'text/plain; name="This is even more ***fun*** is it.pdf"',
633            ),
634
635        'rfc2231_partly_encoded': (
636            ("text/plain;"
637                '\tname*0*="\'\'This%20is%20even%20more%20";'
638                '\tname*1*="%2A%2A%2Afun%2A%2A%2A%20";'
639                '\tname*2="is it.pdf"'),
640            'text/plain',
641            'text',
642            'plain',
643            {'name': 'This is even more ***fun*** is it.pdf'},
644            [errors.InvalidHeaderDefect]*2,
645            'text/plain; name="This is even more ***fun*** is it.pdf"',
646            ),
647
648        'rfc2231_partly_encoded_2': (
649            ("text/plain;"
650                '\tname*0*="\'\'This%20is%20even%20more%20";'
651                '\tname*1="%2A%2A%2Afun%2A%2A%2A%20";'
652                '\tname*2="is it.pdf"'),
653            'text/plain',
654            'text',
655            'plain',
656            {'name': 'This is even more %2A%2A%2Afun%2A%2A%2A%20is it.pdf'},
657            [errors.InvalidHeaderDefect],
658            ('text/plain;'
659             ' name="This is even more %2A%2A%2Afun%2A%2A%2A%20is it.pdf"'),
660            ('Content-Type: text/plain;\n'
661             ' name="This is even more %2A%2A%2Afun%2A%2A%2A%20is'
662                ' it.pdf"\n'),
663            ),
664
665        'rfc2231_unknown_charset_treated_as_ascii': (
666            "text/plain; name*0*=bogus'xx'ascii_is_the_default",
667            'text/plain',
668            'text',
669            'plain',
670            {'name': 'ascii_is_the_default'},
671            [],
672            'text/plain; name="ascii_is_the_default"'),
673
674        'rfc2231_bad_character_in_charset_parameter_value': (
675            "text/plain; charset*=ascii''utf-8%F1%F2%F3",
676            'text/plain',
677            'text',
678            'plain',
679            {'charset': 'utf-8\uFFFD\uFFFD\uFFFD'},
680            [errors.UndecodableBytesDefect],
681            'text/plain; charset="utf-8\uFFFD\uFFFD\uFFFD"',
682            "Content-Type: text/plain;"
683            " charset*=unknown-8bit''utf-8%F1%F2%F3\n",
684            ),
685
686        'rfc2231_utf8_in_supposedly_ascii_charset_parameter_value': (
687            "text/plain; charset*=ascii''utf-8%E2%80%9D",
688            'text/plain',
689            'text',
690            'plain',
691            {'charset': 'utf-8”'},
692            [errors.UndecodableBytesDefect],
693            'text/plain; charset="utf-8”"',
694            # XXX Should folding change the charset to utf8?  Currently it just
695            # reproduces the original, which is arguably fine.
696            "Content-Type: text/plain;"
697            " charset*=unknown-8bit''utf-8%E2%80%9D\n",
698            ),
699
700        'rfc2231_encoded_then_unencoded_segments': (
701            ('application/x-foo;'
702                '\tname*0*="us-ascii\'en-us\'My";'
703                '\tname*1=" Document";'
704                '\tname*2=" For You"'),
705            'application/x-foo',
706            'application',
707            'x-foo',
708            {'name': 'My Document For You'},
709            [errors.InvalidHeaderDefect],
710            'application/x-foo; name="My Document For You"',
711            ),
712
713        # My reading of the RFC is that this is an invalid header.  The RFC
714        # says that if charset and language information is given, the first
715        # segment *must* be encoded.
716        'rfc2231_unencoded_then_encoded_segments': (
717            ('application/x-foo;'
718                '\tname*0=us-ascii\'en-us\'My;'
719                '\tname*1*=" Document";'
720                '\tname*2*=" For You"'),
721            'application/x-foo',
722            'application',
723            'x-foo',
724            {'name': 'My Document For You'},
725            [errors.InvalidHeaderDefect]*3,
726            'application/x-foo; name="My Document For You"',
727            ),
728
729        # XXX: I would say this one should default to ascii/en for the
730        # "encoded" segment, since the first segment is not encoded and is
731        # in double quotes, making the value a valid non-encoded string.  The
732        # old parser decodes this just like the previous case, which may be the
733        # better Postel rule, but could equally result in borking headers that
734        # intentionally have quoted quotes in them.  We could get this 98%
735        # right if we treat it as a quoted string *unless* it matches the
736        # charset'lang'value pattern exactly *and* there is at least one
737        # encoded segment.  Implementing that algorithm will require some
738        # refactoring, so I haven't done it (yet).
739        'rfc2231_quoted_unencoded_then_encoded_segments': (
740            ('application/x-foo;'
741                '\tname*0="us-ascii\'en-us\'My";'
742                '\tname*1*=" Document";'
743                '\tname*2*=" For You"'),
744            'application/x-foo',
745            'application',
746            'x-foo',
747            {'name': "us-ascii'en-us'My Document For You"},
748            [errors.InvalidHeaderDefect]*2,
749            'application/x-foo; name="us-ascii\'en-us\'My Document For You"',
750            ),
751
752        # Make sure our folding algorithm produces multiple sections correctly.
753        # We could mix encoded and non-encoded segments, but we don't, we just
754        # make them all encoded.  It might be worth fixing that, since the
755        # sections can get used for wrapping ascii text.
756        'rfc2231_folded_segments_correctly_formatted': (
757            ('application/x-foo;'
758                '\tname="' + "with spaces"*8 + '"'),
759            'application/x-foo',
760            'application',
761            'x-foo',
762            {'name': "with spaces"*8},
763            [],
764            'application/x-foo; name="' + "with spaces"*8 + '"',
765            "Content-Type: application/x-foo;\n"
766            " name*0*=us-ascii''with%20spaceswith%20spaceswith%20spaceswith"
767                "%20spaceswith;\n"
768            " name*1*=%20spaceswith%20spaceswith%20spaceswith%20spaces\n"
769            ),
770
771    }
772
773
@parameterize
class TestContentTransferEncoding(TestHeaderBase):
    # Checks of the Content-Transfer-Encoding header driven by the
    # cte_params table; presumably @parameterize expands each entry into a
    # call of cte_as_value (the decorator is defined outside this file).

    def cte_as_value(self,
                     source,
                     cte,
                     *args):
        """Build a Content-Transfer-Encoding header and check how it parsed.

        source is the raw header value and cte the value expected on the
        header's cte attribute.  Up to three optional expectations may
        follow positionally:

          args[0]  expected defect classes (default: [])
          args[1]  expected string value of the header (default, or when
                   DITTO is passed: source)
          args[2]  expected fold() output under policy.default (default:
                   'Content-Transfer-Encoding:', a space if source is
                   non-empty, source, and a newline)
        """
        nargs = len(args)
        defects = args[0] if nargs > 0 else []
        decoded = args[1] if nargs > 1 and args[1] is not DITTO else source
        # The parentheses matter: without them the conditional expression
        # covers the whole concatenation, and an empty source would drop the
        # header name instead of only the separating space.
        header = 'Content-Transfer-Encoding:' + (' ' if source else '')
        folded = args[2] if nargs > 2 else header + source + '\n'
        h = self.make_header('Content-Transfer-Encoding', source)
        self.assertEqual(h.cte, cte)
        self.assertDefectsEqual(h.defects, defects)
        self.assertEqual(h, decoded)
        self.assertEqual(h.fold(policy=policy.default), folded)

    # Each entry: (source, cte[, defects[, decoded[, folded]]]).
    cte_params = {

        'RFC_2183_1': (
            'base64',
            'base64',),

        'no_value': (
            '',
            '7bit',
            [errors.HeaderMissingRequiredValue],
            '',
            'Content-Transfer-Encoding:\n',
            ),

        'junk_after_cte': (
            '7bit and a bunch more',
            '7bit',
            [errors.InvalidHeaderDefect]),

    }
812
813
@parameterize
class TestContentDisposition(TestHeaderBase):

    def content_disp_as_value(self,
                              source,
                              content_disposition,
                              *args):
        """Check one Content-Disposition value.

        source is the raw value handed to make_header and
        content_disposition the expected value of the attribute of the
        same name.  Up to four optional positional values may follow:

          args[0]  expected ``params`` mapping (default: empty dict)
          args[1]  expected defect classes (default: no defects)
          args[2]  value the header must compare equal to; DITTO or an
                   omitted argument means "same as source"
          args[3]  expected folded form (default: header name, source and
                   a trailing newline)
        """
        nargs = len(args)
        parmdict = args[0] if nargs > 0 else {}
        defects = args[1] if nargs > 1 else []
        decoded = args[2] if nargs > 2 and args[2] is not DITTO else source
        # Only the separating space depends on whether there is a value.
        # The parentheses are required: a conditional expression binds
        # looser than '+', so without them an empty source would drop the
        # header name too and the default folded form would be just '\n'.
        header = 'Content-Disposition:' + (' ' if source else '')
        folded = args[3] if nargs > 3 else header + source + '\n'
        h = self.make_header('Content-Disposition', source)
        self.assertEqual(h.content_disposition, content_disposition)
        self.assertEqual(h.params, parmdict)
        self.assertDefectsEqual(h.defects, defects)
        self.assertEqual(h, decoded)
        self.assertEqual(h.fold(policy=policy.default), folded)

    # Each value is the positional argument tuple for content_disp_as_value:
    # (source, content_disposition[, params[, defects[, decoded[, folded]]]]).
    content_disp_params = {

        # Examples from RFC 2183.

        'RFC_2183_1': (
            'inline',
            'inline',),

        'RFC_2183_2': (
            ('attachment; filename=genome.jpeg;'
             '  modification-date="Wed, 12 Feb 1997 16:29:51 -0500";'),
            'attachment',
            {'filename': 'genome.jpeg',
             'modification-date': 'Wed, 12 Feb 1997 16:29:51 -0500'},
            [],
            ('attachment; filename="genome.jpeg"; '
                 'modification-date="Wed, 12 Feb 1997 16:29:51 -0500"'),
            ('Content-Disposition: attachment; filename="genome.jpeg";\n'
             ' modification-date="Wed, 12 Feb 1997 16:29:51 -0500"\n'),
            ),

        'no_value': (
            '',
            None,
            {},
            [errors.HeaderMissingRequiredValue],
            '',
            'Content-Disposition:\n'),

        'invalid_value': (
            'ab./k',
            'ab.',
            {},
            [errors.InvalidHeaderDefect]),

        'invalid_value_with_params': (
            'ab./k; filename="foo"',
            'ab.',
            {'filename': 'foo'},
            [errors.InvalidHeaderDefect]),

        'invalid_parameter_value_with_fws_between_ew': (
            'attachment; filename="=?UTF-8?Q?Schulbesuchsbest=C3=A4ttigung=2E?='
            '               =?UTF-8?Q?pdf?="',
            'attachment',
            {'filename': 'Schulbesuchsbestättigung.pdf'},
            [errors.InvalidHeaderDefect]*3,
            ('attachment; filename="Schulbesuchsbestättigung.pdf"'),
            ('Content-Disposition: attachment;\n'
             ' filename*=utf-8\'\'Schulbesuchsbest%C3%A4ttigung.pdf\n'),
            ),

        'parameter_value_with_fws_between_tokens': (
            'attachment; filename="File =?utf-8?q?Name?= With Spaces.pdf"',
            'attachment',
            {'filename': 'File Name With Spaces.pdf'},
            [errors.InvalidHeaderDefect],
            'attachment; filename="File Name With Spaces.pdf"',
            ('Content-Disposition: attachment; filename="File Name With Spaces.pdf"\n'),
            )
    }
895
896
@parameterize
class TestMIMEVersionHeader(TestHeaderBase):

    def version_string_as_MIME_Version(self,
                                       source,
                                       decoded,
                                       version,
                                       major,
                                       minor,
                                       defects):
        """Check how one MIME-Version value is parsed and folded.

        source is the raw value handed to make_header; decoded is what the
        resulting header must compare equal to; version, major and minor
        are the expected values of the attributes of the same name; defects
        lists the expected defect classes.
        """
        header = self.make_header('MIME-Version', source)
        self.assertEqual(header, decoded)
        for attr, expected in (('version', version),
                               ('major', major),
                               ('minor', minor)):
            self.assertEqual(getattr(header, attr), expected)
        self.assertDefectsEqual(header.defects, defects)
        # The folded form must reproduce the source as given; the separating
        # space is present only when there is a value to separate.
        value_part = ' ' + source if source else source
        self.assertEqual(header.fold(policy=policy.default),
                         'MIME-Version:' + value_part + '\n')

    # Each value is (source, decoded, version, major, minor, defects).
    version_string_params = {

        # Examples from the RFC.

        'RFC_2045_1': ('1.0', '1.0', '1.0', 1, 0, []),

        'RFC_2045_2': (
            '1.0 (produced by MetaSend Vx.x)',
            '1.0 (produced by MetaSend Vx.x)',
            '1.0', 1, 0, []),

        'RFC_2045_3': (
            '(produced by MetaSend Vx.x) 1.0',
            '(produced by MetaSend Vx.x) 1.0',
            '1.0', 1, 0, []),

        'RFC_2045_4': (
            '1.(produced by MetaSend Vx.x)0',
            '1.(produced by MetaSend Vx.x)0',
            '1.0', 1, 0, []),

        # Other valid values.

        '1_1': ('1.1', '1.1', '1.1', 1, 1, []),

        '2_1': ('2.1', '2.1', '2.1', 2, 1, []),

        'whitespace': ('1 .0', '1 .0', '1.0', 1, 0, []),

        'leading_trailing_whitespace_ignored': (
            '  1.0  ', '  1.0  ', '1.0', 1, 0, []),

        # Recoverable invalid values.  We can recover here only because we
        # already have a valid value by the time we encounter the garbage.
        # Anywhere else, and we don't know where the garbage ends.

        'non_comment_garbage_after': (
            '1.0 <abc>', '1.0 <abc>', '1.0', 1, 0,
            [errors.InvalidHeaderDefect]),

        # Unrecoverable invalid values.  We *could* apply more heuristics to
        # get something out of the first two, but doing so is not worth the
        # effort.

        'non_comment_garbage_before': (
            '<abc> 1.0', '<abc> 1.0', None, None, None,
            [errors.InvalidHeaderDefect]),

        'non_comment_garbage_inside': (
            '1.<abc>0', '1.<abc>0', None, None, None,
            [errors.InvalidHeaderDefect]),

        'two_periods': (
            '1..0', '1..0', None, None, None,
            [errors.InvalidHeaderDefect]),

        '2_x': (
            '2.x', '2.x',
            None,  # This could be 2, but it seems safer to make it None.
            None, None,
            [errors.InvalidHeaderDefect]),

        'foo': (
            'foo', 'foo', None, None, None,
            [errors.InvalidHeaderDefect]),

        'missing': (
            '', '', None, None, None,
            [errors.HeaderMissingRequiredValue]),

        }
1053
1054
@parameterize
class TestAddressHeader(TestHeaderBase):

    # Each value is (source, defects, decoded, display_name, addr_spec,
    # username, domain, comment), matching the parameters of the
    # example_as_* methods below.  test_complex_address_list indexes these
    # entries by position, so their order matters.
    example_params = {

        'empty': (
            '<>',
            [errors.InvalidHeaderDefect],
            '<>', '', '<>', '', '', None),

        'address_only': (
            'zippy@pinhead.com',
            [],
            'zippy@pinhead.com', '',
            'zippy@pinhead.com', 'zippy', 'pinhead.com', None),

        'name_and_address': (
            'Zaphrod Beblebrux <zippy@pinhead.com>',
            [],
            'Zaphrod Beblebrux <zippy@pinhead.com>', 'Zaphrod Beblebrux',
            'zippy@pinhead.com', 'zippy', 'pinhead.com', None),

        'quoted_local_part': (
            'Zaphrod Beblebrux <"foo bar"@pinhead.com>',
            [],
            'Zaphrod Beblebrux <"foo bar"@pinhead.com>', 'Zaphrod Beblebrux',
            '"foo bar"@pinhead.com', 'foo bar', 'pinhead.com', None),

        'quoted_parens_in_name': (
            r'"A \(Special\) Person" <person@dom.ain>',
            [],
            '"A (Special) Person" <person@dom.ain>', 'A (Special) Person',
            'person@dom.ain', 'person', 'dom.ain', None),

        'quoted_backslashes_in_name': (
            r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>',
            [],
            r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>',
            r'Arthur \Backslash\ Foobar',
            'person@dom.ain', 'person', 'dom.ain', None),

        'name_with_dot': (
            'John X. Doe <jxd@example.com>',
            [errors.ObsoleteHeaderDefect],
            '"John X. Doe" <jxd@example.com>', 'John X. Doe',
            'jxd@example.com', 'jxd', 'example.com', None),

        'quoted_strings_in_local_part': (
            '""example" example"@example.com',
            [errors.InvalidHeaderDefect]*3,
            '"example example"@example.com', '',
            '"example example"@example.com', 'example example',
            'example.com', None),

        'escaped_quoted_strings_in_local_part': (
            r'"\"example\" example"@example.com',
            [],
            r'"\"example\" example"@example.com', '',
            r'"\"example\" example"@example.com', r'"example" example',
            'example.com', None),

        'escaped_escapes_in_local_part': (
            r'"\\"example\\" example"@example.com',
            [errors.InvalidHeaderDefect]*5,
            r'"\\example\\\\ example"@example.com', '',
            r'"\\example\\\\ example"@example.com', r'\example\\ example',
            'example.com', None),

        'spaces_in_unquoted_local_part_collapsed': (
            'merwok  wok  @example.com',
            [errors.InvalidHeaderDefect]*2,
            '"merwok wok"@example.com', '',
            '"merwok wok"@example.com', 'merwok wok', 'example.com', None),

        'spaces_around_dots_in_local_part_removed': (
            'merwok. wok .  wok@example.com',
            [errors.ObsoleteHeaderDefect],
            'merwok.wok.wok@example.com', '',
            'merwok.wok.wok@example.com', 'merwok.wok.wok',
            'example.com', None),

        'rfc2047_atom_is_decoded': (
            '=?utf-8?q?=C3=89ric?= <foo@example.com>',
            [],
            'Éric <foo@example.com>', 'Éric',
            'foo@example.com', 'foo', 'example.com', None),

        'rfc2047_atom_in_phrase_is_decoded': (
            'The =?utf-8?q?=C3=89ric=2C?= Himself <foo@example.com>',
            [],
            '"The Éric, Himself" <foo@example.com>', 'The Éric, Himself',
            'foo@example.com', 'foo', 'example.com', None),

        'rfc2047_atom_in_quoted_string_is_decoded': (
            '"=?utf-8?q?=C3=89ric?=" <foo@example.com>',
            [errors.InvalidHeaderDefect]*2,
            'Éric <foo@example.com>', 'Éric',
            'foo@example.com', 'foo', 'example.com', None),

        }

    # XXX: Need many more examples, and in particular some with names in
    # trailing comments, which aren't currently handled.  Comments in
    # general are not handled yet.

    def example_as_address(self, source, defects, decoded, display_name,
                           addr_spec, username, domain, comment):
        """Check source when it is the sole address of a 'sender' header."""
        header = self.make_header('sender', source)
        self.assertEqual(header, decoded)
        self.assertDefectsEqual(header.defects, defects)
        address = header.address
        self.assertEqual(str(address), decoded)
        self.assertEqual(len(header.groups), 1)
        self.assertEqual([address], list(header.groups[0].addresses))
        self.assertEqual([address], list(header.addresses))
        for attr, expected in (('display_name', display_name),
                               ('addr_spec', addr_spec),
                               ('username', username),
                               ('domain', domain)):
            self.assertEqual(getattr(address, attr), expected)
        # XXX: we have no comment support yet.
        #self.assertEqual(address.comment, comment)

    def example_as_group(self, source, defects, decoded, display_name,
                         addr_spec, username, domain, comment):
        """Check source when it is the only member of a group named 'foo'."""
        group_source = f'foo: {source};'
        if decoded:
            group_decoded = f'foo: {decoded};'
        else:
            group_decoded = 'foo:;'
        header = self.make_header('to', group_source)
        self.assertEqual(header, group_decoded)
        self.assertDefectsEqual(header.defects, defects)
        self.assertEqual(header.groups[0].addresses, header.addresses)
        self.assertEqual(len(header.groups), 1)
        self.assertEqual(len(header.addresses), 1)
        address = header.addresses[0]
        self.assertEqual(str(address), decoded)
        for attr, expected in (('display_name', display_name),
                               ('addr_spec', addr_spec),
                               ('username', username),
                               ('domain', domain)):
            self.assertEqual(getattr(address, attr), expected)

    def test_simple_address_list(self):
        # Three plain addresses: one group per address is expected.
        fred = 'Fred <dinsdale@python.org>'
        foo = 'foo@example.com'
        harry = '"Harry W. Hastings" <hasty@example.com>'
        value = fred + ', ' + foo + ', ' + harry
        header = self.make_header('to', value)
        self.assertEqual(header, value)
        self.assertEqual(len(header.groups), 3)
        self.assertEqual(len(header.addresses), 3)
        for group, address in zip(header.groups, header.addresses):
            self.assertEqual(group.addresses[0], address)
        self.assertEqual(str(header.addresses[0]), fred)
        self.assertEqual(str(header.addresses[1]), foo)
        self.assertEqual(str(header.addresses[2]), harry)
        self.assertEqual(header.addresses[2].display_name,
                         'Harry W. Hastings')

    def test_complex_address_list(self):
        # Builds one header from every example: two empty groups, the first
        # four examples as bare addresses, examples 4 and 5 inside a named
        # group, and the remaining examples as bare addresses again.
        examples = list(self.example_params.values())
        sources = [example[0] for example in examples]
        decoded = [example[2] for example in examples]
        source = ''.join((
            'dummy list:;, another: (empty);,',
            ', '.join(sources[:4]), ', ',
            r'"A \"list\"": ', ', '.join(sources[4:6]), ';,',
            ', '.join(sources[6:]),
            ))
        # XXX: the fact that (empty) disappears here is a potential API design
        # bug.  We don't currently have a way to preserve comments.
        expected = ''.join((
            'dummy list:;, another:;, ',
            ', '.join(decoded[:4]), ', ',
            r'"A \"list\"": ', ', '.join(decoded[4:6]), ';, ',
            ', '.join(decoded[6:]),
            ))

        header = self.make_header('to', source)
        self.assertEqual(header.split(','), expected.split(','))
        self.assertEqual(header, expected)
        groups = header.groups
        # Groups 0-1 are the empty ones, 2-5 the first four examples, 6 the
        # named group, and everything after that the trailing examples.
        trailing = len(examples) - 6
        self.assertEqual(len(groups), 7 + trailing)
        self.assertEqual(groups[0].display_name, 'dummy list')
        self.assertEqual(groups[1].display_name, 'another')
        self.assertEqual(groups[6].display_name, 'A "list"')
        self.assertEqual(len(header.addresses), len(examples))
        for group, text in zip(groups[2:6], decoded[:4]):
            self.assertIsNone(group.display_name)
            self.assertEqual(str(group.addresses[0]), text)
        for group, text in zip(groups[7:], decoded[6:]):
            self.assertIsNone(group.display_name)
            self.assertEqual(str(group.addresses[0]), text)
        for address, example in zip(header.addresses, examples):
            self.assertEqual(str(address), example[2])
            self.assertEqual(address.addr_spec, example[4])

    def test_address_read_only(self):
        sender = self.make_header('sender', 'abc@xyz.com')
        with self.assertRaises(AttributeError):
            setattr(sender, 'address', 'foo')

    def test_addresses_read_only(self):
        sender = self.make_header('sender', 'abc@xyz.com')
        with self.assertRaises(AttributeError):
            setattr(sender, 'addresses', 'foo')

    def test_groups_read_only(self):
        sender = self.make_header('sender', 'abc@xyz.com')
        with self.assertRaises(AttributeError):
            setattr(sender, 'groups', 'foo')

    def test_addresses_types(self):
        header = self.make_header('to', 'me <who@example.com>')
        addresses = header.addresses
        self.assertIsInstance(addresses, tuple)
        self.assertIsInstance(addresses[0], Address)

    def test_groups_types(self):
        header = self.make_header('to', 'me <who@example.com>')
        groups = header.groups
        self.assertIsInstance(groups, tuple)
        self.assertIsInstance(groups[0], Group)

    def test_set_from_Address(self):
        me = Address('me', 'foo', 'example.com')
        header = self.make_header('to', me)
        self.assertEqual(header, 'me <foo@example.com>')

    def test_set_from_Address_list(self):
        recipients = [
            Address('me', 'foo', 'example.com'),
            Address('you', 'bar', 'example.com'),
            ]
        header = self.make_header('to', recipients)
        self.assertEqual(header,
                         'me <foo@example.com>, you <bar@example.com>')

    def test_set_from_Address_and_Group_list(self):
        me = Address('me', 'foo', 'example.com')
        bing = Group('bing', [Address('fiz', 'z', 'b.com'),
                              Address('zif', 'f', 'c.com')])
        you = Address('you', 'bar', 'example.com')
        header = self.make_header('to', [me, bing, you])
        self.assertEqual(header,
                         'me <foo@example.com>, bing: fiz <z@b.com>, '
                         'zif <f@c.com>;, you <bar@example.com>')
        narrow = policy.default.clone(max_line_length=40)
        self.assertEqual(header.fold(policy=narrow),
                         'to: me <foo@example.com>,\n'
                         ' bing: fiz <z@b.com>, zif <f@c.com>;,\n'
                         ' you <bar@example.com>\n')

    def test_set_from_Group_list(self):
        bing = Group('bing', [Address('fiz', 'z', 'b.com'),
                              Address('zif', 'f', 'c.com')])
        header = self.make_header('to', [bing])
        self.assertEqual(header, 'bing: fiz <z@b.com>, zif <f@c.com>;')
1354
1355
class TestAddressAndGroup(TestEmailBase):

    def _test_attr_ro(self, obj, attr):
        # Assigning to attr on obj must raise AttributeError.
        with self.assertRaises(AttributeError):
            setattr(obj, attr, 'foo')

    def _check_address(self, address, display_name, username, domain,
                       addr_spec, text):
        # Compare every public attribute of address, then its str() form.
        self.assertEqual(address.display_name, display_name)
        self.assertEqual(address.username, username)
        self.assertEqual(address.domain, domain)
        self.assertEqual(address.addr_spec, addr_spec)
        self.assertEqual(str(address), text)

    def _check_group(self, group, display_name, addresses, text):
        # Compare display name, member addresses and str() form of group.
        # A display_name of None is checked by identity.
        if display_name is None:
            self.assertIsNone(group.display_name)
        else:
            self.assertEqual(group.display_name, display_name)
        self.assertEqual(group.addresses, addresses)
        self.assertEqual(str(group), text)

    def test_address_display_name_ro(self):
        address = Address('foo', 'bar', 'baz')
        self._test_attr_ro(address, 'display_name')

    def test_address_username_ro(self):
        address = Address('foo', 'bar', 'baz')
        self._test_attr_ro(address, 'username')

    def test_address_domain_ro(self):
        address = Address('foo', 'bar', 'baz')
        self._test_attr_ro(address, 'domain')

    def test_group_display_name_ro(self):
        group = Group('foo')
        self._test_attr_ro(group, 'display_name')

    def test_group_addresses_ro(self):
        group = Group('foo')
        self._test_attr_ro(group, 'addresses')

    def test_address_from_username_domain(self):
        self._check_address(Address('foo', 'bar', 'baz'),
                            'foo', 'bar', 'baz', 'bar@baz',
                            'foo <bar@baz>')

    def test_address_from_addr_spec(self):
        self._check_address(Address('foo', addr_spec='bar@baz'),
                            'foo', 'bar', 'baz', 'bar@baz',
                            'foo <bar@baz>')

    def test_address_with_no_display_name(self):
        self._check_address(Address(addr_spec='bar@baz'),
                            '', 'bar', 'baz', 'bar@baz',
                            'bar@baz')

    def test_null_address(self):
        self._check_address(Address(),
                            '', '', '', '<>',
                            '<>')

    def test_domain_only(self):
        # This isn't really a valid address.
        self._check_address(Address(domain='buzz'),
                            '', '', 'buzz', '@buzz',
                            '@buzz')

    def test_username_only(self):
        # This isn't really a valid address.
        self._check_address(Address(username='buzz'),
                            '', 'buzz', '', 'buzz',
                            'buzz')

    def test_display_name_only(self):
        self._check_address(Address('buzz'),
                            'buzz', '', '', '<>',
                            'buzz <>')

    def test_quoting(self):
        # Ideally we'd check every special individually, but I'm not up for
        # writing that many tests.
        self._check_address(Address('Sara J.', 'bad name', 'example.com'),
                            'Sara J.', 'bad name', 'example.com',
                            '"bad name"@example.com',
                            '"Sara J." <"bad name"@example.com>')

    def test_il8n(self):
        self._check_address(Address('Éric', 'wok', 'exàmple.com'),
                            'Éric', 'wok', 'exàmple.com',
                            'wok@exàmple.com',
                            'Éric <wok@exàmple.com>')

    # XXX: there is an API design issue that needs to be solved here.
    #def test_non_ascii_username_raises(self):
    #    with self.assertRaises(ValueError):
    #        Address('foo', 'wők', 'example.com')

    def test_crlf_in_constructor_args_raises(self):
        # Every constructor argument is tried with each line-ending variant
        # appended; all combinations must be rejected.
        fields = (
            ('display_name', 'foo'),
            ('domain', 'example.com'),
            ('username', 'wok'),
            ('addr_spec', 'wok@example.com'),
        )
        line_endings = ('\r', '\n', '\r\n')
        cases = [{field: value + ending}
                 for field, value in fields
                 for ending in line_endings]
        for kwargs in cases:
            with self.subTest(kwargs=kwargs):
                with self.assertRaisesRegex(ValueError, "invalid arguments"):
                    Address(**kwargs)

    def test_non_ascii_username_in_addr_spec_raises(self):
        self.assertRaises(ValueError,
                          Address, 'foo', addr_spec='wők@example.com')

    def test_address_addr_spec_and_username_raises(self):
        self.assertRaises(TypeError,
                          Address, 'foo', username='bing',
                          addr_spec='bar@baz')

    def test_address_addr_spec_and_domain_raises(self):
        self.assertRaises(TypeError,
                          Address, 'foo', domain='bing',
                          addr_spec='bar@baz')

    def test_address_addr_spec_and_username_and_domain_raises(self):
        self.assertRaises(TypeError,
                          Address, 'foo', username='bong', domain='bing',
                          addr_spec='bar@baz')

    def test_space_in_addr_spec_username_raises(self):
        self.assertRaises(ValueError,
                          Address, 'foo', addr_spec="bad name@example.com")

    def test_bad_addr_sepc_raises(self):
        self.assertRaises(ValueError,
                          Address, 'foo', addr_spec="name@ex[]ample.com")

    def test_empty_group(self):
        self._check_group(Group('foo'), 'foo', tuple(), 'foo:;')

    def test_empty_group_list(self):
        self._check_group(Group('foo', addresses=[]),
                          'foo', tuple(), 'foo:;')

    def test_null_group(self):
        self._check_group(Group(), None, tuple(), 'None:;')

    def test_group_with_addresses(self):
        members = [Address('b', 'b', 'c'), Address('a', 'b', 'c')]
        self._check_group(Group('foo', members),
                          'foo', tuple(members), 'foo: b <b@c>, a <b@c>;')

    def test_group_with_addresses_no_display_name(self):
        members = [Address('b', 'b', 'c'), Address('a', 'b', 'c')]
        self._check_group(Group(addresses=members),
                          None, tuple(members), 'None: b <b@c>, a <b@c>;')

    def test_group_with_one_address_no_display_name(self):
        members = [Address('b', 'b', 'c')]
        self._check_group(Group(addresses=members),
                          None, tuple(members), 'b <b@c>')

    def test_display_name_quoting(self):
        self._check_group(Group('foo.bar'),
                          'foo.bar', tuple(), '"foo.bar":;')

    def test_display_name_blanks_not_quoted(self):
        self._check_group(Group('foo bar'),
                          'foo bar', tuple(), 'foo bar:;')

    def test_set_message_header_from_address(self):
        recipient = Address('foo', 'bar', 'example.com')
        message = Message(policy=policy.default)
        message['To'] = recipient
        self.assertEqual(message['to'], 'foo <bar@example.com>')
        self.assertEqual(message['to'].addresses, (recipient,))

    def test_set_message_header_from_group(self):
        recipients = Group('foo bar')
        message = Message(policy=policy.default)
        message['To'] = recipients
        self.assertEqual(message['to'], 'foo bar:;')
        self.assertEqual(message['to'].addresses, recipients.addresses)
1565
1566
1567class TestFolding(TestHeaderBase):
1568
1569    def test_address_display_names(self):
1570        """Test the folding and encoding of address headers."""
1571        for name, result in (
1572                ('Foo Bar, France', '"Foo Bar, France"'),
1573                ('Foo Bar (France)', '"Foo Bar (France)"'),
1574                ('Foo Bar, España', 'Foo =?utf-8?q?Bar=2C_Espa=C3=B1a?='),
1575                ('Foo Bar (España)', 'Foo Bar =?utf-8?b?KEVzcGHDsWEp?='),
1576                ('Foo, Bar España', '=?utf-8?q?Foo=2C_Bar_Espa=C3=B1a?='),
1577                ('Foo, Bar [España]', '=?utf-8?q?Foo=2C_Bar_=5BEspa=C3=B1a=5D?='),
1578                ('Foo Bär, France', 'Foo =?utf-8?q?B=C3=A4r=2C?= France'),
1579                ('Foo Bär <France>', 'Foo =?utf-8?q?B=C3=A4r_=3CFrance=3E?='),
1580                (
1581                    'Lôrem ipsum dôlôr sit amet, cônsectetuer adipiscing. '
1582                    'Suspendisse pôtenti. Aliquam nibh. Suspendisse pôtenti.',
1583                    '=?utf-8?q?L=C3=B4rem_ipsum_d=C3=B4l=C3=B4r_sit_amet=2C_c'
1584                    '=C3=B4nsectetuer?=\n =?utf-8?q?adipiscing=2E_Suspendisse'
1585                    '_p=C3=B4tenti=2E_Aliquam_nibh=2E?=\n Suspendisse =?utf-8'
1586                    '?q?p=C3=B4tenti=2E?=',
1587                    ),
1588                ):
1589            h = self.make_header('To', Address(name, addr_spec='a@b.com'))
1590            self.assertEqual(h.fold(policy=policy.default),
1591                                    'To: %s <a@b.com>\n' % result)
1592
1593    def test_short_unstructured(self):
1594        h = self.make_header('subject', 'this is a test')
1595        self.assertEqual(h.fold(policy=policy.default),
1596                         'subject: this is a test\n')
1597
1598    def test_long_unstructured(self):
1599        h = self.make_header('Subject', 'This is a long header '
1600            'line that will need to be folded into two lines '
1601            'and will demonstrate basic folding')
1602        self.assertEqual(h.fold(policy=policy.default),
1603                        'Subject: This is a long header line that will '
1604                            'need to be folded into two lines\n'
1605                        ' and will demonstrate basic folding\n')
1606
1607    def test_unstructured_short_max_line_length(self):
1608        h = self.make_header('Subject', 'this is a short header '
1609            'that will be folded anyway')
1610        self.assertEqual(
1611            h.fold(policy=policy.default.clone(max_line_length=20)),
1612            textwrap.dedent("""\
1613                Subject: this is a
1614                 short header that
1615                 will be folded
1616                 anyway
1617                """))
1618
1619    def test_fold_unstructured_single_word(self):
1620        h = self.make_header('Subject', 'test')
1621        self.assertEqual(h.fold(policy=policy.default), 'Subject: test\n')
1622
1623    def test_fold_unstructured_short(self):
1624        h = self.make_header('Subject', 'test test test')
1625        self.assertEqual(h.fold(policy=policy.default),
1626                        'Subject: test test test\n')
1627
1628    def test_fold_unstructured_with_overlong_word(self):
1629        h = self.make_header('Subject', 'thisisaverylonglineconsistingofa'
1630            'singlewordthatwontfit')
1631        self.assertEqual(
1632            h.fold(policy=policy.default.clone(max_line_length=20)),
1633            'Subject: \n'
1634            ' =?utf-8?q?thisisa?=\n'
1635            ' =?utf-8?q?verylon?=\n'
1636            ' =?utf-8?q?glineco?=\n'
1637            ' =?utf-8?q?nsistin?=\n'
1638            ' =?utf-8?q?gofasin?=\n'
1639            ' =?utf-8?q?gleword?=\n'
1640            ' =?utf-8?q?thatwon?=\n'
1641            ' =?utf-8?q?tfit?=\n'
1642            )
1643
1644    def test_fold_unstructured_with_two_overlong_words(self):
1645        h = self.make_header('Subject', 'thisisaverylonglineconsistingofa'
1646            'singlewordthatwontfit plusanotherverylongwordthatwontfit')
1647        self.assertEqual(
1648            h.fold(policy=policy.default.clone(max_line_length=20)),
1649            'Subject: \n'
1650            ' =?utf-8?q?thisisa?=\n'
1651            ' =?utf-8?q?verylon?=\n'
1652            ' =?utf-8?q?glineco?=\n'
1653            ' =?utf-8?q?nsistin?=\n'
1654            ' =?utf-8?q?gofasin?=\n'
1655            ' =?utf-8?q?gleword?=\n'
1656            ' =?utf-8?q?thatwon?=\n'
1657            ' =?utf-8?q?tfit_pl?=\n'
1658            ' =?utf-8?q?usanoth?=\n'
1659            ' =?utf-8?q?erveryl?=\n'
1660            ' =?utf-8?q?ongword?=\n'
1661            ' =?utf-8?q?thatwon?=\n'
1662            ' =?utf-8?q?tfit?=\n'
1663            )
1664
1665    # XXX Need test for when max_line_length is less than the chrome size.
1666
1667    def test_fold_unstructured_with_slightly_long_word(self):
1668        h = self.make_header('Subject', 'thislongwordislessthanmaxlinelen')
1669        self.assertEqual(
1670            h.fold(policy=policy.default.clone(max_line_length=35)),
1671            'Subject:\n thislongwordislessthanmaxlinelen\n')
1672
1673    def test_fold_unstructured_with_commas(self):
1674        # The old wrapper would fold this at the commas.
1675        h = self.make_header('Subject', "This header is intended to "
1676            "demonstrate, in a fairly succinct way, that we now do "
1677            "not give a , special treatment in unstructured headers.")
1678        self.assertEqual(
1679            h.fold(policy=policy.default.clone(max_line_length=60)),
1680            textwrap.dedent("""\
1681                Subject: This header is intended to demonstrate, in a fairly
1682                 succinct way, that we now do not give a , special treatment
1683                 in unstructured headers.
1684                 """))
1685
1686    def test_fold_address_list(self):
1687        h = self.make_header('To', '"Theodore H. Perfect" <yes@man.com>, '
1688            '"My address is very long because my name is long" <foo@bar.com>, '
1689            '"Only A. Friend" <no@yes.com>')
1690        self.assertEqual(h.fold(policy=policy.default), textwrap.dedent("""\
1691            To: "Theodore H. Perfect" <yes@man.com>,
1692             "My address is very long because my name is long" <foo@bar.com>,
1693             "Only A. Friend" <no@yes.com>
1694             """))
1695
1696    def test_fold_date_header(self):
1697        h = self.make_header('Date', 'Sat, 2 Feb 2002 17:00:06 -0800')
1698        self.assertEqual(h.fold(policy=policy.default),
1699                        'Date: Sat, 02 Feb 2002 17:00:06 -0800\n')
1700
1701    def test_fold_overlong_words_using_RFC2047(self):
1702        h = self.make_header(
1703            'X-Report-Abuse',
1704            '<https://www.mailitapp.com/report_abuse.php?'
1705              'mid=xxx-xxx-xxxxxxxxxxxxxxxxxxxxxxxx==-xxx-xx-xx>')
1706        self.assertEqual(
1707            h.fold(policy=policy.default),
1708            'X-Report-Abuse: =?utf-8?q?=3Chttps=3A//www=2Emailitapp=2E'
1709                'com/report=5Fabuse?=\n'
1710            ' =?utf-8?q?=2Ephp=3Fmid=3Dxxx-xxx-xxxx'
1711                'xxxxxxxxxxxxxxxxxxxx=3D=3D-xxx-xx-xx?=\n'
1712            ' =?utf-8?q?=3E?=\n')
1713
1714    def test_message_id_header_is_not_folded(self):
1715        h = self.make_header(
1716            'Message-ID',
1717            '<somemessageidlongerthan@maxlinelength.com>')
1718        self.assertEqual(
1719            h.fold(policy=policy.default.clone(max_line_length=20)),
1720            'Message-ID: <somemessageidlongerthan@maxlinelength.com>\n')
1721
1722        # Test message-id isn't folded when id-right is no-fold-literal.
1723        h = self.make_header(
1724            'Message-ID',
1725            '<somemessageidlongerthan@[127.0.0.0.0.0.0.0.0.1]>')
1726        self.assertEqual(
1727            h.fold(policy=policy.default.clone(max_line_length=20)),
1728            'Message-ID: <somemessageidlongerthan@[127.0.0.0.0.0.0.0.0.1]>\n')
1729
1730        # Test message-id isn't folded when id-right is non-ascii characters.
1731        h = self.make_header('Message-ID', '<ईमेल@wők.com>')
1732        self.assertEqual(
1733            h.fold(policy=policy.default.clone(max_line_length=30)),
1734            'Message-ID: <ईमेल@wők.com>\n')
1735
1736        # Test message-id is folded without breaking the msg-id token into
1737        # encoded words, *even* if they don't fit into max_line_length.
1738        h = self.make_header('Message-ID', '<ईमेलfromMessage@wők.com>')
1739        self.assertEqual(
1740            h.fold(policy=policy.default.clone(max_line_length=20)),
1741            'Message-ID:\n <ईमेलfromMessage@wők.com>\n')
1742
if __name__ == "__main__":
    # Run this module's tests when the file is executed directly.
    unittest.main()
1745