1"""Representing and manipulating email headers via custom objects.
2
3This module provides an implementation of the HeaderRegistry API.
4The implementation is designed to flexibly follow RFC5322 rules.
5
6Eventually HeaderRegistry will be a public API, but it isn't yet,
7and will probably change some before that happens.
8
9"""
10from types import MappingProxyType
11
12from email import utils
13from email import errors
14from email import _header_value_parser as parser
15
class Address:

    """A single email address: display name, username and domain.

    Instances expose read-only display_name, username, domain and addr_spec
    attributes.  str() of an instance yields the address quoted according
    to RFC 5322 rules, without any Content Transfer Encoding.
    """

    def __init__(self, display_name='', username='', domain='', addr_spec=None):
        """Create an object representing a full email address.

        An address can have a 'display_name', a 'username', and a 'domain'.  In
        addition to specifying the username and domain separately, they may be
        specified together by using the addr_spec keyword *instead of* the
        username and domain keywords.  If an addr_spec string is specified it
        must be properly quoted according to RFC 5322 rules; an error will be
        raised if it is not.

        An Address object has display_name, username, domain, and addr_spec
        attributes, all of which are read-only.  The addr_spec and the string
        value of the object are both quoted according to RFC5322 rules, but
        without any Content Transfer Encoding.

        Raises ValueError if any supplied part contains CR or LF, or if
        addr_spec cannot be parsed completely.  Raises TypeError if addr_spec
        is combined with username and/or domain.  If parsing addr_spec
        produced defects, the first defect is raised.

        """

        # Check every non-empty part at once: CR/LF in any of them is refused.
        inputs = ''.join(filter(None, (display_name, username, domain, addr_spec)))
        if '\r' in inputs or '\n' in inputs:
            raise ValueError("invalid arguments; address parts cannot contain CR or LF")

        # This clause with its potential 'raise' may only happen when an
        # application program creates an Address object using an addr_spec
        # keyword.  The email library code itself must always supply username
        # and domain.
        if addr_spec is not None:
            if username or domain:
                raise TypeError("addrspec specified when username and/or "
                                "domain also specified")
            a_s, rest = parser.get_addr_spec(addr_spec)
            if rest:
                raise ValueError("Invalid addr_spec; only '{}' "
                                 "could be parsed from '{}'".format(
                                    a_s, addr_spec))
            if a_s.all_defects:
                raise a_s.all_defects[0]
            username = a_s.local_part
            domain = a_s.domain
        self._display_name = display_name
        self._username = username
        self._domain = domain

    @property
    def display_name(self):
        """The display name given at construction (read-only)."""
        return self._display_name

    @property
    def username(self):
        """The local part of the address (read-only)."""
        return self._username

    @property
    def domain(self):
        """The domain part of the address (read-only)."""
        return self._domain

    @property
    def addr_spec(self):
        """The addr_spec (username@domain) portion of the address, quoted
        according to RFC 5322 rules, but with no Content Transfer Encoding.

        Returns '<>' when both username and domain are empty.
        """
        lp = self.username
        # Quote the local part as soon as it shares any character with
        # parser.DOT_ATOM_ENDS.
        if not parser.DOT_ATOM_ENDS.isdisjoint(lp):
            lp = parser.quote_string(lp)
        if self.domain:
            return lp + '@' + self.domain
        if not lp:
            return '<>'
        return lp

    def __repr__(self):
        return "{}(display_name={!r}, username={!r}, domain={!r})".format(
                        self.__class__.__name__,
                        self.display_name, self.username, self.domain)

    def __str__(self):
        """Return 'display name <addr_spec>', or just addr_spec when there
        is no display name.  The display name is quoted if it contains any
        character from parser.SPECIALS.
        """
        disp = self.display_name
        if not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        if disp:
            # An empty address is rendered as 'name <>' rather than 'name <<>>'.
            addr_spec = '' if self.addr_spec=='<>' else self.addr_spec
            return "{} <{}>".format(disp, addr_spec)
        return self.addr_spec

    def __eq__(self, other):
        """Compare display_name, username and domain with another object of
        exactly the same type.

        NotImplemented is returned for any other type so that Python can try
        the reflected comparison on the other operand before falling back to
        its default (unequal) result.
        """
        if type(other) is not type(self):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.username == other.username and
                self.domain == other.domain)
110
111
class Group:

    """A named group of Address objects, or a wrapper for a lone address.

    Instances expose read-only display_name and addresses attributes;
    addresses is always a tuple.
    """

    def __init__(self, display_name=None, addresses=None):
        """Create an object representing an address group.

        An address group consists of a display_name followed by colon and a
        list of addresses (see Address) terminated by a semi-colon.  The Group
        is created by specifying a display_name and a possibly empty list of
        Address objects.  A Group can also be used to represent a single
        address that is not in a group, which is convenient when manipulating
        lists that are a combination of Groups and individual Addresses.  In
        this case the display_name should be set to None.  In particular, the
        string representation of a Group whose display_name is None is the same
        as the Address object, if there is one and only one Address object in
        the addresses list.

        """
        self._display_name = display_name
        # Any falsy value (None, empty list, ...) becomes the empty tuple.
        self._addresses = tuple(addresses) if addresses else tuple()

    @property
    def display_name(self):
        """The group name, or None for an ungrouped address (read-only)."""
        return self._display_name

    @property
    def addresses(self):
        """Tuple of the member addresses (read-only)."""
        return self._addresses

    def __repr__(self):
        # The closing parenthesis was missing from this format string.
        return "{}(display_name={!r}, addresses={!r})".format(
                 self.__class__.__name__,
                 self.display_name, self.addresses)

    def __str__(self):
        """Return 'name: addr, addr;', or the bare address when display_name
        is None and the group holds exactly one address.
        """
        if self.display_name is None and len(self.addresses)==1:
            return str(self.addresses[0])
        disp = self.display_name
        # Quote the name if it contains any character from parser.SPECIALS.
        if disp is not None and not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        adrstr = ", ".join(str(x) for x in self.addresses)
        adrstr = ' ' + adrstr if adrstr else adrstr
        return "{}:{};".format(disp, adrstr)

    def __eq__(self, other):
        """Compare display_name and addresses with another object of exactly
        the same type.

        NotImplemented is returned for any other type so that Python can try
        the reflected comparison on the other operand before falling back to
        its default (unequal) result.
        """
        if type(other) is not type(self):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.addresses == other.addresses)
162
163
164# Header Classes #
165
class BaseHeader(str):

    """Common str-based foundation for all header classes.

    The string value of an instance is the 'decoded' value produced by the
    subclass's parser.

    Contract for subclasses:

    * Provide a classmethod 'parse(value, kwds)'.  It receives the unfolded
      value and a dict holding a single key, 'defects' (an empty list).  On
      return the dict must also hold 'parse_tree' (the parse tree for the
      value) and 'decoded' (the idealized string form of the value: encoded
      words decoded, canonical representations applied).

    * Parsers should, insofar as practical, not raise.  Problems such as
      RFC compliance issues, obsolete syntax and unrecoverable parsing
      errors are recorded in the 'defects' list, which the message parser
      disposes of later.

    * If parse adds further keys, the subclass must define 'init', which is
      called with the dict contents as keyword arguments.  It should consume
      (typically store as attributes) and remove its own keys, then delegate
      the remaining arguments to the parent via super().

    * Define a 'max_count' attribute that is either None or 1.
      XXX: need to better define this API.

    """

    def __new__(cls, name, value):
        parsed = {'defects': []}
        cls.parse(value, parsed)
        # 'decoded' becomes the str value; everything else goes to init().
        decoded = parsed.pop('decoded')
        if utils._has_surrogates(decoded):
            decoded = utils._sanitize(decoded)
        header = str.__new__(cls, decoded)
        header.init(name, **parsed)
        return header

    def init(self, name, *, parse_tree, defects):
        self._defects = defects
        self._parse_tree = parse_tree
        self._name = name

    @property
    def name(self):
        return self._name

    @property
    def defects(self):
        # Hand out an immutable snapshot of the defect list.
        return tuple(self._defects)

    def __reduce__(self):
        # Rebuild through _reconstruct_header from the class name, its bases
        # and the string value; the instance __dict__ is passed as state.
        klass = self.__class__
        rebuild_args = (klass.__name__, klass.__bases__, str(self))
        return (_reconstruct_header, rebuild_args, self.__dict__)

    @classmethod
    def _reconstruct(cls, value):
        # Create the str instance directly, without running parse()/init().
        return str.__new__(cls, value)

    def fold(self, *, policy):
        """Return the folded header, including its name and separator.

        The parsed representation of the header is folded according to
        RFC5322 rules, as modified by the policy.  Surrogateescaped bytes in
        the parse tree are CTE encoded using the charset 'unknown-8bit'.

        Non-ASCII characters in the parse tree are CTE encoded using charset
        utf-8. XXX: make this a policy setting.

        The result is an ASCII-only string that may contain linesep
        characters and ends with one.  It starts with the header name and
        the ': ' separator.

        """
        # At some point we need to put fws here if it was in the source.
        label = parser.HeaderLabel([
            parser.ValueTerminal(self.name, 'header-name'),
            parser.ValueTerminal(':', 'header-sep'),
        ])
        folded = parser.Header([label])
        tree = self._parse_tree
        if tree:
            # Single space between the label and a non-empty value.
            space = parser.WhiteSpaceTerminal(' ', 'fws')
            folded.append(parser.CFWSList([space]))
        folded.append(tree)
        return folded.fold(policy=policy)
264
265
def _reconstruct_header(cls_name, bases, value):
    """Recreate a header from a class name, its bases and its string value.

    A new class is built from cls_name and bases, and the instance is
    produced by that class's _reconstruct classmethod.
    """
    header_cls = type(cls_name, bases, {})
    return header_cls._reconstruct(value)
268
269
class UnstructuredHeader:

    """Header whose value is parsed with parser.get_unstructured."""

    max_count = None
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Store the parse tree of value and its string form in kwds."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
279
280
class UniqueUnstructuredHeader(UnstructuredHeader):

    # Identical to UnstructuredHeader except that max_count is 1 rather
    # than None.  NOTE(review): presumably this limits the header to a
    # single occurrence per message; the code that reads max_count is not
    # in this module -- confirm.

    max_count = 1
284
285
class DateHeader:

    """Header whose value consists of a single timestamp.

    Provides an additional attribute, datetime, which is either an aware
    datetime using a timezone, or a naive datetime if the timezone
    in the input string is -0000.  Also accepts a datetime as input.
    The 'value' attribute is the normalized form of the timestamp,
    which means it is the output of format_datetime on the datetime.

    If the value is empty or cannot be parsed as a date, the datetime
    attribute is None and a defect is registered instead of an exception
    being raised.
    """

    max_count = None

    # This is used only for folding, not for creating 'decoded'.
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Populate kwds with 'datetime', 'decoded' and 'parse_tree'.

        value may be a date string or a datetime object.  An empty value
        registers HeaderMissingRequiredValue; an unparseable string
        registers InvalidHeaderDefect and keeps the original text as the
        decoded value.
        """
        if not value:
            kwds['defects'].append(errors.HeaderMissingRequiredValue())
            kwds['datetime'] = None
            kwds['decoded'] = ''
            kwds['parse_tree'] = parser.TokenList()
            return
        if isinstance(value, str):
            try:
                value = utils.parsedate_to_datetime(value)
            except (TypeError, ValueError, OverflowError):
                # Header parsers are expected to register defects rather
                # than raise.  Keep the raw text so nothing is lost, both
                # as the decoded value and as the tree used for folding.
                kwds['defects'].append(errors.InvalidHeaderDefect(
                    'Invalid date value or format'))
                kwds['datetime'] = None
                kwds['decoded'] = value
                kwds['parse_tree'] = cls.value_parser(value)
                return
        kwds['datetime'] = value
        kwds['decoded'] = utils.format_datetime(kwds['datetime'])
        kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        # Consume the key added by parse() before delegating upwards.
        self._datetime = kw.pop('datetime')
        super().init(*args, **kw)

    @property
    def datetime(self):
        """The parsed datetime, or None if the value was empty or invalid."""
        return self._datetime
323
324
class UniqueDateHeader(DateHeader):

    # Identical to DateHeader except that max_count is 1 rather than None.
    # NOTE(review): presumably this limits the header to a single
    # occurrence per message; the code that reads max_count is not in this
    # module -- confirm.

    max_count = 1
328
329
class AddressHeader:

    """Header whose value is a list of addresses and/or address groups.

    Adds two attributes: groups, a tuple of Group objects, and addresses,
    a flat tuple of every address contained in those groups.
    """

    max_count = None

    @staticmethod
    def value_parser(value):
        """Parse value as an address list and return the parse tree."""
        address_list, remainder = parser.get_address_list(value)
        assert not remainder, 'this should not happen'
        return address_list

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds from either a header string or Address/Group objects."""
        if isinstance(value, str):
            # We are translating here from the RFC language (address/mailbox)
            # to our API language (group/address).
            tree = cls.value_parser(value)
            kwds['parse_tree'] = tree
            groups = [
                Group(entry.display_name,
                      [Address(mailbox.display_name or '',
                               mailbox.local_part or '',
                               mailbox.domain or '')
                       for mailbox in entry.all_mailboxes])
                for entry in tree.addresses
            ]
            defects = list(tree.all_defects)
        else:
            # Assume it is Address/Group stuff: a single object or an
            # iterable of them.
            items = value if hasattr(value, '__iter__') else [value]
            # Anything without an 'addresses' attribute is wrapped in an
            # unnamed Group.
            groups = [item if hasattr(item, 'addresses') else Group(None, [item])
                      for item in items]
            defects = []
        kwds['groups'] = groups
        kwds['defects'] = defects
        kwds['decoded'] = ', '.join(map(str, groups))
        if 'parse_tree' not in kwds:
            # Objects were supplied, so derive the tree from their rendering.
            kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        self._groups = tuple(kw.pop('groups'))
        # Flattened on first access by the addresses property.
        self._addresses = None
        super().init(*args, **kw)

    @property
    def groups(self):
        return self._groups

    @property
    def addresses(self):
        if self._addresses is None:
            flattened = []
            for group in self._groups:
                flattened.extend(group.addresses)
            self._addresses = tuple(flattened)
        return self._addresses
383
384
class UniqueAddressHeader(AddressHeader):

    # Identical to AddressHeader except that max_count is 1 rather than
    # None.  NOTE(review): presumably this limits the header to a single
    # occurrence per message; the code that reads max_count is not in this
    # module -- confirm.

    max_count = 1
388
389
class SingleAddressHeader(AddressHeader):

    """Address header offering direct access to its one address."""

    @property
    def address(self):
        """The sole address of the header.

        Raises ValueError unless the header holds exactly one address.
        """
        found = self.addresses
        if len(found) == 1:
            return found[0]
        raise ValueError(("value of single address header {} is not "
            "a single address").format(self.name))
398
399
class UniqueSingleAddressHeader(SingleAddressHeader):

    # Identical to SingleAddressHeader except that max_count is set to 1.
    # NOTE(review): presumably this limits the header to a single
    # occurrence per message; the code that reads max_count is not in this
    # module -- confirm.

    max_count = 1
403
404
class MIMEVersionHeader:

    """Header carrying a MIME version.

    Adds the attributes major, minor and version ('major.minor').  All
    three are None when the parse tree has no minor number.
    """

    max_count = 1

    value_parser = staticmethod(parser.parse_mime_version)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse tree, decoded text and version parts."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        minor = tree.minor
        if minor is None:
            # Without a minor number the major number is not reported either.
            major = None
            version = None
        else:
            major = tree.major
            version = '{}.{}'.format(major, minor)
        kwds['major'] = major
        kwds['minor'] = minor
        kwds['version'] = version

    def init(self, *args, **kw):
        # Consume the keys added by parse() before delegating upwards.
        self._version = kw.pop('version')
        self._major = kw.pop('major')
        self._minor = kw.pop('minor')
        super().init(*args, **kw)

    @property
    def major(self):
        return self._major

    @property
    def minor(self):
        return self._minor

    @property
    def version(self):
        return self._version
440
441
442class ParameterizedMIMEHeader:
443
444    # Mixin that handles the params dict.  Must be subclassed and
445    # a property value_parser for the specific header provided.
446
447    max_count = 1
448
449    @classmethod
450    def parse(cls, value, kwds):
451        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
452        kwds['decoded'] = str(parse_tree)
453        kwds['defects'].extend(parse_tree.all_defects)
454        if parse_tree.params is None:
455            kwds['params'] = {}
456        else:
457            # The MIME RFCs specify that parameter ordering is arbitrary.
458            kwds['params'] = {utils._sanitize(name).lower():
459                                    utils._sanitize(value)
460                               for name, value in parse_tree.params}
461
462    def init(self, *args, **kw):
463        self._params = kw.pop('params')
464        super().init(*args, **kw)
465
466    @property
467    def params(self):
468        return MappingProxyType(self._params)
469
470
class ContentTypeHeader(ParameterizedMIMEHeader):

    """Parameterized header parsed with parser.parse_content_type_header.

    Adds the attributes maintype, subtype and content_type
    ('maintype/subtype').
    """

    value_parser = staticmethod(parser.parse_content_type_header)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        # The parent init has stored the parse tree; read the types from it.
        tree = self._parse_tree
        self._maintype = utils._sanitize(tree.maintype)
        self._subtype = utils._sanitize(tree.subtype)

    @property
    def maintype(self):
        return self._maintype

    @property
    def subtype(self):
        return self._subtype

    @property
    def content_type(self):
        return '/'.join((self.maintype, self.subtype))
491
492
class ContentDispositionHeader(ParameterizedMIMEHeader):

    """Parameterized header parsed with
    parser.parse_content_disposition_header.

    Adds the attribute content_disposition, which is None when the parse
    tree carries no disposition.
    """

    value_parser = staticmethod(parser.parse_content_disposition_header)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        disposition = self._parse_tree.content_disposition
        if disposition is not None:
            disposition = utils._sanitize(disposition)
        self._content_disposition = disposition

    @property
    def content_disposition(self):
        return self._content_disposition
505
506
class ContentTransferEncodingHeader:

    """Header parsed with parser.parse_content_transfer_encoding_header.

    Adds the attribute cte, taken from the parse tree.
    """

    max_count = 1

    value_parser = staticmethod(parser.parse_content_transfer_encoding_header)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse tree, decoded text and parse defects."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        # The parent init has stored the parse tree; read the cte from it.
        tree = self._parse_tree
        self._cte = utils._sanitize(tree.cte)

    @property
    def cte(self):
        return self._cte
526
527
class MessageIDHeader:

    """Header parsed with parser.parse_message_id."""

    max_count = 1
    value_parser = staticmethod(parser.parse_message_id)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse tree, decoded text and parse defects."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
538
539
540# The header factory #
541
# Default mapping from lower-cased header name to the specialized class
# that handles it.  HeaderRegistry copies this mapping into its registry
# unless it is created with use_default_map false; names that are not
# listed fall back to the registry's default_class.
_default_header_map = {
    'subject':                      UniqueUnstructuredHeader,
    'date':                         UniqueDateHeader,
    'resent-date':                  DateHeader,
    'orig-date':                    UniqueDateHeader,
    'sender':                       UniqueSingleAddressHeader,
    'resent-sender':                SingleAddressHeader,
    'to':                           UniqueAddressHeader,
    'resent-to':                    AddressHeader,
    'cc':                           UniqueAddressHeader,
    'resent-cc':                    AddressHeader,
    'bcc':                          UniqueAddressHeader,
    'resent-bcc':                   AddressHeader,
    'from':                         UniqueAddressHeader,
    'resent-from':                  AddressHeader,
    'reply-to':                     UniqueAddressHeader,
    'mime-version':                 MIMEVersionHeader,
    'content-type':                 ContentTypeHeader,
    'content-disposition':          ContentDispositionHeader,
    'content-transfer-encoding':    ContentTransferEncodingHeader,
    'message-id':                   MessageIDHeader,
    }
564
class HeaderRegistry:

    """A header_factory and header registry."""

    def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
                       use_default_map=True):
        """Create a header_factory that works with the Policy API.

        base_class becomes the last entry in the __bases__ of every header
        class the factory creates.  default_class handles any "name" (see
        __call__) that has no entry in the registry.  use_default_map
        decides whether the default name-to-class mapping is copied into
        the registry at creation time; it defaults to True.

        """
        self.registry = dict(_default_header_map) if use_default_map else {}
        self.base_class = base_class
        self.default_class = default_class

    def map_to_type(self, name, cls):
        """Register cls as the specialized class for handling "name" headers.

        The name is stored lower-cased.

        """
        key = name.lower()
        self.registry[key] = cls

    def __getitem__(self, name):
        # Look up the specialized class (case-insensitively) and combine it
        # with base_class into a new class on every call.
        specialized = self.registry.get(name.lower(), self.default_class)
        bases = (specialized, self.base_class)
        return type('_' + specialized.__name__, bases, {})

    def __call__(self, name, value):
        """Create a header instance for header 'name' from 'value'.

        The class used is built by combining the factory base_class with
        the specialized class registered for the name (or default_class if
        there is none); name and value are then passed to that class's
        constructor.

        """
        header_class = self[name]
        return header_class(name, value)
608