"""Representing and manipulating email headers via custom objects.

This module provides an implementation of the HeaderRegistry API.
The implementation is designed to flexibly follow RFC5322 rules.

Eventually HeaderRegistry will be a public API, but it isn't yet,
and will probably change some before that happens.

"""
from types import MappingProxyType

from email import utils
from email import errors
from email import _header_value_parser as parser


class Address:
    """A single email address: display name plus username@domain."""

    def __init__(self, display_name='', username='', domain='', addr_spec=None):
        """Create an object representing a full email address.

        An address can have a 'display_name', a 'username', and a 'domain'.  In
        addition to specifying the username and domain separately, they may be
        specified together by using the addr_spec keyword *instead of* the
        username and domain keywords.  If an addr_spec string is specified it
        must be properly quoted according to RFC 5322 rules; an error will be
        raised if it is not.

        An Address object has display_name, username, domain, and addr_spec
        attributes, all of which are read-only.  The addr_spec and the string
        value of the object are both quoted according to RFC5322 rules, but
        without any Content Transfer Encoding.

        Raises ValueError if any part contains CR or LF, or if addr_spec
        cannot be completely parsed; raises TypeError if addr_spec is given
        together with username and/or domain; re-raises the first defect
        found while parsing addr_spec.

        """

        inputs = ''.join(filter(None, (display_name, username, domain, addr_spec)))
        if '\r' in inputs or '\n' in inputs:
            raise ValueError("invalid arguments; address parts cannot contain CR or LF")

        # This clause with its potential 'raise' may only happen when an
        # application program creates an Address object using an addr_spec
        # keyword.  The email library code itself must always supply username
        # and domain.
        if addr_spec is not None:
            if username or domain:
                raise TypeError("addrspec specified when username and/or "
                                "domain also specified")
            a_s, rest = parser.get_addr_spec(addr_spec)
            if rest:
                raise ValueError("Invalid addr_spec; only '{}' "
                                 "could be parsed from '{}'".format(
                                    a_s, addr_spec))
            if a_s.all_defects:
                raise a_s.all_defects[0]
            username = a_s.local_part
            domain = a_s.domain
        self._display_name = display_name
        self._username = username
        self._domain = domain

    @property
    def display_name(self):
        return self._display_name

    @property
    def username(self):
        return self._username

    @property
    def domain(self):
        return self._domain

    @property
    def addr_spec(self):
        """The addr_spec (username@domain) portion of the address, quoted
        according to RFC 5322 rules, but with no Content Transfer Encoding.
        """
        # Quote the local part only if it contains a DOT_ATOM_ENDS character.
        if not parser.DOT_ATOM_ENDS.isdisjoint(self.username):
            lp = parser.quote_string(self.username)
        else:
            lp = self.username
        if self.domain:
            return lp + '@' + self.domain
        if not lp:
            # Neither a local part nor a domain: the null address.
            return '<>'
        return lp

    def __repr__(self):
        return "{}(display_name={!r}, username={!r}, domain={!r})".format(
                        self.__class__.__name__,
                        self.display_name, self.username, self.domain)

    def __str__(self):
        """Return 'display_name <addr_spec>', or just addr_spec when there
        is no display name.  The display name is quoted if it contains any
        character from parser.SPECIALS.
        """
        if not parser.SPECIALS.isdisjoint(self.display_name):
            disp = parser.quote_string(self.display_name)
        else:
            disp = self.display_name
        if disp:
            # The null address is rendered as empty angle brackets.
            addr_spec = '' if self.addr_spec == '<>' else self.addr_spec
            return "{} <{}>".format(disp, addr_spec)
        return self.addr_spec

    def __eq__(self, other):
        # Returning NotImplemented (rather than False) for foreign types lets
        # Python try the reflected comparison before falling back to False.
        if not isinstance(other, Address):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.username == other.username and
                self.domain == other.domain)


class Group:
    """A named (or anonymous) group of Address objects."""

    def __init__(self, display_name=None, addresses=None):
        """Create an object representing an address group.

        An address group consists of a display_name followed by colon and a
        list of addresses (see Address) terminated by a semi-colon.  The Group
        is created by specifying a display_name and a possibly empty list of
        Address objects.  A Group can also be used to represent a single
        address that is not in a group, which is convenient when manipulating
        lists that are a combination of Groups and individual Addresses.  In
        this case the display_name should be set to None.  In particular, the
        string representation of a Group whose display_name is None is the same
        as the Address object, if there is one and only one Address object in
        the addresses list.

        """
        self._display_name = display_name
        self._addresses = tuple(addresses) if addresses else tuple()

    @property
    def display_name(self):
        return self._display_name

    @property
    def addresses(self):
        return self._addresses

    def __repr__(self):
        return "{}(display_name={!r}, addresses={!r})".format(
                 self.__class__.__name__,
                 self.display_name, self.addresses)

    def __str__(self):
        """Return 'display_name: addr, addr;', or the bare address string
        when display_name is None and the group holds exactly one address.
        """
        if self.display_name is None and len(self.addresses) == 1:
            return str(self.addresses[0])
        disp = self.display_name
        if disp is not None and not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        adrstr = ", ".join(str(x) for x in self.addresses)
        adrstr = ' ' + adrstr if adrstr else adrstr
        return "{}:{};".format(disp, adrstr)

    def __eq__(self, other):
        # See Address.__eq__ for why NotImplemented is returned here.
        if not isinstance(other, Group):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.addresses == other.addresses)


# Header Classes #

class BaseHeader(str):

    """Base class for message headers.

    Implements generic behavior and provides tools for subclasses.

    A subclass must define a classmethod named 'parse' that takes an unfolded
    value string and a dictionary as its arguments.  The dictionary will
    contain one key, 'defects', initialized to an empty list.  After the call
    the dictionary must contain two additional keys: parse_tree, set to the
    parse tree obtained from parsing the header, and 'decoded', set to the
    string value of the idealized representation of the data from the value.
    (That is, encoded words are decoded, and values that have canonical
    representations are so represented.)

    The defects key is intended to collect parsing defects, which the message
    parser will subsequently dispose of as appropriate.  The parser should not,
    insofar as practical, raise any errors.  Defects should be added to the
    list instead.  The standard header parsers register defects for RFC
    compliance issues, for obsolete RFC syntax, and for unrecoverable parsing
    errors.

    The parse method may add additional keys to the dictionary.  In this case
    the subclass must define an 'init' method, which will be passed the
    dictionary as its keyword arguments.  The method should use (usually by
    setting them as the value of similarly named attributes) and remove all the
    extra keys added by its parse method, and then use super to call its parent
    class with the remaining arguments and keywords.

    The subclass should also make sure that a 'max_count' attribute is defined
    that is either None or 1. XXX: need to better define this API.

    """

    def __new__(cls, name, value):
        kwds = {'defects': []}
        cls.parse(value, kwds)
        if utils._has_surrogates(kwds['decoded']):
            kwds['decoded'] = utils._sanitize(kwds['decoded'])
        # The str value of the header object is the decoded value.
        self = str.__new__(cls, kwds['decoded'])
        del kwds['decoded']
        self.init(name, **kwds)
        return self

    def init(self, name, *, parse_tree, defects):
        """Store the header name, parse tree and defects list.

        Called from __new__ with whatever keys 'parse' left in the dictionary
        (minus 'decoded'); subclasses pop their own keys before delegating
        here.
        """
        self._name = name
        self._parse_tree = parse_tree
        self._defects = defects

    @property
    def name(self):
        return self._name

    @property
    def defects(self):
        return tuple(self._defects)

    def __reduce__(self):
        # Header classes are created dynamically (see HeaderRegistry), so the
        # class is rebuilt from its name and bases when unpickling.
        return (
            _reconstruct_header,
            (
                self.__class__.__name__,
                self.__class__.__bases__,
                str(self),
            ),
            self.__dict__)

    @classmethod
    def _reconstruct(cls, value):
        # Bypass __new__ (and therefore parsing); used by _reconstruct_header.
        return str.__new__(cls, value)

    def fold(self, *, policy):
        """Fold header according to policy.

        The parsed representation of the header is folded according to
        RFC5322 rules, as modified by the policy.  If the parse tree
        contains surrogateescaped bytes, the bytes are CTE encoded using
        the charset 'unknown-8bit'.

        Any non-ASCII characters in the parse tree are CTE encoded using
        charset utf-8. XXX: make this a policy setting.

        The returned value is an ASCII-only string possibly containing linesep
        characters, and ending with a linesep character.  The string includes
        the header name and the ': ' separator.

        """
        # At some point we need to put fws here if it was in the source.
        header = parser.Header([
            parser.HeaderLabel([
                parser.ValueTerminal(self.name, 'header-name'),
                parser.ValueTerminal(':', 'header-sep')]),
            ])
        if self._parse_tree:
            header.append(
                parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')]))
        header.append(self._parse_tree)
        return header.fold(policy=policy)


def _reconstruct_header(cls_name, bases, value):
    """Rebuild a dynamically created header class and instantiate it.

    Used by BaseHeader.__reduce__; the instance is created without parsing.
    """
    return type(cls_name, bases, {})._reconstruct(value)


class UnstructuredHeader:
    """Mixin for headers parsed with parser.get_unstructured."""

    max_count = None
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        kwds['parse_tree'] = cls.value_parser(value)
        kwds['decoded'] = str(kwds['parse_tree'])


class UniqueUnstructuredHeader(UnstructuredHeader):

    max_count = 1


class DateHeader:

    """Header whose value consists of a single timestamp.

    Provides an additional attribute, datetime, which is either an aware
    datetime using a timezone, or a naive datetime if the timezone
    in the input string is -0000.  Also accepts a datetime as input.
    The 'value' attribute is the normalized form of the timestamp,
    which means it is the output of format_datetime on the datetime.
    """

    max_count = None

    # This is used only for folding, not for creating 'decoded'.
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Parse a date string (or accept a datetime) into kwds.

        An empty value registers HeaderMissingRequiredValue.  A string that
        cannot be parsed as a date registers a defect instead of raising:
        the datetime is None, the decoded value is the original string and
        the parse tree is empty.
        """
        if not value:
            kwds['defects'].append(errors.HeaderMissingRequiredValue())
            kwds['datetime'] = None
            kwds['decoded'] = ''
            kwds['parse_tree'] = parser.TokenList()
            return
        if isinstance(value, str):
            try:
                value = utils.parsedate_to_datetime(value)
            except (TypeError, ValueError):
                # Parsers should record defects rather than raise.  Fall back
                # to InvalidHeaderDefect where the errors module in use does
                # not provide InvalidDateDefect.
                defect_cls = getattr(errors, 'InvalidDateDefect',
                                     errors.InvalidHeaderDefect)
                kwds['defects'].append(
                    defect_cls('Invalid date value or format'))
                kwds['datetime'] = None
                kwds['decoded'] = value
                kwds['parse_tree'] = parser.TokenList()
                return
        kwds['datetime'] = value
        kwds['decoded'] = utils.format_datetime(kwds['datetime'])
        kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        self._datetime = kw.pop('datetime')
        super().init(*args, **kw)

    @property
    def datetime(self):
        return self._datetime


class UniqueDateHeader(DateHeader):

    max_count = 1


class AddressHeader:
    """Mixin for headers whose value is a list of addresses and/or groups."""

    max_count = None

    @staticmethod
    def value_parser(value):
        address_list, value = parser.get_address_list(value)
        assert not value, 'this should not happen'
        return address_list

    @classmethod
    def parse(cls, value, kwds):
        """Build the 'groups' list from a string or from Address/Group objects.

        A string is parsed into an address list; anything else is treated as
        an Address/Group or an iterable of them, and the parse tree is then
        derived from the resulting decoded string.
        """
        if isinstance(value, str):
            # We are translating here from the RFC language (address/mailbox)
            # to our API language (group/address).
            kwds['parse_tree'] = address_list = cls.value_parser(value)
            groups = [Group(addr.display_name,
                            [Address(mb.display_name or '',
                                     mb.local_part or '',
                                     mb.domain or '')
                             for mb in addr.all_mailboxes])
                      for addr in address_list.addresses]
            defects = list(address_list.all_defects)
        else:
            # Assume it is Address/Group stuff
            if not hasattr(value, '__iter__'):
                value = [value]
            # Wrap bare addresses in an anonymous Group.
            groups = [Group(None, [item]) if not hasattr(item, 'addresses')
                                          else item
                                    for item in value]
            defects = []
        kwds['groups'] = groups
        kwds['defects'] = defects
        kwds['decoded'] = ', '.join([str(item) for item in groups])
        if 'parse_tree' not in kwds:
            kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        self._groups = tuple(kw.pop('groups'))
        self._addresses = None
        super().init(*args, **kw)

    @property
    def groups(self):
        return self._groups

    @property
    def addresses(self):
        # Flattened lazily from the groups and cached on first access.
        if self._addresses is None:
            self._addresses = tuple(address for group in self._groups
                                            for address in group.addresses)
        return self._addresses


class UniqueAddressHeader(AddressHeader):

    max_count = 1


class SingleAddressHeader(AddressHeader):

    @property
    def address(self):
        """The sole address; raises ValueError unless there is exactly one."""
        if len(self.addresses) != 1:
            raise ValueError(("value of single address header {} is not "
                "a single address").format(self.name))
        return self.addresses[0]


class UniqueSingleAddressHeader(SingleAddressHeader):

    max_count = 1


class MIMEVersionHeader:
    """Mixin for the MIME-Version header; exposes major, minor and version."""

    max_count = 1

    value_parser = staticmethod(parser.parse_mime_version)

    @classmethod
    def parse(cls, value, kwds):
        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
        kwds['decoded'] = str(parse_tree)
        kwds['defects'].extend(parse_tree.all_defects)
        # major is reported only when minor was parsed as well.
        kwds['major'] = None if parse_tree.minor is None else parse_tree.major
        kwds['minor'] = parse_tree.minor
        if parse_tree.minor is not None:
            kwds['version'] = '{}.{}'.format(kwds['major'], kwds['minor'])
        else:
            kwds['version'] = None

    def init(self, *args, **kw):
        self._version = kw.pop('version')
        self._major = kw.pop('major')
        self._minor = kw.pop('minor')
        super().init(*args, **kw)

    @property
    def major(self):
        return self._major

    @property
    def minor(self):
        return self._minor

    @property
    def version(self):
        return self._version


class ParameterizedMIMEHeader:

    # Mixin that handles the params dict.  Must be subclassed and
    # a property value_parser for the specific header provided.

    max_count = 1

    @classmethod
    def parse(cls, value, kwds):
        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
        kwds['decoded'] = str(parse_tree)
        kwds['defects'].extend(parse_tree.all_defects)
        if parse_tree.params is None:
            kwds['params'] = {}
        else:
            # The MIME RFCs specify that parameter ordering is arbitrary.
            kwds['params'] = {utils._sanitize(name).lower():
                                    utils._sanitize(value)
                               for name, value in parse_tree.params}

    def init(self, *args, **kw):
        self._params = kw.pop('params')
        super().init(*args, **kw)

    @property
    def params(self):
        # Read-only view of the parameter dict.
        return MappingProxyType(self._params)


class ContentTypeHeader(ParameterizedMIMEHeader):

    value_parser = staticmethod(parser.parse_content_type_header)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        self._maintype = utils._sanitize(self._parse_tree.maintype)
        self._subtype = utils._sanitize(self._parse_tree.subtype)

    @property
    def maintype(self):
        return self._maintype

    @property
    def subtype(self):
        return self._subtype

    @property
    def content_type(self):
        return self.maintype + '/' + self.subtype


class ContentDispositionHeader(ParameterizedMIMEHeader):

    value_parser = staticmethod(parser.parse_content_disposition_header)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        cd = self._parse_tree.content_disposition
        self._content_disposition = cd if cd is None else utils._sanitize(cd)

    @property
    def content_disposition(self):
        return self._content_disposition


class ContentTransferEncodingHeader:

    max_count = 1

    value_parser = staticmethod(parser.parse_content_transfer_encoding_header)

    @classmethod
    def parse(cls, value, kwds):
        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
        kwds['decoded'] = str(parse_tree)
        kwds['defects'].extend(parse_tree.all_defects)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        self._cte = utils._sanitize(self._parse_tree.cte)

    @property
    def cte(self):
        return self._cte


class MessageIDHeader:

    max_count = 1
    value_parser = staticmethod(parser.parse_message_id)

    @classmethod
    def parse(cls, value, kwds):
        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
        kwds['decoded'] = str(parse_tree)
        kwds['defects'].extend(parse_tree.all_defects)


# The header factory #

_default_header_map = {
    'subject':                      UniqueUnstructuredHeader,
    'date':                         UniqueDateHeader,
    'resent-date':                  DateHeader,
    'orig-date':                    UniqueDateHeader,
    'sender':                       UniqueSingleAddressHeader,
    'resent-sender':                SingleAddressHeader,
    'to':                           UniqueAddressHeader,
    'resent-to':                    AddressHeader,
    'cc':                           UniqueAddressHeader,
    'resent-cc':                    AddressHeader,
    'bcc':                          UniqueAddressHeader,
    'resent-bcc':                   AddressHeader,
    'from':                         UniqueAddressHeader,
    'resent-from':                  AddressHeader,
    'reply-to':                     UniqueAddressHeader,
    'mime-version':                 MIMEVersionHeader,
    'content-type':                 ContentTypeHeader,
    'content-disposition':          ContentDispositionHeader,
    'content-transfer-encoding':    ContentTransferEncodingHeader,
    'message-id':                   MessageIDHeader,
    }


class HeaderRegistry:

    """A header_factory and header registry."""

    def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
                       use_default_map=True):
        """Create a header_factory that works with the Policy API.

        base_class is the class that will be the last class in the created
        header class's __bases__ list.  default_class is the class that will be
        used if "name" (see __call__) does not appear in the registry.
        use_default_map controls whether or not the default mapping of names to
        specialized classes is copied in to the registry when the factory is
        created.  The default is True.

        """
        self.registry = {}
        self.base_class = base_class
        self.default_class = default_class
        if use_default_map:
            self.registry.update(_default_header_map)

    def map_to_type(self, name, cls):
        """Register cls as the specialized class for handling "name" headers.

        """
        self.registry[name.lower()] = cls

    def __getitem__(self, name):
        """Return a new class combining the registered (or default) class for
        'name' with base_class.  The lookup is case-insensitive.
        """
        cls = self.registry.get(name.lower(), self.default_class)
        return type('_'+cls.__name__, (cls, self.base_class), {})

    def __call__(self, name, value):
        """Create a header instance for header 'name' from 'value'.

        Creates a header instance by creating a specialized class for parsing
        and representing the specified header by combining the factory
        base_class with a specialized class from the registry or the
        default_class, and passing the name and value to the constructed
        class's constructor.

        """
        return self[name](name, value)