1"""This will be the home for the policy that hooks in the new 2code that adds all the email6 features. 3""" 4from __future__ import unicode_literals 5from __future__ import division 6from __future__ import absolute_import 7from future.builtins import super 8 9from future.standard_library.email._policybase import (Policy, Compat32, 10 compat32, _extend_docstrings) 11from future.standard_library.email.utils import _has_surrogates 12from future.standard_library.email.headerregistry import HeaderRegistry as HeaderRegistry 13 14__all__ = [ 15 'Compat32', 16 'compat32', 17 'Policy', 18 'EmailPolicy', 19 'default', 20 'strict', 21 'SMTP', 22 'HTTP', 23 ] 24 25@_extend_docstrings 26class EmailPolicy(Policy): 27 28 """+ 29 PROVISIONAL 30 31 The API extensions enabled by this policy are currently provisional. 32 Refer to the documentation for details. 33 34 This policy adds new header parsing and folding algorithms. Instead of 35 simple strings, headers are custom objects with custom attributes 36 depending on the type of the field. The folding algorithm fully 37 implements RFCs 2047 and 5322. 38 39 In addition to the settable attributes listed above that apply to 40 all Policies, this policy adds the following additional attributes: 41 42 refold_source -- if the value for a header in the Message object 43 came from the parsing of some source, this attribute 44 indicates whether or not a generator should refold 45 that value when transforming the message back into 46 stream form. The possible values are: 47 48 none -- all source values use original folding 49 long -- source values that have any line that is 50 longer than max_line_length will be 51 refolded 52 all -- all values are refolded. 53 54 The default is 'long'. 55 56 header_factory -- a callable that takes two arguments, 'name' and 57 'value', where 'name' is a header field name and 58 'value' is an unfolded header field value, and 59 returns a string-like object that represents that 60 header. A default header_factory is provided that 61 understands some of the RFC5322 header field types. 62 (Currently address fields and date fields have 63 special treatment, while all other fields are 64 treated as unstructured. This list will be 65 completed before the extension is marked stable.) 66 """ 67 68 refold_source = 'long' 69 header_factory = HeaderRegistry() 70 71 def __init__(self, **kw): 72 # Ensure that each new instance gets a unique header factory 73 # (as opposed to clones, which share the factory). 74 if 'header_factory' not in kw: 75 object.__setattr__(self, 'header_factory', HeaderRegistry()) 76 super().__init__(**kw) 77 78 def header_max_count(self, name): 79 """+ 80 The implementation for this class returns the max_count attribute from 81 the specialized header class that would be used to construct a header 82 of type 'name'. 83 """ 84 return self.header_factory[name].max_count 85 86 # The logic of the next three methods is chosen such that it is possible to 87 # switch a Message object between a Compat32 policy and a policy derived 88 # from this class and have the results stay consistent. This allows a 89 # Message object constructed with this policy to be passed to a library 90 # that only handles Compat32 objects, or to receive such an object and 91 # convert it to use the newer style by just changing its policy. It is 92 # also chosen because it postpones the relatively expensive full rfc5322 93 # parse until as late as possible when parsing from source, since in many 94 # applications only a few headers will actually be inspected. 95 96 def header_source_parse(self, sourcelines): 97 """+ 98 The name is parsed as everything up to the ':' and returned unmodified. 99 The value is determined by stripping leading whitespace off the 100 remainder of the first line, joining all subsequent lines together, and 101 stripping any trailing carriage return or linefeed characters. (This 102 is the same as Compat32). 103 104 """ 105 name, value = sourcelines[0].split(':', 1) 106 value = value.lstrip(' \t') + ''.join(sourcelines[1:]) 107 return (name, value.rstrip('\r\n')) 108 109 def header_store_parse(self, name, value): 110 """+ 111 The name is returned unchanged. If the input value has a 'name' 112 attribute and it matches the name ignoring case, the value is returned 113 unchanged. Otherwise the name and value are passed to header_factory 114 method, and the resulting custom header object is returned as the 115 value. In this case a ValueError is raised if the input value contains 116 CR or LF characters. 117 118 """ 119 if hasattr(value, 'name') and value.name.lower() == name.lower(): 120 return (name, value) 121 if isinstance(value, str) and len(value.splitlines())>1: 122 raise ValueError("Header values may not contain linefeed " 123 "or carriage return characters") 124 return (name, self.header_factory(name, value)) 125 126 def header_fetch_parse(self, name, value): 127 """+ 128 If the value has a 'name' attribute, it is returned to unmodified. 129 Otherwise the name and the value with any linesep characters removed 130 are passed to the header_factory method, and the resulting custom 131 header object is returned. Any surrogateescaped bytes get turned 132 into the unicode unknown-character glyph. 133 134 """ 135 if hasattr(value, 'name'): 136 return value 137 return self.header_factory(name, ''.join(value.splitlines())) 138 139 def fold(self, name, value): 140 """+ 141 Header folding is controlled by the refold_source policy setting. A 142 value is considered to be a 'source value' if and only if it does not 143 have a 'name' attribute (having a 'name' attribute means it is a header 144 object of some sort). If a source value needs to be refolded according 145 to the policy, it is converted into a custom header object by passing 146 the name and the value with any linesep characters removed to the 147 header_factory method. Folding of a custom header object is done by 148 calling its fold method with the current policy. 149 150 Source values are split into lines using splitlines. If the value is 151 not to be refolded, the lines are rejoined using the linesep from the 152 policy and returned. The exception is lines containing non-ascii 153 binary data. In that case the value is refolded regardless of the 154 refold_source setting, which causes the binary data to be CTE encoded 155 using the unknown-8bit charset. 156 157 """ 158 return self._fold(name, value, refold_binary=True) 159 160 def fold_binary(self, name, value): 161 """+ 162 The same as fold if cte_type is 7bit, except that the returned value is 163 bytes. 164 165 If cte_type is 8bit, non-ASCII binary data is converted back into 166 bytes. Headers with binary data are not refolded, regardless of the 167 refold_header setting, since there is no way to know whether the binary 168 data consists of single byte characters or multibyte characters. 169 170 """ 171 folded = self._fold(name, value, refold_binary=self.cte_type=='7bit') 172 return folded.encode('ascii', 'surrogateescape') 173 174 def _fold(self, name, value, refold_binary=False): 175 if hasattr(value, 'name'): 176 return value.fold(policy=self) 177 maxlen = self.max_line_length if self.max_line_length else float('inf') 178 lines = value.splitlines() 179 refold = (self.refold_source == 'all' or 180 self.refold_source == 'long' and 181 (lines and len(lines[0])+len(name)+2 > maxlen or 182 any(len(x) > maxlen for x in lines[1:]))) 183 if refold or refold_binary and _has_surrogates(value): 184 return self.header_factory(name, ''.join(lines)).fold(policy=self) 185 return name + ': ' + self.linesep.join(lines) + self.linesep 186 187 188default = EmailPolicy() 189# Make the default policy use the class default header_factory 190del default.header_factory 191strict = default.clone(raise_on_defect=True) 192SMTP = default.clone(linesep='\r\n') 193HTTP = default.clone(linesep='\r\n', max_line_length=None) 194