1"""This will be the home for the policy that hooks in the new
2code that adds all the email6 features.
3"""
4
5import re
6import sys
7from email._policybase import Policy, Compat32, compat32, _extend_docstrings
8from email.utils import _has_surrogates
9from email.headerregistry import HeaderRegistry as HeaderRegistry
10from email.contentmanager import raw_data_manager
11from email.message import EmailMessage
12
# Names exported by a star-import of this module.  Every ready-made policy
# instance defined at the bottom of the file is listed, including SMTPUTF8.
__all__ = [
    'Compat32',
    'compat32',
    'Policy',
    'EmailPolicy',
    'default',
    'strict',
    'SMTP',
    'HTTP',
    'SMTPUTF8',
    ]
23
# Matches a single LF or a single CR character.  Header values are split on
# this pattern instead of using str.splitlines(), because splitlines() splits
# on more than \r and \n.
linesep_splitter = re.compile(r'\n|\r')
25
@_extend_docstrings
class EmailPolicy(Policy):

    """+
    PROVISIONAL

    The API extensions enabled by this policy are currently provisional.
    Refer to the documentation for details.

    This policy adds new header parsing and folding algorithms.  Instead of
    simple strings, headers are custom objects with custom attributes
    depending on the type of the field.  The folding algorithm fully
    implements RFCs 2047 and 5322.

    In addition to the settable attributes listed above that apply to
    all Policies, this policy adds the following additional attributes:

    utf8                -- if False (the default) message headers will be
                           serialized as ASCII, using encoded words to encode
                           any non-ASCII characters in the source strings.  If
                           True, the message headers will be serialized using
                           utf8 and will not contain encoded words (see RFC
                           6532 for more on this serialization format).

    refold_source       -- if the value for a header in the Message object
                           came from the parsing of some source, this attribute
                           indicates whether or not a generator should refold
                           that value when transforming the message back into
                           stream form.  The possible values are:

                           none  -- all source values use original folding
                           long  -- source values that have any line that is
                                    longer than max_line_length will be
                                    refolded
                           all  -- all values are refolded.

                           The default is 'long'.

    header_factory      -- a callable that takes two arguments, 'name' and
                           'value', where 'name' is a header field name and
                           'value' is an unfolded header field value, and
                           returns a string-like object that represents that
                           header.  A default header_factory is provided that
                           understands some of the RFC5322 header field types.
                           (Currently address fields and date fields have
                           special treatment, while all other fields are
                           treated as unstructured.  This list will be
                           completed before the extension is marked stable.)

    content_manager     -- an object with at least two methods: get_content
                           and set_content.  When the get_content or
                           set_content method of a Message object is called,
                           it calls the corresponding method of this object,
                           passing it the message object as its first argument,
                           and any arguments or keywords that were passed to
                           it as additional arguments.  The default
                           content_manager is
                           :data:`~email.contentmanager.raw_data_manager`.

    """

    message_factory = EmailMessage
    utf8 = False
    refold_source = 'long'
    header_factory = HeaderRegistry()
    content_manager = raw_data_manager

    def __init__(self, **kw):
        """Create a policy, giving it its own header factory by default.

        Keyword arguments are forwarded unchanged to the base class
        initializer.  A caller-supplied 'header_factory' is used as given.
        """
        # Ensure that each new instance gets a unique header factory
        # (as opposed to clones, which share the factory).
        if 'header_factory' not in kw:
            # object.__setattr__ is called directly rather than assigning
            # through self.
            object.__setattr__(self, 'header_factory', HeaderRegistry())
        super().__init__(**kw)

    def header_max_count(self, name):
        """+
        The implementation for this class returns the max_count attribute from
        the specialized header class that would be used to construct a header
        of type 'name'.
        """
        return self.header_factory[name].max_count

    # The logic of the next three methods is chosen such that it is possible to
    # switch a Message object between a Compat32 policy and a policy derived
    # from this class and have the results stay consistent.  This allows a
    # Message object constructed with this policy to be passed to a library
    # that only handles Compat32 objects, or to receive such an object and
    # convert it to use the newer style by just changing its policy.  It is
    # also chosen because it postpones the relatively expensive full rfc5322
    # parse until as late as possible when parsing from source, since in many
    # applications only a few headers will actually be inspected.

    def header_source_parse(self, sourcelines):
        """+
        The name is parsed as everything up to the ':' and returned unmodified.
        The value is determined by stripping leading whitespace off the
        remainder of the first line, joining all subsequent lines together, and
        stripping any trailing carriage return or linefeed characters.  (This
        is the same as Compat32).

        """
        name, value = sourcelines[0].split(':', 1)
        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
        return (name, value.rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        The name is returned unchanged.  If the input value has a 'name'
        attribute and it matches the name ignoring case, the value is returned
        unchanged.  Otherwise the name and value are passed to header_factory
        method, and the resulting custom header object is returned as the
        value.  In this case a ValueError is raised if the input value contains
        CR or LF characters.

        """
        if hasattr(value, 'name') and value.name.lower() == name.lower():
            return (name, value)
        if isinstance(value, str) and len(value.splitlines())>1:
            # XXX this error message isn't quite right when we use splitlines
            # (see issue 22233), but I'm not sure what should happen here.
            raise ValueError("Header values may not contain linefeed "
                             "or carriage return characters")
        return (name, self.header_factory(name, value))

    def header_fetch_parse(self, name, value):
        """+
        If the value has a 'name' attribute, it is returned unmodified.
        Otherwise the name and the value with any linesep characters removed
        are passed to the header_factory method, and the resulting custom
        header object is returned.  Any surrogateescaped bytes get turned
        into the unicode unknown-character glyph.

        """
        if hasattr(value, 'name'):
            return value
        # We can't use splitlines here because it splits on more than \r and \n.
        value = ''.join(linesep_splitter.split(value))
        return self.header_factory(name, value)

    def fold(self, name, value):
        """+
        Header folding is controlled by the refold_source policy setting.  A
        value is considered to be a 'source value' if and only if it does not
        have a 'name' attribute (having a 'name' attribute means it is a header
        object of some sort).  If a source value needs to be refolded according
        to the policy, it is converted into a custom header object by passing
        the name and the value with any linesep characters removed to the
        header_factory method.  Folding of a custom header object is done by
        calling its fold method with the current policy.

        Source values are split into lines on CR, LF and CRLF only; no other
        character is treated as a line boundary.  If the value is not to be
        refolded, the lines are rejoined using the linesep from the policy and
        returned.  The exception is lines containing non-ascii binary data.  In
        that case the value is refolded regardless of the refold_source
        setting, which causes the binary data to be CTE encoded using the
        unknown-8bit charset.

        """
        return self._fold(name, value, refold_binary=True)

    def fold_binary(self, name, value):
        """+
        The same as fold if cte_type is 7bit, except that the returned value is
        bytes.

        If cte_type is 8bit, non-ASCII binary data is converted back into
        bytes.  Headers with binary data are not refolded, regardless of the
        refold_source setting, since there is no way to know whether the binary
        data consists of single byte characters or multibyte characters.

        If utf8 is true, headers are encoded to utf8, otherwise to ascii with
        non-ASCII unicode rendered as encoded words.

        """
        folded = self._fold(name, value, refold_binary=self.cte_type=='7bit')
        charset = 'utf8' if self.utf8 else 'ascii'
        return folded.encode(charset, 'surrogateescape')

    def _fold(self, name, value, refold_binary=False):
        """Return the folded text of header 'name', ending with linesep.

        Shared implementation behind fold and fold_binary.

        name           -- the header field name.
        value          -- either a header object (anything with a 'name'
                          attribute), which is folded by its own fold method,
                          or a source value string.
        refold_binary  -- if true, a source value containing surrogateescaped
                          bytes is refolded even when refold_source would
                          otherwise leave it alone.
        """
        if hasattr(value, 'name'):
            return value.fold(policy=self)
        # A false max_line_length (None or 0) means "no limit".
        maxlen = self.max_line_length if self.max_line_length else sys.maxsize

        # Split on CRLF, CR and LF only.  str.splitlines() must not be used
        # here: it also breaks on \x0b, \x0c, \x1c-\x1e, \x85, \u2028 and
        # \u2029.  Such a character would be replaced by linesep when the
        # value is not refolded, emitting a bare line break in the middle of
        # the header, and would be dropped when it is refolded.
        lines = value.replace('\r\n', '\n').replace('\r', '\n').split('\n')
        if not lines[-1]:
            # Match splitlines(): a trailing line separator (or an empty
            # value) does not produce a final empty line.
            del lines[-1]

        refold = self.refold_source == 'all'
        if not refold and self.refold_source == 'long':
            # The first line shares its row with "name: ", hence the +2.
            first_too_long = (bool(lines) and
                              len(lines[0]) + len(name) + 2 > maxlen)
            refold = (first_too_long or
                      any(len(x) > maxlen for x in lines[1:]))

        if refold or (refold_binary and _has_surrogates(value)):
            return self.header_factory(name, ''.join(lines)).fold(policy=self)
        return name + ': ' + self.linesep.join(lines) + self.linesep
216
217
# Ready-made policy instances.  Each later instance is produced by calling
# clone() on an earlier one with the keyword overrides shown.
default = EmailPolicy()
# Make the default policy use the class default header_factory
# (EmailPolicy.__init__ stored a per-instance HeaderRegistry; deleting the
# instance attribute exposes the class-level one again).
del default.header_factory
# default with raise_on_defect=True.
strict = default.clone(raise_on_defect=True)
# default with '\r\n' as the line separator.
SMTP = default.clone(linesep='\r\n')
# default with '\r\n' as the line separator and max_line_length=None;
# EmailPolicy._fold treats a false max_line_length as sys.maxsize.
HTTP = default.clone(linesep='\r\n', max_line_length=None)
# SMTP with utf8=True.
SMTPUTF8 = SMTP.clone(utf8=True)
225