1from __future__ import absolute_import
2
3import email.utils
4import mimetypes
5import re
6
7from .packages import six
8
9
def guess_content_type(filename, default="application/octet-stream"):
    """
    Work out a "Content-Type" value for a file from its name.

    :param filename:
        The filename handed to :mod:`mimetypes` for the lookup. A falsy
        filename (``None`` or an empty string) skips the lookup entirely.
    :param default:
        The value returned when no "Content-Type" can be determined.
    """
    if not filename:
        return default

    # ``guess_type`` returns a ``(type, encoding)`` pair; only the type matters.
    guessed_type, _encoding = mimetypes.guess_type(filename)
    return guessed_type or default
22
23
def format_header_param_rfc2231(name, value):
    """
    Format and quote a single header parameter with the RFC 2231 strategy.

    Intended for header parameters that may hold non-ASCII values, such as
    file names. This follows
    `RFC 2388 Section 4.4 <https://tools.ietf.org/html/rfc2388#section-4.4>`_.

    :param name:
        The name of the parameter, a string expected to be ASCII only.
    :param value:
        The value of the parameter, provided as ``bytes`` or ``str``.
    :ret:
        An RFC-2231-formatted unicode string.
    """
    if isinstance(value, six.binary_type):
        value = value.decode("utf-8")

    # A value free of quotes, backslashes and line breaks can be emitted as a
    # plain quoted string, provided the whole parameter is pure ASCII.
    has_unsafe_char = any(ch in value for ch in '"\\\r\n')
    if not has_unsafe_char:
        quoted = u'%s="%s"' % (name, value)
        try:
            quoted.encode("ascii")
        except (UnicodeEncodeError, UnicodeDecodeError):
            pass
        else:
            return quoted

    # encode_rfc2231 accepts an encoded string and returns an ascii-encoded
    # string in Python 2 but accepts and returns unicode strings in Python 3
    if six.PY2:  # Python 2:
        encoded = email.utils.encode_rfc2231(value.encode("utf-8"), "utf-8")
        return ("%s*=%s" % (name, encoded)).decode("utf-8")

    encoded = email.utils.encode_rfc2231(value, "utf-8")
    return "%s*=%s" % (name, encoded)
64
65
# Maps each character the HTML5 strategy rewrites to its replacement text.
_HTML5_REPLACEMENTS = {
    # Percent-encode the double quote.
    u"\u0022": u"%22",
    # Replace "\" with "\\".
    u"\u005C": u"\u005C\u005C",
}

# All control characters from 0x00 to 0x1F *except* 0x1B are percent-encoded.
_HTML5_REPLACEMENTS.update(
    (six.unichr(code_point), u"%{:02X}".format(code_point))
    for code_point in range(0x20)
    if code_point != 0x1B
)
80
81
82def _replace_multiple(value, needles_and_replacements):
83    def replacer(match):
84        return needles_and_replacements[match.group(0)]
85
86    pattern = re.compile(
87        r"|".join([re.escape(needle) for needle in needles_and_replacements.keys()])
88    )
89
90    result = pattern.sub(replacer, value)
91
92    return result
93
94
def format_header_param_html5(name, value):
    """
    Format and quote a single header parameter with the HTML5 strategy.

    Intended for header parameters that may hold non-ASCII values, such as
    file names. This follows the `HTML5 Working Draft Section 4.10.22.7`_
    and matches the behavior of curl and modern browsers.

    .. _HTML5 Working Draft Section 4.10.22.7:
        https://w3c.github.io/html/sec-forms.html#multipart-form-data

    :param name:
        The name of the parameter, a string expected to be ASCII only.
    :param value:
        The value of the parameter, provided as ``bytes`` or ``str``.
    :ret:
        A unicode string, stripped of troublesome characters.
    """
    # Bytes input is interpreted as UTF-8 text before any escaping happens.
    text = value.decode("utf-8") if isinstance(value, six.binary_type) else value
    escaped = _replace_multiple(text, _HTML5_REPLACEMENTS)
    return u'%s="%s"' % (name, escaped)
120
121
# For backwards-compatibility: ``format_header_param`` is kept as an alias of
# :func:`format_header_param_html5`.
format_header_param = format_header_param_html5
124
125
class RequestField(object):
    """
    Container for a single field of a request body.

    :param name:
        Name of this request field. Must be unicode.
    :param data:
        The field's body (its data/value).
    :param filename:
        Optional filename for the request field. Must be unicode.
    :param headers:
        Optional dict-like object whose items seed the field's headers.
    :param header_formatter:
        Optional callable used to encode and format the header parameters.
        Defaults to :func:`format_header_param_html5`.
    """

    def __init__(
        self,
        name,
        data,
        filename=None,
        headers=None,
        header_formatter=format_header_param_html5,
    ):
        self._name = name
        self._filename = filename
        self.data = data
        # Always store a fresh dict rather than the caller's own mapping.
        self.headers = dict(headers) if headers else {}
        self.header_formatter = header_formatter

    @classmethod
    def from_tuples(cls, fieldname, value, header_formatter=format_header_param_html5):
        """
        Build a :class:`~urllib3.fields.RequestField` from old-style tuple parameters.

        ``value`` is either a plain value or a filetuple. A filetuple is a
        (filename, data, MIME type) tuple where the MIME type is optional;
        when it is omitted the type is guessed from the filename.
        For example::

            'foo': 'bar',
            'fakefile': ('foofile.txt', 'contents of foofile'),
            'realfile': ('barfile.txt', open('realfile').read()),
            'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'),
            'nonamefile': 'contents of nonamefile field',

        Field names and filenames must be unicode.
        """
        # Defaults describe the plain key/value case; a tuple overrides them.
        filename = None
        content_type = None
        data = value

        if isinstance(value, tuple):
            if len(value) == 3:
                filename, data, content_type = value
            else:
                filename, data = value
                content_type = guess_content_type(filename)

        field = cls(
            fieldname, data, filename=filename, header_formatter=header_formatter
        )
        field.make_multipart(content_type=content_type)
        return field

    def _render_part(self, name, value):
        """
        Format a single header parameter; subclasses may override this.

        The default implementation delegates to ``self.header_formatter``.

        :param name:
            The name of the parameter, a string expected to be ASCII only.
        :param value:
            The value of the parameter, provided as a unicode string.
        """
        formatter = self.header_formatter
        return formatter(name, value)

    def _render_parts(self, header_parts):
        """
        Format and quote the parameters of a single header.

        Useful for headers that are made up of several items, e.g.
        'Content-Disposition' fields. Items whose value is ``None`` are
        left out.

        :param header_parts:
            A sequence of (k, v) tuples or a :class:`dict` of (k, v) to format
            as `k1="v1"; k2="v2"; ...`.
        """
        if isinstance(header_parts, dict):
            pairs = header_parts.items()
        else:
            pairs = header_parts

        rendered = [
            self._render_part(key, item) for key, item in pairs if item is not None
        ]
        return u"; ".join(rendered)

    def render_headers(self):
        """
        Render this field's headers as a single unicode string.

        "Content-Disposition", "Content-Type" and "Content-Location" come
        first, in that order, followed by the remaining headers. Headers with
        a falsy value are omitted. Entries are joined with CRLF and the final
        entry is itself a CRLF.
        """
        leading_keys = ["Content-Disposition", "Content-Type", "Content-Location"]

        rendered = [
            u"%s: %s" % (key, self.headers[key])
            for key in leading_keys
            if self.headers.get(key, False)
        ]
        rendered.extend(
            [
                u"%s: %s" % (key, item)
                for key, item in self.headers.items()
                if key not in leading_keys and item
            ]
        )

        rendered.append(u"\r\n")
        return u"\r\n".join(rendered)

    def make_multipart(
        self, content_disposition=None, content_type=None, content_location=None
    ):
        """
        Turn this request field into a multipart request field.

        Overwrites the "Content-Disposition", "Content-Type" and
        "Content-Location" headers of the field.

        :param content_disposition:
            The 'Content-Disposition' of the request body. A falsy value
            falls back to ``form-data``. The field's name and filename are
            appended to it as parameters.
        :param content_type:
            The 'Content-Type' of the request body.
        :param content_location:
            The 'Content-Location' of the request body.

        """
        disposition_params = ((u"name", self._name), (u"filename", self._filename))

        self.headers["Content-Disposition"] = content_disposition or u"form-data"
        # The separator is always appended, followed by the rendered parameters.
        self.headers["Content-Disposition"] += u"; " + self._render_parts(
            disposition_params
        )
        self.headers["Content-Type"] = content_type
        self.headers["Content-Location"] = content_location
275