1from __future__ import absolute_import 2 3import email.utils 4import mimetypes 5import re 6 7from .packages import six 8 9 10def guess_content_type(filename, default="application/octet-stream"): 11 """ 12 Guess the "Content-Type" of a file. 13 14 :param filename: 15 The filename to guess the "Content-Type" of using :mod:`mimetypes`. 16 :param default: 17 If no "Content-Type" can be guessed, default to `default`. 18 """ 19 if filename: 20 return mimetypes.guess_type(filename)[0] or default 21 return default 22 23 24def format_header_param_rfc2231(name, value): 25 """ 26 Helper function to format and quote a single header parameter using the 27 strategy defined in RFC 2231. 28 29 Particularly useful for header parameters which might contain 30 non-ASCII values, like file names. This follows 31 `RFC 2388 Section 4.4 <https://tools.ietf.org/html/rfc2388#section-4.4>`_. 32 33 :param name: 34 The name of the parameter, a string expected to be ASCII only. 35 :param value: 36 The value of the parameter, provided as ``bytes`` or `str``. 37 :ret: 38 An RFC-2231-formatted unicode string. 39 """ 40 if isinstance(value, six.binary_type): 41 value = value.decode("utf-8") 42 43 if not any(ch in value for ch in '"\\\r\n'): 44 result = u'%s="%s"' % (name, value) 45 try: 46 result.encode("ascii") 47 except (UnicodeEncodeError, UnicodeDecodeError): 48 pass 49 else: 50 return result 51 52 if six.PY2: # Python 2: 53 value = value.encode("utf-8") 54 55 # encode_rfc2231 accepts an encoded string and returns an ascii-encoded 56 # string in Python 2 but accepts and returns unicode strings in Python 3 57 value = email.utils.encode_rfc2231(value, "utf-8") 58 value = "%s*=%s" % (name, value) 59 60 if six.PY2: # Python 2: 61 value = value.decode("utf-8") 62 63 return value 64 65 66_HTML5_REPLACEMENTS = { 67 u"\u0022": u"%22", 68 # Replace "\" with "\\". 69 u"\u005C": u"\u005C\u005C", 70} 71 72# All control characters from 0x00 to 0x1F *except* 0x1B. 
_HTML5_REPLACEMENTS.update(
    {
        six.unichr(cc): u"%{:02X}".format(cc)
        for cc in range(0x00, 0x1F + 1)
        if cc not in (0x1B,)
    }
)


def _replace_multiple(value, needles_and_replacements):
    """
    Replace every occurrence of each needle in ``value`` in a single pass.

    :param value:
        The string to perform the replacements on.
    :param needles_and_replacements:
        A :class:`dict` mapping each literal needle to its replacement text.
    :ret:
        ``value`` with every needle replaced by its mapped replacement.
    """
    # An empty mapping would compile to an empty pattern, which matches at
    # every position and then fails the lookup in ``replacer``.
    if not needles_and_replacements:
        return value

    def replacer(match):
        return needles_and_replacements[match.group(0)]

    # Needles are escaped so they are always matched literally.
    pattern = re.compile(
        r"|".join(re.escape(needle) for needle in needles_and_replacements)
    )

    return pattern.sub(replacer, value)


def format_header_param_html5(name, value):
    """
    Helper function to format and quote a single header parameter using the
    HTML5 strategy.

    Particularly useful for header parameters which might contain
    non-ASCII values, like file names. This follows the `HTML5 Working Draft
    Section 4.10.22.7`_ and matches the behavior of curl and modern browsers.

    .. _HTML5 Working Draft Section 4.10.22.7:
        https://w3c.github.io/html/sec-forms.html#multipart-form-data

    :param name:
        The name of the parameter, a string expected to be ASCII only.
    :param value:
        The value of the parameter, provided as ``bytes`` or ``str``.
        ``bytes`` values are decoded as UTF-8.
    :ret:
        A unicode string, stripped of troublesome characters.
    """
    if isinstance(value, six.binary_type):
        value = value.decode("utf-8")

    value = _replace_multiple(value, _HTML5_REPLACEMENTS)

    return u'%s="%s"' % (name, value)


# For backwards-compatibility.
format_header_param = format_header_param_html5


class RequestField(object):
    """
    A data container for request body parameters.

    :param name:
        The name of this request field. Must be unicode.
    :param data:
        The data/value body.
    :param filename:
        An optional filename of the request field. Must be unicode.
    :param headers:
        An optional dict-like object of headers to initially use for the field.
    :param header_formatter:
        An optional callable that is used to encode and format the headers. By
        default, this is :func:`format_header_param_html5`.
    """

    def __init__(
        self,
        name,
        data,
        filename=None,
        headers=None,
        header_formatter=format_header_param_html5,
    ):
        self._name = name
        self._filename = filename
        self.data = data
        # Copy the caller's headers so later changes here do not leak back.
        self.headers = {}
        if headers:
            self.headers = dict(headers)
        self.header_formatter = header_formatter

    @classmethod
    def from_tuples(cls, fieldname, value, header_formatter=format_header_param_html5):
        """
        A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters.

        Supports constructing :class:`~urllib3.fields.RequestField` from
        parameter of key/value strings AND key/filetuple. A filetuple is a
        (filename, data, MIME type) tuple where the MIME type is optional.
        For example::

            'foo': 'bar',
            'fakefile': ('foofile.txt', 'contents of foofile'),
            'realfile': ('barfile.txt', open('realfile').read()),
            'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'),
            'nonamefile': 'contents of nonamefile field',

        Field names and filenames must be unicode.

        :param fieldname:
            The name of the request field.
        :param value:
            Either the field data itself, or a 2- or 3-element filetuple as
            described above.
        :param header_formatter:
            The callable passed on to the constructed field for formatting
            header parameters.
        :ret:
            A new instance of ``cls`` with its multipart headers already set.
        """
        if isinstance(value, tuple):
            if len(value) == 3:
                filename, data, content_type = value
            else:
                # Two-element filetuple: derive the MIME type from the filename.
                filename, data = value
                content_type = guess_content_type(filename)
        else:
            filename = None
            content_type = None
            data = value

        request_param = cls(
            fieldname, data, filename=filename, header_formatter=header_formatter
        )
        request_param.make_multipart(content_type=content_type)

        return request_param

    def _render_part(self, name, value):
        """
        Overridable helper function to format a single header parameter. By
        default, this calls ``self.header_formatter``.

        :param name:
            The name of the parameter, a string expected to be ASCII only.
        :param value:
            The value of the parameter, provided as a unicode string.
        """

        return self.header_formatter(name, value)

    def _render_parts(self, header_parts):
        """
        Helper function to format and quote a single header.

        Useful for single headers that are composed of multiple items. E.g.,
        'Content-Disposition' fields.

        :param header_parts:
            A sequence of (k, v) tuples or a :class:`dict` of (k, v) to format
            as `k1="v1"; k2="v2"; ...`. Items whose value is ``None`` are
            skipped.
        """
        parts = []
        iterable = header_parts
        if isinstance(header_parts, dict):
            iterable = header_parts.items()

        for name, value in iterable:
            if value is not None:
                parts.append(self._render_part(name, value))

        return u"; ".join(parts)

    def render_headers(self):
        """
        Renders the headers for this request field.

        "Content-Disposition", "Content-Type" and "Content-Location" are
        emitted first, in that order, followed by any remaining headers.
        Headers with a falsy value are omitted. The result ends with an
        empty line.
        """
        lines = []

        sort_keys = ["Content-Disposition", "Content-Type", "Content-Location"]
        for sort_key in sort_keys:
            if self.headers.get(sort_key, False):
                lines.append(u"%s: %s" % (sort_key, self.headers[sort_key]))

        for header_name, header_value in self.headers.items():
            if header_name not in sort_keys:
                if header_value:
                    lines.append(u"%s: %s" % (header_name, header_value))

        # Joining with this trailing element ends the output in "\r\n\r\n".
        lines.append(u"\r\n")
        return u"\r\n".join(lines)

    def make_multipart(
        self, content_disposition=None, content_type=None, content_location=None
    ):
        """
        Makes this request field into a multipart request field.

        This method overrides "Content-Disposition", "Content-Type" and
        "Content-Location" headers to the request parameter.

        :param content_disposition:
            The 'Content-Disposition' of the request body. Defaults to
            'form-data' when not given. The field's name and, if set, its
            filename are appended as parameters.
        :param content_type:
            The 'Content-Type' of the request body.
        :param content_location:
            The 'Content-Location' of the request body.

        """
        self.headers["Content-Disposition"] = content_disposition or u"form-data"
        # The leading empty string makes the join start with the "; " separator.
        self.headers["Content-Disposition"] += u"; ".join(
            [
                u"",
                self._render_parts(
                    ((u"name", self._name), (u"filename", self._filename))
                ),
            ]
        )
        self.headers["Content-Type"] = content_type
        self.headers["Content-Location"] = content_location