1# coding: utf-8
2# Modified Work: Copyright (c) 2018, 2021, Oracle and/or its affiliates.  All rights reserved.
3# This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl or Apache License 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose either license.
4# Copyright 2018 Kenneth Reitz
5
6# -*- coding: utf-8 -*-
7
8"""
9requests.utils
10~~~~~~~~~~~~~~
11
12This module provides utility functions that are used within Requests
13that are also useful for external consumption.
14"""
15
16import codecs
17import contextlib
18import io
19import os
20import re
21import socket
22import struct
23import sys
24import tempfile
25import warnings
26import zipfile
27from collections import OrderedDict
28
29from .__version__ import __version__
30from . import certs
31# to_native_string is unused here, but imported here for backwards compatibility
32from ._internal_utils import to_native_string
33from .compat import parse_http_list as _parse_list_header
34from .compat import (
35    quote, urlparse, bytes, str, unquote, getproxies,
36    proxy_bypass, urlunparse, basestring, integer_types, is_py3,
37    proxy_bypass_environment, getproxies_environment, Mapping)
38from .cookies import cookiejar_from_dict
39from .structures import CaseInsensitiveDict
40from .exceptions import (
41    InvalidURL, InvalidHeader, FileModeWarning, UnrewindableBodyError)
42
43NETRC_FILES = ('.netrc', '_netrc')
44
45DEFAULT_CA_BUNDLE_PATH = certs.where()
46
47DEFAULT_PORTS = {'http': 80, 'https': 443}
48
49
if sys.platform == 'win32':
    # provide a proxy_bypass version on Windows without DNS lookups

    def proxy_bypass_registry(host):
        """Return True if the Windows registry proxy settings indicate that
        *host* should bypass the proxy.

        Reads ``ProxyEnable`` and ``ProxyOverride`` from the current user's
        Internet Settings key and matches *host* against the override
        patterns (simple globs, plus the special ``<local>`` token).
        """
        try:
            # winreg lost its leading underscore in Python 3.
            if is_py3:
                import winreg
            else:
                import _winreg as winreg
        except ImportError:
            return False

        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            # ProxyEnable could be REG_SZ or REG_DWORD, normalizing it
            proxyEnable = int(winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0])
            # ProxyOverride is almost always a string
            proxyOverride = winreg.QueryValueEx(internetSettings,
                                                'ProxyOverride')[0]
        except OSError:
            # Missing key or value means no proxy override is configured.
            return False
        if not proxyEnable or not proxyOverride:
            return False

        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                # '<local>' means: bypass for bare hostnames (no dots).
                if '.' not in host:
                    return True
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            if re.match(test, host, re.I):
                return True
        return False

    def proxy_bypass(host):  # noqa
        """Return True, if the host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or the registry.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)
102
103
def dict_to_sequence(d):
    """Returns an internal sequence dictionary update."""
    # Mappings are converted to their (key, value) item view; anything
    # else is assumed to already be a sequence of pairs.
    return d.items() if hasattr(d, 'items') else d
111
112
def super_len(o):
    """Best-effort *remaining* length of *o* in bytes/items.

    Handles sized objects (``__len__``), objects exposing a plain ``len``
    attribute, real files (via ``fstat``), and seekable file-likes such as
    BytesIO.  The current ``tell()`` position is subtracted so partially
    read objects report only what is left to read.

    :param o: object to measure.
    :rtype: int (never negative)
    """
    total_length = None
    current_position = 0

    if hasattr(o, '__len__'):
        total_length = len(o)

    elif hasattr(o, 'len'):
        # some file-like wrappers expose a plain .len attribute rather
        # than __len__ — presumably urllib3 wrappers; verify at call sites
        total_length = o.len

    elif hasattr(o, 'fileno'):
        try:
            fileno = o.fileno()
        except io.UnsupportedOperation:
            pass
        else:
            total_length = os.fstat(fileno).st_size

            # Having used fstat to determine the file length, we need to
            # confirm that this file was opened up in binary mode.
            if 'b' not in o.mode:
                warnings.warn((
                    "Requests has determined the content-length for this "
                    "request using the binary size of the file: however, the "
                    "file has been opened in text mode (i.e. without the 'b' "
                    "flag in the mode). This may lead to an incorrect "
                    "content-length. In Requests 3.0, support will be removed "
                    "for files in text mode."),
                    FileModeWarning
                )

    if hasattr(o, 'tell'):
        try:
            current_position = o.tell()
        except (OSError, IOError):
            # This can happen in some weird situations, such as when the file
            # is actually a special file descriptor like stdin. In this
            # instance, we don't know what the length is, so set it to zero and
            # let requests chunk it instead.
            if total_length is not None:
                current_position = total_length
        else:
            if hasattr(o, 'seek') and total_length is None:
                # StringIO and BytesIO have seek but no useable fileno
                try:
                    # seek to end of file
                    o.seek(0, 2)
                    total_length = o.tell()

                    # seek back to current position to support
                    # partially read file-like objects
                    o.seek(current_position or 0)
                except (OSError, IOError):
                    total_length = 0

    if total_length is None:
        total_length = 0

    return max(0, total_length - current_position)
172
173
def get_netrc_auth(url, raise_errors=False):
    """Returns the Requests tuple auth for a given url from netrc.

    :param url: URL whose hostname is looked up in the netrc file.
    :param raise_errors: when True, re-raise netrc parse errors and read
        failures instead of silently returning None.
    :rtype: tuple of (login, password), or None when unavailable.
    """

    # The NETRC environment variable overrides the default lookup locations.
    netrc_file = os.environ.get('NETRC')
    if netrc_file is not None:
        netrc_locations = (netrc_file,)
    else:
        netrc_locations = ('~/{}'.format(f) for f in NETRC_FILES)

    try:
        from netrc import netrc, NetrcParseError

        netrc_path = None

        for f in netrc_locations:
            try:
                loc = os.path.expanduser(f)
            except KeyError:
                # os.path.expanduser can fail when $HOME is undefined and
                # getpwuid fails. See https://bugs.python.org/issue20164 &
                # https://github.com/psf/requests/issues/1846
                return

            if os.path.exists(loc):
                netrc_path = loc
                break

        # Abort early if there isn't one.
        if netrc_path is None:
            return

        ri = urlparse(url)

        # Strip port numbers from netloc. This weird `if...encode`` dance is
        # used for Python 3.2, which doesn't support unicode literals.
        splitstr = b':'
        if isinstance(url, str):
            splitstr = splitstr.decode('ascii')
        host = ri.netloc.split(splitstr)[0]

        try:
            _netrc = netrc(netrc_path).authenticators(host)
            if _netrc:
                # Return with login / password.  authenticators() yields
                # (login, account, password); fall back to the account
                # field when login is empty.
                login_i = (0 if _netrc[0] else 1)
                return (_netrc[login_i], _netrc[2])
        except (NetrcParseError, IOError):
            # If there was a parsing error or a permissions issue reading the file,
            # we'll just skip netrc auth unless explicitly asked to raise errors.
            if raise_errors:
                raise

    # App Engine hackiness.
    except (ImportError, AttributeError):
        pass
229
230
def guess_filename(obj):
    """Tries to guess the filename of the given object."""
    name = getattr(obj, 'name', None)
    if not name or not isinstance(name, basestring):
        return None
    # Pseudo-file names such as '<stdin>' are not real paths.
    if name.startswith('<') or name.endswith('>'):
        return None
    return os.path.basename(name)
237
238
def extract_zipped_paths(path):
    """Replace nonexistent paths that look like they refer to a member of a zip
    archive with the location of an extracted copy of the target, or else
    just return the provided path unchanged.

    :param path: filesystem path, possibly pointing inside a zip archive
        (e.g. ``/bundle.zip/certs/cacert.pem``).
    :rtype: str
    """
    if os.path.exists(path):
        # this is already a valid path, no need to do anything further
        return path

    # find the first valid part of the provided path and treat that as a zip archive
    # assume the rest of the path is the name of a member in the archive
    archive, member = os.path.split(path)
    while archive and not os.path.exists(archive):
        archive, prefix = os.path.split(archive)
        member = '/'.join([prefix, member])

    if not zipfile.is_zipfile(archive):
        return path

    # Fix: close the archive when done instead of leaking the open file
    # descriptor (the original never called zip_file.close()).
    with zipfile.ZipFile(archive) as zip_file:
        if member not in zip_file.namelist():
            return path

        # we have a valid zip archive and a valid member of that archive
        tmp = tempfile.gettempdir()
        extracted_path = os.path.join(tmp, *member.split('/'))
        if not os.path.exists(extracted_path):
            extracted_path = zip_file.extract(member, path=tmp)

    return extracted_path
269
270
def from_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. Unless it can not be represented as such, return an
    OrderedDict, e.g.,

    ::

        >>> from_key_val_list([('key', 'val')])
        OrderedDict([('key', 'val')])
        >>> from_key_val_list('string')
        Traceback (most recent call last):
        ...
        ValueError: cannot encode objects that are not 2-tuples
        >>> from_key_val_list({'key': 'val'})
        OrderedDict([('key', 'val')])

    :rtype: OrderedDict
    """
    if value is None:
        return None

    # Scalars cannot be interpreted as a sequence of key/value pairs.
    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    return OrderedDict(value)
296
297
def to_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. If it can be, return a list of tuples, e.g.,

    ::

        >>> to_key_val_list([('key', 'val')])
        [('key', 'val')]
        >>> to_key_val_list({'key': 'val'})
        [('key', 'val')]
        >>> to_key_val_list('string')
        Traceback (most recent call last):
        ...
        ValueError: cannot encode objects that are not 2-tuples

    :rtype: list
    """
    if value is None:
        return None

    # Scalars cannot be interpreted as a sequence of key/value pairs.
    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    pairs = value.items() if isinstance(value, Mapping) else value
    return list(pairs)
325
326
327# From mitsuhiko/werkzeug (used with permission).
def parse_list_header(value):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.  A non-quoted string could have quotes in the
    middle.  Quotes are removed automatically after parsing.

    It basically works like :func:`parse_set_header` just that items
    may appear multiple times and case sensitivity is preserved.

    The return value is a standard :class:`list`:

    >>> parse_list_header('token, "quoted value"')
    ['token', 'quoted value']

    To create a header from the :class:`list` again, use the
    :func:`dump_header` function.

    :param value: a string with a list header.
    :return: :class:`list`
    :rtype: list
    """
    items = []
    for raw_item in _parse_list_header(value):
        # Quoted-string items are unwrapped and unescaped.
        if raw_item[:1] == raw_item[-1:] == '"':
            raw_item = unquote_header_value(raw_item[1:-1])
        items.append(raw_item)
    return items
357
358
359# From mitsuhiko/werkzeug (used with permission).
def parse_dict_header(value):
    """Parse lists of key, value pairs as described by RFC 2068 Section 2 and
    convert them into a python dict:

    >>> d = parse_dict_header('foo="is a fish", bar="as well"')
    >>> type(d) is dict
    True
    >>> sorted(d.items())
    [('bar', 'as well'), ('foo', 'is a fish')]

    If there is no value for a key it will be `None`:

    >>> parse_dict_header('key_without_value')
    {'key_without_value': None}

    To create a header from the :class:`dict` again, use the
    :func:`dump_header` function.

    :param value: a string with a dict header.
    :return: :class:`dict`
    :rtype: dict
    """
    pairs = {}
    for item in _parse_list_header(value):
        if '=' not in item:
            # Bare tokens carry no value.
            pairs[item] = None
            continue
        name, _, raw_value = item.partition('=')
        if raw_value[:1] == raw_value[-1:] == '"':
            raw_value = unquote_header_value(raw_value[1:-1])
        pairs[name] = raw_value
    return pairs
392
393
394# From mitsuhiko/werkzeug (used with permission).
def unquote_header_value(value, is_filename=False):
    r"""Unquotes a header value.  (Reversal of :func:`quote_header_value`).
    This does not use the real unquoting but what browsers are actually
    using for quoting.

    :param value: the header value to unquote.
    :param is_filename: when True, UNC paths (``\\host\share``) keep their
        leading double backslash once the quotes are removed.
    :rtype: str
    """
    # Anything not wrapped in double quotes is returned untouched.
    if not value or value[0] != '"' or value[-1] != '"':
        return value

    # this is not the real unquoting, but fixing this so that the
    # RFC is met will result in bugs with internet explorer and
    # probably some other browsers as well.  IE for example is
    # uploading files with "C:\foo\bar.txt" as filename
    inner = value[1:-1]

    # if this is a filename and the starting characters look like
    # a UNC path, then just return the value without quotes.  Using the
    # replace sequence below on a UNC path has the effect of turning
    # the leading double slash into a single slash and then
    # _fix_ie_filename() doesn't work correctly.  See #458.
    if is_filename and inner[:2] == '\\\\':
        return inner

    return inner.replace('\\\\', '\\').replace('\\"', '"')
418
419
def dict_from_cookiejar(cj):
    """Returns a key/value dictionary from a CookieJar.

    :param cj: CookieJar object to extract cookies from.
    :rtype: dict
    """
    # Later cookies with the same name overwrite earlier ones, matching
    # the behaviour of a plain assignment loop.
    return {cookie.name: cookie.value for cookie in cj}
433
434
def add_dict_to_cookiejar(cj, cookie_dict):
    """Returns a CookieJar from a key/value dictionary.

    :param cj: CookieJar to insert cookies into.
    :param cookie_dict: Dict of key/values to insert into CookieJar.
    :rtype: CookieJar
    """

    # Delegates to cookiejar_from_dict; presumably the new cookies are
    # merged into the supplied jar, which is returned — verify in cookies.py.
    return cookiejar_from_dict(cookie_dict, cj)
444
445
def get_encodings_from_content(content):
    """Returns encodings from given content string.

    :param content: bytestring to extract encodings from.
    :rtype: list of encoding names found in meta/xml declarations.
    """
    warnings.warn((
        'In requests 3.0, get_encodings_from_content will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    # HTML <meta charset>, HTML http-equiv pragma, and XML declaration.
    patterns = (
        re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I),
        re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I),
        re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]'),
    )

    found = []
    for pattern in patterns:
        found.extend(pattern.findall(content))
    return found
464
465
466def _parse_content_type_header(header):
467    """Returns content type and parameters from given header
468
469    :param header: string
470    :return: tuple containing content type and dictionary of
471         parameters
472    """
473
474    tokens = header.split(';')
475    content_type, params = tokens[0].strip(), tokens[1:]
476    params_dict = {}
477    items_to_strip = "\"' "
478
479    for param in params:
480        param = param.strip()
481        if param:
482            key, value = param, True
483            index_of_equals = param.find("=")
484            if index_of_equals != -1:
485                key = param[:index_of_equals].strip(items_to_strip)
486                value = param[index_of_equals + 1:].strip(items_to_strip)
487            params_dict[key.lower()] = value
488    return content_type, params_dict
489
490
def get_encoding_from_headers(headers):
    """Returns encodings from given HTTP Header Dict.

    :param headers: dictionary to extract encoding from.
    :rtype: str or None
    """
    content_type = headers.get('content-type')
    if not content_type:
        return None

    media_type, params = _parse_content_type_header(content_type)

    if 'charset' in params:
        # Charset values are sometimes wrapped in quotes.
        return params['charset'].strip("'\"")

    if 'text' in media_type:
        # Historic HTTP default for text/* when no charset is declared.
        return 'ISO-8859-1'

    if 'application/json' in media_type:
        # Assume UTF-8 based on RFC 4627: https://www.ietf.org/rfc/rfc4627.txt since the charset was unset
        return 'utf-8'

    return None
514
515
def stream_decode_response_unicode(iterator, r):
    """Stream decodes a iterator."""
    encoding = r.encoding

    # With no known encoding, pass the raw chunks straight through.
    if encoding is None:
        for chunk in iterator:
            yield chunk
        return

    decoder = codecs.getincrementaldecoder(encoding)(errors='replace')
    for chunk in iterator:
        decoded = decoder.decode(chunk)
        if decoded:
            yield decoded

    # Flush whatever the incremental decoder still has buffered.
    tail = decoder.decode(b'', final=True)
    if tail:
        yield tail
532
533
def iter_slices(string, slice_length):
    """Iterate over slices of a string."""
    # A missing or non-positive length degenerates to one slice holding
    # the whole string.
    if slice_length is None or slice_length <= 0:
        slice_length = len(string)

    total = len(string)
    start = 0
    while start < total:
        yield string[start:start + slice_length]
        start += slice_length
542
543
def get_unicode_from_response(r):
    """Returns the requested content back in unicode.

    :param r: Response object to get unicode content from.

    Tried:

    1. charset from content-type
    2. fall back and replace all unicode characters

    :rtype: str
    """
    warnings.warn((
        'In requests 3.0, get_unicode_from_response will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    tried_encodings = []

    # Try charset from content-type
    encoding = get_encoding_from_headers(r.headers)

    if encoding:
        try:
            return str(r.content, encoding)
        except UnicodeError:
            tried_encodings.append(encoding)

    # Fall back: decode again replacing undecodable bytes.  When no
    # encoding was detected at all, str() raises TypeError and the raw
    # bytes are returned unchanged.
    try:
        return str(r.content, encoding, errors='replace')
    except TypeError:
        return r.content
578
579
# The unreserved URI characters (RFC 3986)
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789-._~")


def unquote_unreserved(uri):
    """Un-escape any percent-escape sequences in a URI that are unreserved
    characters. This leaves all reserved, illegal and non-ASCII bytes encoded.

    :rtype: str
    :raises InvalidURL: if a percent sign is followed by a malformed
        two-character hex sequence containing only alphanumerics.
    """
    segments = uri.split('%')
    result = [segments[0]]

    for segment in segments[1:]:
        hex_pair = segment[0:2]
        if len(hex_pair) == 2 and hex_pair.isalnum():
            try:
                char = chr(int(hex_pair, 16))
            except ValueError:
                raise InvalidURL("Invalid percent-escape sequence: '%s'" % hex_pair)

            if char in UNRESERVED_SET:
                # Unreserved character: safe to decode in place.
                result.append(char + segment[2:])
            else:
                result.append('%' + segment)
        else:
            result.append('%' + segment)

    return ''.join(result)
607
608
def requote_uri(uri):
    """Re-quote the given URI.

    This function passes the given URI through an unquote/quote cycle to
    ensure that it is fully and consistently quoted.

    :rtype: str
    """
    safe_with_percent = "!#$%&'()*+,/:;=?@[]~"
    safe_without_percent = "!#$&'()*+,/:;=?@[]~"
    try:
        # Unquote only the unreserved characters first.
        unquoted = unquote_unreserved(uri)
    except InvalidURL:
        # We couldn't unquote the given URI, so let's try quoting it, but
        # there may be unquoted '%'s in the URI. We need to make sure they're
        # properly quoted so they do not cause issues elsewhere.
        return quote(uri, safe=safe_without_percent)
    # Then quote only illegal characters (do not quote reserved,
    # unreserved, or '%').
    return quote(unquoted, safe=safe_with_percent)
629
630
def address_in_network(ip, net):
    """This function allows you to check if an IP belongs to a network subnet

    Example: returns True if ip = 192.168.1.1 and net = 192.168.1.0/24
             returns False if ip = 192.168.1.1 and net = 192.168.100.0/24

    :rtype: bool
    """
    packed_ip = struct.unpack('=L', socket.inet_aton(ip))[0]
    net_address, prefix = net.split('/')
    mask = struct.unpack('=L', socket.inet_aton(dotted_netmask(int(prefix))))[0]
    # Mask both addresses down to their network portion and compare.
    network = struct.unpack('=L', socket.inet_aton(net_address))[0] & mask
    return (packed_ip & mask) == network
644
645
def dotted_netmask(mask):
    """Converts mask from /xx format to xxx.xxx.xxx.xxx

    Example: if mask is 24 function returns 255.255.255.0

    :rtype: str
    """
    # Keep the top `mask` bits of a 32-bit word set, clear the rest.
    host_bits = 32 - mask
    netmask_bits = (0xffffffff >> host_bits) << host_bits
    return socket.inet_ntoa(struct.pack('>I', netmask_bits))
655
656
def is_ipv4_address(string_ip):
    """Check whether *string_ip* parses as an IPv4 address.

    :rtype: bool
    """
    try:
        socket.inet_aton(string_ip)
        return True
    except socket.error:
        return False
666
667
def is_valid_cidr(string_network):
    """
    Very simple check of the cidr format in no_proxy variable.

    :rtype: bool
    """
    # Exactly one slash separates address from prefix length.
    if string_network.count('/') != 1:
        return False

    address, _, mask_part = string_network.partition('/')

    try:
        mask = int(mask_part)
    except ValueError:
        return False

    if not 1 <= mask <= 32:
        return False

    try:
        socket.inet_aton(address)
    except socket.error:
        return False

    return True
690
691
@contextlib.contextmanager
def set_environ(env_name, value):
    """Temporarily set the environment variable 'env_name' to 'value'.

    The previous value (or absence) of the variable is restored on exit.
    If 'value' is None the environment is left completely untouched.
    """
    if value is None:
        yield
        return

    previous = os.environ.get(env_name)
    os.environ[env_name] = value
    try:
        yield
    finally:
        if previous is None:
            del os.environ[env_name]
        else:
            os.environ[env_name] = previous
712
713
def should_bypass_proxies(url, no_proxy):
    """
    Returns whether we should bypass proxies or not.

    :param url: the URL being requested.
    :param no_proxy: comma-separated host list overriding the no_proxy
        environment variable, or None to read it from the environment.
    :rtype: bool
    """
    # Prioritize lowercase environment variables over uppercase
    # to keep a consistent behaviour with other http projects (curl, wget).
    get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper())

    # First check whether no_proxy is defined. If it is, check that the URL
    # we're getting isn't in the no_proxy list.
    no_proxy_arg = no_proxy
    if no_proxy is None:
        no_proxy = get_proxy('no_proxy')
    parsed = urlparse(url)

    if parsed.hostname is None:
        # URLs don't always have hostnames, e.g. file:/// urls.
        return True

    if no_proxy:
        # We need to check whether we match here. We need to see if we match
        # the end of the hostname, both with and without the port.
        no_proxy = (
            host for host in no_proxy.replace(' ', '').split(',') if host
        )

        if is_ipv4_address(parsed.hostname):
            # IP hosts match either CIDR subnets or exact IP entries.
            for proxy_ip in no_proxy:
                if is_valid_cidr(proxy_ip):
                    if address_in_network(parsed.hostname, proxy_ip):
                        return True
                elif parsed.hostname == proxy_ip:
                    # If no_proxy ip was defined in plain IP notation instead of cidr notation &
                    # matches the IP of the index
                    return True
        else:
            host_with_port = parsed.hostname
            if parsed.port:
                host_with_port += ':{}'.format(parsed.port)

            for host in no_proxy:
                if parsed.hostname.endswith(host) or host_with_port.endswith(host):
                    # The URL does match something in no_proxy, so we don't want
                    # to apply the proxies on this URL.
                    return True

    # Defer to the platform proxy_bypass with no_proxy restored to the
    # caller-supplied value for the duration of the call.
    with set_environ('no_proxy', no_proxy_arg):
        # parsed.hostname can be `None` in cases such as a file URI.
        try:
            bypass = proxy_bypass(parsed.hostname)
        except (TypeError, socket.gaierror):
            bypass = False

    if bypass:
        return True

    return False
773
774
def get_environ_proxies(url, no_proxy=None):
    """
    Return a dict of environment proxies.

    :rtype: dict
    """
    # An empty mapping means "connect directly"; otherwise defer to the
    # proxy configuration found in the environment.
    return {} if should_bypass_proxies(url, no_proxy=no_proxy) else getproxies()
785
786
def select_proxy(url, proxies):
    """Select a proxy for the url, if applicable.

    :param url: The url being for the request
    :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs
    """
    proxies = proxies or {}
    parts = urlparse(url)

    if parts.hostname is None:
        # Host-less URLs (e.g. file://) can only match by scheme.
        return proxies.get(parts.scheme, proxies.get('all'))

    # Most specific key wins: scheme+host, scheme, any-scheme+host, any.
    candidates = (
        parts.scheme + '://' + parts.hostname,
        parts.scheme,
        'all://' + parts.hostname,
        'all',
    )
    for key in candidates:
        if key in proxies:
            return proxies[key]
    return None
811
812
def default_user_agent(name="python-requests"):
    """
    Return a string representing the default user agent.

    :param name: product token to prefix the package version with.
    :rtype: str
    """
    return '%s/%s' % (name, __version__)
820
821
def default_headers():
    """Build the default headers sent with every request.

    :rtype: requests.structures.CaseInsensitiveDict
    """
    base_headers = {
        'User-Agent': default_user_agent(),
        'Accept-Encoding': ', '.join(('gzip', 'deflate')),
        'Accept': '*/*',
        'Connection': 'keep-alive',
    }
    return CaseInsensitiveDict(base_headers)
832
833
def parse_header_links(value):
    """Return a list of parsed link headers proxies.

    i.e. Link: <http:/.../front.jpeg>; rel=front; type="image/jpeg",<http://.../back.jpeg>; rel=back;type="image/jpeg"

    :rtype: list
    """
    parsed_links = []
    strip_chars = ' \'"'

    value = value.strip(strip_chars)
    if not value:
        return parsed_links

    for chunk in re.split(', *<', value):
        # Separate the URL part from the ;-delimited parameters.
        if ';' in chunk:
            url, raw_params = chunk.split(';', 1)
        else:
            url, raw_params = chunk, ''

        link = {'url': url.strip('<> \'"')}

        for raw_param in raw_params.split(';'):
            try:
                key, param_value = raw_param.split('=')
            except ValueError:
                # Malformed parameter: stop processing this link's params.
                break
            link[key.strip(strip_chars)] = param_value.strip(strip_chars)

        parsed_links.append(link)

    return parsed_links
869
870
# Null bytes; no need to recreate these on each call to guess_json_utf
_null = '\x00'.encode('ascii')  # encoding to ASCII for Python 3
_null2 = _null * 2
_null3 = _null * 3


def guess_json_utf(data):
    """Guess the UTF flavour of a JSON byte string.

    :rtype: str or None
    """
    # JSON always opens with two ASCII characters, so the count and
    # position of NUL bytes in the first four bytes identifies the UTF
    # encoding.  Explicit BOMs are honoured first.
    head = data[:4]
    if head in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        return 'utf-32'     # BOM included
    if head[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'  # BOM included, MS style (discouraged)
    if head[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
        return 'utf-16'     # BOM included

    nulls = head.count(_null)
    if nulls == 0:
        return 'utf-8'
    if nulls == 2:
        if head[::2] == _null2:   # 1st and 3rd bytes are null
            return 'utf-16-be'
        if head[1::2] == _null2:  # 2nd and 4th bytes are null
            return 'utf-16-le'
        # two nulls, but not in a valid UTF-16 pattern
    elif nulls == 3:
        if head[:3] == _null3:
            return 'utf-32-be'
        if head[1:] == _null3:
            return 'utf-32-le'
        # three nulls, but not in a valid UTF-32 pattern
    return None
907
908
def prepend_scheme_if_needed(url, new_scheme):
    """Given a URL that may or may not have a scheme, prepend the given scheme.
    Does not replace a present scheme with the one provided as an argument.

    :rtype: str
    """
    scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme)

    # urlparse sometimes reports no netloc for scheme-less URLs and puts
    # the host into the path component instead; assume that is what
    # happened here and swap the two back.
    if not netloc:
        netloc, path = path, netloc

    return urlunparse((scheme, netloc, path, params, query, fragment))
924
925
def get_auth_from_url(url):
    """Given a url with authentication components, extract them into a tuple of
    username,password.

    :rtype: (str,str)
    """
    parsed = urlparse(url)

    # unquote raises TypeError when no credentials are present (None),
    # in which case we fall back to an empty pair.
    try:
        return (unquote(parsed.username), unquote(parsed.password))
    except (AttributeError, TypeError):
        return ('', '')
940
941
# Moved outside of function to avoid recompile every call
_CLEAN_HEADER_REGEX_BYTE = re.compile(b'^\\S[^\\r\\n]*$|^$')
_CLEAN_HEADER_REGEX_STR = re.compile(r'^\S[^\r\n]*$|^$')


def check_header_validity(header):
    """Verifies that header value is a string which doesn't contain
    leading whitespace or return characters. This prevents unintended
    header injection.

    :param header: tuple, in the format (name, value).
    :raises InvalidHeader: when the value is malformed or not str/bytes.
    """
    name, value = header

    # Bytes and text each need a pattern object of the matching type.
    pat = _CLEAN_HEADER_REGEX_BYTE if isinstance(value, bytes) else _CLEAN_HEADER_REGEX_STR

    try:
        if not pat.match(value):
            raise InvalidHeader("Invalid return character or leading space in header: %s" % name)
    except TypeError:
        raise InvalidHeader("Value for header {%s: %s} must be of type str or "
                            "bytes, not %s" % (name, value, type(value)))
966
967
def urldefragauth(url):
    """
    Given a url remove the fragment and the authentication part.

    :rtype: str
    """
    scheme, netloc, path, params, query, fragment = urlparse(url)

    # see func:`prepend_scheme_if_needed`
    if not netloc:
        netloc, path = path, netloc

    # Drop any "user:pass@" userinfo prefix from the host portion.
    host = netloc.rsplit('@', 1)[-1]

    return urlunparse((scheme, host, path, params, query, ''))
983
984
def rewind_body(prepared_request):
    """Move file pointer back to its recorded starting position
    so it can be read again on redirect.

    :raises UnrewindableBodyError: if the body cannot be rewound.
    """
    seek_fn = getattr(prepared_request.body, 'seek', None)
    position = prepared_request._body_position

    # Guard: rewinding needs both a seekable body and a recorded position.
    if seek_fn is None or not isinstance(position, integer_types):
        raise UnrewindableBodyError("Unable to rewind request body for redirect.")

    try:
        seek_fn(position)
    except (IOError, OSError):
        raise UnrewindableBodyError("An error occurred when rewinding request "
                                    "body for redirect.")
998