# -*- coding: utf-8 -*-

"""
requests.utils
~~~~~~~~~~~~~~

This module provides utility functions that are used within Requests
that are also useful for external consumption.
"""

import codecs
import contextlib
import io
import os
import re
import socket
import struct
import sys
import tempfile
import warnings
import zipfile

from .__version__ import __version__
from . import certs
# to_native_string is unused in this module, but imported here for backwards compatibility
from ._internal_utils import to_native_string
from .compat import parse_http_list as _parse_list_header
from .compat import (
    quote, urlparse, bytes, str, OrderedDict, unquote, getproxies,
    proxy_bypass, urlunparse, basestring, integer_types, is_py3,
    proxy_bypass_environment, getproxies_environment, Mapping)
from .cookies import cookiejar_from_dict
from .structures import CaseInsensitiveDict
from .exceptions import (
    InvalidURL, InvalidHeader, FileModeWarning, UnrewindableBodyError)

NETRC_FILES = ('.netrc', '_netrc')

DEFAULT_CA_BUNDLE_PATH = certs.where()

DEFAULT_PORTS = {'http': 80, 'https': 443}


if sys.platform == 'win32':
    # provide a proxy_bypass version on Windows without DNS lookups

    def proxy_bypass_registry(host):
        try:
            if is_py3:
                import winreg
            else:
                import _winreg as winreg
        except ImportError:
            return False

        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            # ProxyEnable could be REG_SZ or REG_DWORD, normalize it to an int
            proxyEnable = int(winreg.QueryValueEx(internetSettings,
                                                  'ProxyEnable')[0])
            # ProxyOverride is almost always a string
            proxyOverride = winreg.QueryValueEx(internetSettings,
                                                'ProxyOverride')[0]
        except OSError:
            return False
        if not proxyEnable or not proxyOverride:
            return False

        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                if '.' not in host:
                    return True
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            if re.match(test, host, re.I):
                return True
        return False

    def proxy_bypass(host):  # noqa
        """Return True if the host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or from the registry.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)


def dict_to_sequence(d):
    """Returns a sequence of (key, value) pairs from a dict-like object."""

    if hasattr(d, 'items'):
        d = d.items()

    return d


def super_len(o):
    total_length = None
    current_position = 0

    if hasattr(o, '__len__'):
        total_length = len(o)

    elif hasattr(o, 'len'):
        total_length = o.len

    elif hasattr(o, 'fileno'):
        try:
            fileno = o.fileno()
        except io.UnsupportedOperation:
            pass
        else:
            total_length = os.fstat(fileno).st_size

            # Having used fstat to determine the file length, we need to
            # confirm that this file was opened up in binary mode.
            if 'b' not in o.mode:
                warnings.warn((
                    "Requests has determined the content-length for this "
                    "request using the binary size of the file: however, the "
                    "file has been opened in text mode (i.e. without the 'b' "
                    "flag in the mode). This may lead to an incorrect "
                    "content-length. In Requests 3.0, support will be removed "
                    "for files in text mode."),
                    FileModeWarning
                )

    if hasattr(o, 'tell'):
        try:
            current_position = o.tell()
        except (OSError, IOError):
            # This can happen in some weird situations, such as when the file
            # is actually a special file descriptor like stdin. In this
            # instance, we don't know what the length is, so set it to zero and
            # let requests chunk it instead.
            if total_length is not None:
                current_position = total_length
        else:
            if hasattr(o, 'seek') and total_length is None:
                # StringIO and BytesIO have seek but no usable fileno
                try:
                    # seek to end of file
                    o.seek(0, 2)
                    total_length = o.tell()

                    # seek back to current position to support
                    # partially read file-like objects
                    o.seek(current_position or 0)
                except (OSError, IOError):
                    total_length = 0

    if total_length is None:
        total_length = 0

    return max(0, total_length - current_position)

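# Illustrative sketch for ``super_len`` (doctest-style, kept in comments so it
# is not part of the module's API): the result is the number of bytes
# *remaining* from the current stream position, not the total size.
#
#   >>> import io
#   >>> buf = io.BytesIO(b'hello')
#   >>> buf.read(2)  # advance the stream by two bytes
#   b'he'
#   >>> super_len(buf)
#   3
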

def get_netrc_auth(url, raise_errors=False):
    """Returns the Requests tuple auth for a given url from netrc."""

    try:
        from netrc import netrc, NetrcParseError

        netrc_path = None

        for f in NETRC_FILES:
            try:
                loc = os.path.expanduser('~/{}'.format(f))
            except KeyError:
                # os.path.expanduser can fail when $HOME is undefined and
                # getpwuid fails. See https://bugs.python.org/issue20164 &
                # https://github.com/requests/requests/issues/1846
                return

            if os.path.exists(loc):
                netrc_path = loc
                break

        # Abort early if there isn't one.
        if netrc_path is None:
            return

        ri = urlparse(url)

        # Strip port numbers from netloc. This weird `if...encode` dance is
        # used for Python 3.2, which doesn't support unicode literals.
        splitstr = b':'
        if isinstance(url, str):
            splitstr = splitstr.decode('ascii')
        host = ri.netloc.split(splitstr)[0]

        try:
            _netrc = netrc(netrc_path).authenticators(host)
            if _netrc:
                # Return with login / password
                login_i = (0 if _netrc[0] else 1)
                return (_netrc[login_i], _netrc[2])
        except (NetrcParseError, IOError):
            # If there was a parsing error or a permissions issue reading the file,
            # we'll just skip netrc auth unless explicitly asked to raise errors.
            if raise_errors:
                raise

    # AppEngine hackiness.
    except (ImportError, AttributeError):
        pass


def guess_filename(obj):
    """Tries to guess the filename of the given object."""
    name = getattr(obj, 'name', None)
    if (name and isinstance(name, basestring) and name[0] != '<' and
            name[-1] != '>'):
        return os.path.basename(name)


def extract_zipped_paths(path):
    """Replace nonexistent paths that look like they refer to a member of a zip
    archive with the location of an extracted copy of the target, or else
    just return the provided path unchanged.
    """
    if os.path.exists(path):
        # this is already a valid path, no need to do anything further
        return path

    # find the first valid part of the provided path and treat that as a zip archive
    # assume the rest of the path is the name of a member in the archive
    archive, member = os.path.split(path)
    while archive and not os.path.exists(archive):
        archive, prefix = os.path.split(archive)
        member = '/'.join([prefix, member])

    if not zipfile.is_zipfile(archive):
        return path

    zip_file = zipfile.ZipFile(archive)
    if member not in zip_file.namelist():
        return path

    # we have a valid zip archive and a valid member of that archive
    tmp = tempfile.gettempdir()
    extracted_path = os.path.join(tmp, *member.split('/'))
    if not os.path.exists(extracted_path):
        extracted_path = zip_file.extract(member, path=tmp)

    return extracted_path


def from_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. If it can be, return an OrderedDict, e.g.,

    ::

        >>> from_key_val_list([('key', 'val')])
        OrderedDict([('key', 'val')])
        >>> from_key_val_list('string')
        ValueError: cannot encode objects that are not 2-tuples
        >>> from_key_val_list({'key': 'val'})
        OrderedDict([('key', 'val')])

    :rtype: OrderedDict
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    return OrderedDict(value)


def to_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. If it can be, return a list of tuples, e.g.,

    ::

        >>> to_key_val_list([('key', 'val')])
        [('key', 'val')]
        >>> to_key_val_list({'key': 'val'})
        [('key', 'val')]
        >>> to_key_val_list('string')
        ValueError: cannot encode objects that are not 2-tuples

    :rtype: list
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    if isinstance(value, Mapping):
        value = value.items()

    return list(value)


# From mitsuhiko/werkzeug (used with permission).
def parse_list_header(value):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.  A non-quoted string could have quotes in the
    middle.  Quotes are removed automatically after parsing.

    It basically works like :func:`parse_set_header` just that items
    may appear multiple times and case sensitivity is preserved.

    The return value is a standard :class:`list`:

    >>> parse_list_header('token, "quoted value"')
    ['token', 'quoted value']

    To create a header from the :class:`list` again, use the
    :func:`dump_header` function.

    :param value: a string with a list header.
    :return: :class:`list`
    :rtype: list
    """
    result = []
    for item in _parse_list_header(value):
        if item[:1] == item[-1:] == '"':
            item = unquote_header_value(item[1:-1])
        result.append(item)
    return result


# From mitsuhiko/werkzeug (used with permission).
def parse_dict_header(value):
    """Parse lists of key, value pairs as described by RFC 2068 Section 2 and
    convert them into a python dict:

    >>> d = parse_dict_header('foo="is a fish", bar="as well"')
    >>> type(d) is dict
    True
    >>> sorted(d.items())
    [('bar', 'as well'), ('foo', 'is a fish')]

    If there is no value for a key it will be `None`:

    >>> parse_dict_header('key_without_value')
    {'key_without_value': None}

    To create a header from the :class:`dict` again, use the
    :func:`dump_header` function.

    :param value: a string with a dict header.
    :return: :class:`dict`
    :rtype: dict
    """
    result = {}
    for item in _parse_list_header(value):
        if '=' not in item:
            result[item] = None
            continue
        name, value = item.split('=', 1)
        if value[:1] == value[-1:] == '"':
            value = unquote_header_value(value[1:-1])
        result[name] = value
    return result


# From mitsuhiko/werkzeug (used with permission).
def unquote_header_value(value, is_filename=False):
    r"""Unquotes a header value.  (Reversal of :func:`quote_header_value`).
    This does not use the real unquoting but what browsers are actually
    using for quoting.

    :param value: the header value to unquote.
    :rtype: str
    """
    if value and value[0] == value[-1] == '"':
        # this is not the real unquoting, but fixing this so that the
        # RFC is met will result in bugs with internet explorer and
        # probably some other browsers as well.  IE for example is
        # uploading files with "C:\foo\bar.txt" as filename
        value = value[1:-1]

        # if this is a filename and the starting characters look like
        # a UNC path, then just return the value without quotes.  Using the
        # replace sequence below on a UNC path has the effect of turning
        # the leading double slash into a single slash and then
        # _fix_ie_filename() doesn't work correctly.  See #458.
        if not is_filename or value[:2] != '\\\\':
            return value.replace('\\\\', '\\').replace('\\"', '"')
    return value

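# Illustrative sketch for ``unquote_header_value`` (comments only, not part of
# the API): surrounding quotes are stripped and backslash escapes are undone,
# browser-style.
#
#   >>> unquote_header_value('"token"')
#   'token'
#   >>> unquote_header_value('"say \\"hi\\""')
#   'say "hi"'
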

def dict_from_cookiejar(cj):
    """Returns a key/value dictionary from a CookieJar.

    :param cj: CookieJar object to extract cookies from.
    :rtype: dict
    """

    cookie_dict = {}

    for cookie in cj:
        cookie_dict[cookie.name] = cookie.value

    return cookie_dict


def add_dict_to_cookiejar(cj, cookie_dict):
    """Returns a CookieJar from a key/value dictionary.

    :param cj: CookieJar to insert cookies into.
    :param cookie_dict: Dict of key/values to insert into CookieJar.
    :rtype: CookieJar
    """

    return cookiejar_from_dict(cookie_dict, cj)


def get_encodings_from_content(content):
    """Returns encodings from given content string.

    :param content: bytestring to extract encodings from.
    """
    warnings.warn((
        'In requests 3.0, get_encodings_from_content will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
    pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
    xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]')

    return (charset_re.findall(content) +
            pragma_re.findall(content) +
            xml_re.findall(content))


def _parse_content_type_header(header):
    """Returns content type and parameters from a given header string.

    :param header: string
    :return: tuple containing content type and dictionary of
         parameters
    """

    tokens = header.split(';')
    content_type, params = tokens[0].strip(), tokens[1:]
    params_dict = {}
    items_to_strip = "\"' "

    for param in params:
        param = param.strip()
        if param:
            key, value = param, True
            index_of_equals = param.find("=")
            if index_of_equals != -1:
                key = param[:index_of_equals].strip(items_to_strip)
                value = param[index_of_equals + 1:].strip(items_to_strip)
            params_dict[key.lower()] = value
    return content_type, params_dict

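# Illustrative sketch for ``_parse_content_type_header`` (comments only): the
# media type is separated from its parameters, and keys are lowercased.
#
#   >>> _parse_content_type_header('text/html; charset=UTF-8')
#   ('text/html', {'charset': 'UTF-8'})
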

def get_encoding_from_headers(headers):
    """Returns the encoding specified by a given HTTP headers dict.

    :param headers: dictionary to extract encoding from.
    :rtype: str
    """

    content_type = headers.get('content-type')

    if not content_type:
        return None

    content_type, params = _parse_content_type_header(content_type)

    if 'charset' in params:
        return params['charset'].strip("'\"")

    if 'text' in content_type:
        return 'ISO-8859-1'

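# Illustrative sketch for ``get_encoding_from_headers`` (comments only): an
# explicit charset wins, text content without one falls back to ISO-8859-1
# (the RFC 2616 default), and anything else yields None.
#
#   >>> get_encoding_from_headers({'content-type': 'text/html; charset=utf-8'})
#   'utf-8'
#   >>> get_encoding_from_headers({'content-type': 'text/html'})
#   'ISO-8859-1'
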

def stream_decode_response_unicode(iterator, r):
    """Stream decodes an iterator."""

    if r.encoding is None:
        for item in iterator:
            yield item
        return

    decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace')
    for chunk in iterator:
        rv = decoder.decode(chunk)
        if rv:
            yield rv
    rv = decoder.decode(b'', final=True)
    if rv:
        yield rv


def iter_slices(string, slice_length):
    """Iterate over slices of a string."""
    pos = 0
    if slice_length is None or slice_length <= 0:
        slice_length = len(string)
    while pos < len(string):
        yield string[pos:pos + slice_length]
        pos += slice_length

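# Illustrative sketch for ``iter_slices`` (comments only): a None or
# non-positive length yields the whole string as a single slice.
#
#   >>> list(iter_slices('abcdef', 4))
#   ['abcd', 'ef']
#   >>> list(iter_slices('abcdef', None))
#   ['abcdef']
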

def get_unicode_from_response(r):
    """Returns the requested content back in unicode.

    :param r: Response object to get unicode content from.

    Tried:

    1. charset from content-type
    2. fall back and replace all unicode characters

    :rtype: str
    """
    warnings.warn((
        'In requests 3.0, get_unicode_from_response will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    tried_encodings = []

    # Try charset from content-type
    encoding = get_encoding_from_headers(r.headers)

    if encoding:
        try:
            return str(r.content, encoding)
        except UnicodeError:
            tried_encodings.append(encoding)

    # Fall back:
    try:
        return str(r.content, encoding, errors='replace')
    except TypeError:
        return r.content


# The unreserved URI characters (RFC 3986)
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789-._~")


def unquote_unreserved(uri):
    """Un-escape any percent-escape sequences in a URI that are unreserved
    characters. This leaves all reserved, illegal and non-ASCII bytes encoded.

    :rtype: str
    """
    parts = uri.split('%')
    for i in range(1, len(parts)):
        h = parts[i][0:2]
        if len(h) == 2 and h.isalnum():
            try:
                c = chr(int(h, 16))
            except ValueError:
                raise InvalidURL("Invalid percent-escape sequence: '%s'" % h)

            if c in UNRESERVED_SET:
                parts[i] = c + parts[i][2:]
            else:
                parts[i] = '%' + parts[i]
        else:
            parts[i] = '%' + parts[i]
    return ''.join(parts)

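# Illustrative sketch for ``unquote_unreserved`` (comments only): %7E ('~') is
# unreserved and gets decoded, while %2F ('/') is reserved and stays encoded.
#
#   >>> unquote_unreserved('http://example.com/%7Euser/a%2Fb')
#   'http://example.com/~user/a%2Fb'
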

def requote_uri(uri):
    """Re-quote the given URI.

    This function passes the given URI through an unquote/quote cycle to
    ensure that it is fully and consistently quoted.

    :rtype: str
    """
    safe_with_percent = "!#$%&'()*+,/:;=?@[]~"
    safe_without_percent = "!#$&'()*+,/:;=?@[]~"
    try:
        # Unquote only the unreserved characters
        # Then quote only illegal characters (do not quote reserved,
        # unreserved, or '%')
        return quote(unquote_unreserved(uri), safe=safe_with_percent)
    except InvalidURL:
        # We couldn't unquote the given URI, so let's try quoting it, but
        # there may be unquoted '%'s in the URI. We need to make sure they're
        # properly quoted so they do not cause issues elsewhere.
        return quote(uri, safe=safe_without_percent)

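# Illustrative sketch for ``requote_uri`` (comments only): unquoted characters
# get escaped, and already-quoted input passes through unchanged (idempotent).
#
#   >>> requote_uri('http://example.com/a b')
#   'http://example.com/a%20b'
#   >>> requote_uri('http://example.com/a%20b')
#   'http://example.com/a%20b'
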

def address_in_network(ip, net):
    """This function allows you to check if an IP belongs to a network subnet.

    Example: returns True if ip = 192.168.1.1 and net = 192.168.1.0/24
             returns False if ip = 192.168.1.1 and net = 192.168.100.0/24

    :rtype: bool
    """
    ipaddr = struct.unpack('=L', socket.inet_aton(ip))[0]
    netaddr, bits = net.split('/')
    netmask = struct.unpack('=L', socket.inet_aton(dotted_netmask(int(bits))))[0]
    network = struct.unpack('=L', socket.inet_aton(netaddr))[0] & netmask
    return (ipaddr & netmask) == (network & netmask)


def dotted_netmask(mask):
    """Converts mask from /xx format to xxx.xxx.xxx.xxx

    Example: if mask is 24 function returns 255.255.255.0

    :rtype: str
    """
    bits = 0xffffffff ^ ((1 << (32 - mask)) - 1)
    return socket.inet_ntoa(struct.pack('>I', bits))

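# Illustrative sketch for ``dotted_netmask`` and ``address_in_network``
# (comments only), mirroring the docstring examples:
#
#   >>> dotted_netmask(24)
#   '255.255.255.0'
#   >>> address_in_network('192.168.1.1', '192.168.1.0/24')
#   True
#   >>> address_in_network('192.168.1.1', '192.168.100.0/24')
#   False
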

def is_ipv4_address(string_ip):
    """
    :rtype: bool
    """
    try:
        socket.inet_aton(string_ip)
    except socket.error:
        return False
    return True


def is_valid_cidr(string_network):
    """
    Very simple check of the CIDR format in the no_proxy variable.

    :rtype: bool
    """
    if string_network.count('/') == 1:
        try:
            mask = int(string_network.split('/')[1])
        except ValueError:
            return False

        if mask < 1 or mask > 32:
            return False

        try:
            socket.inet_aton(string_network.split('/')[0])
        except socket.error:
            return False
    else:
        return False
    return True

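# Illustrative sketch for ``is_valid_cidr`` (comments only): exactly one '/',
# a mask between 1 and 32, and a parseable IPv4 address are required.
#
#   >>> is_valid_cidr('192.168.1.0/24')
#   True
#   >>> is_valid_cidr('192.168.1.0')
#   False
#   >>> is_valid_cidr('192.168.1.0/33')
#   False
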

@contextlib.contextmanager
def set_environ(env_name, value):
    """Set the environment variable 'env_name' to 'value'

    Save previous value, yield, and then restore the previous value stored in
    the environment variable 'env_name'.

    If 'value' is None, do nothing"""
    value_changed = value is not None
    if value_changed:
        old_value = os.environ.get(env_name)
        os.environ[env_name] = value
    try:
        yield
    finally:
        if value_changed:
            if old_value is None:
                del os.environ[env_name]
            else:
                os.environ[env_name] = old_value

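# Illustrative sketch for ``set_environ`` (comments only; the variable name is
# made up for the example): the variable is set inside the block and the old
# state is restored afterwards.
#
#   >>> import os
#   >>> with set_environ('REQUESTS_SKETCH_VAR', '1'):
#   ...     os.environ['REQUESTS_SKETCH_VAR']
#   '1'
#   >>> 'REQUESTS_SKETCH_VAR' in os.environ
#   False
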

def should_bypass_proxies(url, no_proxy):
    """
    Returns whether we should bypass proxies or not.

    :rtype: bool
    """
    # Prioritize lowercase environment variables over uppercase
    # to keep a consistent behaviour with other http projects (curl, wget).
    get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper())

    # First check whether no_proxy is defined. If it is, check that the URL
    # we're getting isn't in the no_proxy list.
    no_proxy_arg = no_proxy
    if no_proxy is None:
        no_proxy = get_proxy('no_proxy')
    parsed = urlparse(url)

    if parsed.hostname is None:
        # URLs don't always have hostnames, e.g. file:/// urls.
        return True

    if no_proxy:
        # We need to check whether we match here. We need to see if we match
        # the end of the hostname, both with and without the port.
        no_proxy = (
            host for host in no_proxy.replace(' ', '').split(',') if host
        )

        if is_ipv4_address(parsed.hostname):
            for proxy_ip in no_proxy:
                if is_valid_cidr(proxy_ip):
                    if address_in_network(parsed.hostname, proxy_ip):
                        return True
                elif parsed.hostname == proxy_ip:
                    # The no_proxy entry was defined in plain IP notation
                    # instead of CIDR notation and matches the request's IP.
                    return True
        else:
            host_with_port = parsed.hostname
            if parsed.port:
                host_with_port += ':{}'.format(parsed.port)

            for host in no_proxy:
                if parsed.hostname.endswith(host) or host_with_port.endswith(host):
                    # The URL does match something in no_proxy, so we don't want
                    # to apply the proxies on this URL.
                    return True

    with set_environ('no_proxy', no_proxy_arg):
        # parsed.hostname can be `None` in cases such as a file URI.
        try:
            bypass = proxy_bypass(parsed.hostname)
        except (TypeError, socket.gaierror):
            bypass = False

    if bypass:
        return True

    return False


def get_environ_proxies(url, no_proxy=None):
    """
    Return a dict of environment proxies.

    :rtype: dict
    """
    if should_bypass_proxies(url, no_proxy=no_proxy):
        return {}
    else:
        return getproxies()


def select_proxy(url, proxies):
    """Select a proxy for the url, if applicable.

    :param url: The URL being requested
    :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs
    """
    proxies = proxies or {}
    urlparts = urlparse(url)
    if urlparts.hostname is None:
        return proxies.get(urlparts.scheme, proxies.get('all'))

    proxy_keys = [
        urlparts.scheme + '://' + urlparts.hostname,
        urlparts.scheme,
        'all://' + urlparts.hostname,
        'all',
    ]
    proxy = None
    for proxy_key in proxy_keys:
        if proxy_key in proxies:
            proxy = proxies[proxy_key]
            break

    return proxy

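# Illustrative sketch for ``select_proxy`` (comments only; hostnames are made
# up): a scheme+host key is preferred over a bare scheme key, which in turn
# beats 'all'.
#
#   >>> proxies = {'http://example.com': 'http://special:3128',
#   ...            'http': 'http://plain:3128'}
#   >>> select_proxy('http://example.com/path', proxies)
#   'http://special:3128'
#   >>> select_proxy('http://other.org/path', proxies)
#   'http://plain:3128'
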

def default_user_agent(name="python-requests"):
    """
    Return a string representing the default user agent.

    :rtype: str
    """
    return '%s/%s' % (name, __version__)


def default_headers():
    """
    :rtype: requests.structures.CaseInsensitiveDict
    """
    return CaseInsensitiveDict({
        'User-Agent': default_user_agent(),
        'Accept-Encoding': ', '.join(('gzip', 'deflate')),
        'Accept': '*/*',
        'Connection': 'keep-alive',
    })


def parse_header_links(value):
    """Return a list of parsed link headers.

    i.e. Link: <http:/.../front.jpeg>; rel=front; type="image/jpeg",<http://.../back.jpeg>; rel=back;type="image/jpeg"

    :rtype: list
    """

    links = []

    replace_chars = ' \'"'

    value = value.strip(replace_chars)
    if not value:
        return links

    for val in re.split(', *<', value):
        try:
            url, params = val.split(';', 1)
        except ValueError:
            url, params = val, ''

        link = {'url': url.strip('<> \'"')}

        for param in params.split(';'):
            try:
                key, value = param.split('=')
            except ValueError:
                break

            link[key.strip(replace_chars)] = value.strip(replace_chars)

        links.append(link)

    return links

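# Illustrative sketch for ``parse_header_links`` (comments only):
#
#   >>> parse_header_links('<http://example.com/page2>; rel="next"')
#   [{'url': 'http://example.com/page2', 'rel': 'next'}]
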

# Null bytes; no need to recreate these on each call to guess_json_utf
_null = '\x00'.encode('ascii')  # encoding to ASCII for Python 3
_null2 = _null * 2
_null3 = _null * 3


def guess_json_utf(data):
    """
    :rtype: str
    """
    # JSON always starts with two ASCII characters, so detection is as
    # easy as counting the nulls and from their location and count
    # determine the encoding. Also detect a BOM, if present.
    sample = data[:4]
    if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        return 'utf-32'     # BOM included
    if sample[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'  # BOM included, MS style (discouraged)
    if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
        return 'utf-16'     # BOM included
    nullcount = sample.count(_null)
    if nullcount == 0:
        return 'utf-8'
    if nullcount == 2:
        if sample[::2] == _null2:   # 1st and 3rd are null
            return 'utf-16-be'
        if sample[1::2] == _null2:  # 2nd and 4th are null
            return 'utf-16-le'
        # Did not detect 2 valid UTF-16 ascii-range characters
    if nullcount == 3:
        if sample[:3] == _null3:
            return 'utf-32-be'
        if sample[1:] == _null3:
            return 'utf-32-le'
        # Did not detect a valid UTF-32 ascii-range character
    return None

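# Illustrative sketch for ``guess_json_utf`` (comments only): the first bytes
# of a JSON document are always ASCII, so the null-byte pattern (or a BOM)
# reveals the encoding.
#
#   >>> guess_json_utf(b'{"k": 1}')
#   'utf-8'
#   >>> guess_json_utf('{"k": 1}'.encode('utf-16'))
#   'utf-16'
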

def prepend_scheme_if_needed(url, new_scheme):
    """Given a URL that may or may not have a scheme, prepend the given scheme.
    Does not replace a present scheme with the one provided as an argument.

    :rtype: str
    """
    scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme)

    # urlparse is a finicky beast, and sometimes decides that there isn't a
    # netloc present. Assume that it's being over-cautious, and switch netloc
    # and path if urlparse decided there was no netloc.
    if not netloc:
        netloc, path = path, netloc

    return urlunparse((scheme, netloc, path, params, query, fragment))

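# Illustrative sketch for ``prepend_scheme_if_needed`` (comments only):
#
#   >>> prepend_scheme_if_needed('example.com/path', 'http')
#   'http://example.com/path'
#   >>> prepend_scheme_if_needed('https://example.com/path', 'http')
#   'https://example.com/path'
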

def get_auth_from_url(url):
    """Given a url with authentication components, extract them into a tuple of
    username, password.

    :rtype: (str,str)
    """
    parsed = urlparse(url)

    try:
        auth = (unquote(parsed.username), unquote(parsed.password))
    except (AttributeError, TypeError):
        auth = ('', '')

    return auth

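# Illustrative sketch for ``get_auth_from_url`` (comments only): credentials
# are percent-decoded, and URLs without them yield ('', '').
#
#   >>> get_auth_from_url('http://user:p%40ss@example.com/')
#   ('user', 'p@ss')
#   >>> get_auth_from_url('http://example.com/')
#   ('', '')
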

# Moved outside of function to avoid recompile every call
_CLEAN_HEADER_REGEX_BYTE = re.compile(b'^\\S[^\\r\\n]*$|^$')
_CLEAN_HEADER_REGEX_STR = re.compile(r'^\S[^\r\n]*$|^$')


def check_header_validity(header):
    """Verifies that header value is a string which doesn't contain
    leading whitespace or return characters. This prevents unintended
    header injection.

    :param header: tuple, in the format (name, value).
    """
    name, value = header

    if isinstance(value, bytes):
        pat = _CLEAN_HEADER_REGEX_BYTE
    else:
        pat = _CLEAN_HEADER_REGEX_STR
    try:
        if not pat.match(value):
            raise InvalidHeader("Invalid return character or leading space in header: %s" % name)
    except TypeError:
        raise InvalidHeader("Value for header {%s: %s} must be of type str or "
                            "bytes, not %s" % (name, value, type(value)))

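# Illustrative sketch for ``check_header_validity`` (comments only): a clean
# header passes silently, while a value containing a return character raises
# InvalidHeader.
#
#   >>> check_header_validity(('X-Demo', 'ok'))
#   >>> check_header_validity(('X-Demo', 'bad\r\nvalue'))
#   Traceback (most recent call last):
#     ...
#   requests.exceptions.InvalidHeader: Invalid return character or leading space in header: X-Demo
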

def urldefragauth(url):
    """
    Given a url, remove the fragment and the authentication part.

    :rtype: str
    """
    scheme, netloc, path, params, query, fragment = urlparse(url)

    # see func:`prepend_scheme_if_needed`
    if not netloc:
        netloc, path = path, netloc

    netloc = netloc.rsplit('@', 1)[-1]

    return urlunparse((scheme, netloc, path, params, query, ''))

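# Illustrative sketch for ``urldefragauth`` (comments only):
#
#   >>> urldefragauth('http://user:pass@example.com/path#frag')
#   'http://example.com/path'
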

def rewind_body(prepared_request):
    """Move file pointer back to its recorded starting position
    so it can be read again on redirect.
    """
    body_seek = getattr(prepared_request.body, 'seek', None)
    if body_seek is not None and isinstance(prepared_request._body_position, integer_types):
        try:
            body_seek(prepared_request._body_position)
        except (IOError, OSError):
            raise UnrewindableBodyError("An error occurred when rewinding request "
                                        "body for redirect.")
    else:
        raise UnrewindableBodyError("Unable to rewind request body for redirect.")