1# -*- coding: utf-8 -*-
2
3"""
4requests.utils
5~~~~~~~~~~~~~~
6
7This module provides utility functions that are used within Requests
8that are also useful for external consumption.
9"""
10
11import codecs
12import contextlib
13import io
14import os
15import re
16import socket
17import struct
18import sys
19import tempfile
20import warnings
21import zipfile
22from collections import OrderedDict
23
24from .__version__ import __version__
25from . import certs
26# to_native_string is unused here, but imported here for backwards compatibility
27from ._internal_utils import to_native_string
28from .compat import parse_http_list as _parse_list_header
29from .compat import (
30    quote, urlparse, bytes, str, unquote, getproxies,
31    proxy_bypass, urlunparse, basestring, integer_types, is_py3,
32    proxy_bypass_environment, getproxies_environment, Mapping)
33from .cookies import cookiejar_from_dict
34from .structures import CaseInsensitiveDict
35from .exceptions import (
36    InvalidURL, InvalidHeader, FileModeWarning, UnrewindableBodyError)
37
38NETRC_FILES = ('.netrc', '_netrc')
39
40DEFAULT_CA_BUNDLE_PATH = certs.where()
41
42DEFAULT_PORTS = {'http': 80, 'https': 443}
43
44
45if sys.platform == 'win32':
46    # provide a proxy_bypass version on Windows without DNS lookups
47
48    def proxy_bypass_registry(host):
49        try:
50            if is_py3:
51                import winreg
52            else:
53                import _winreg as winreg
54        except ImportError:
55            return False
56
57        try:
58            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
59                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
60            # ProxyEnable could be REG_SZ or REG_DWORD, normalizing it
61            proxyEnable = int(winreg.QueryValueEx(internetSettings,
62                                              'ProxyEnable')[0])
63            # ProxyOverride is almost always a string
64            proxyOverride = winreg.QueryValueEx(internetSettings,
65                                                'ProxyOverride')[0]
66        except OSError:
67            return False
68        if not proxyEnable or not proxyOverride:
69            return False
70
71        # make a check value list from the registry entry: replace the
72        # '<local>' string by the localhost entry and the corresponding
73        # canonical entry.
74        proxyOverride = proxyOverride.split(';')
75        # now check if we match one of the registry values.
76        for test in proxyOverride:
77            if test == '<local>':
78                if '.' not in host:
79                    return True
80            test = test.replace(".", r"\.")     # mask dots
81            test = test.replace("*", r".*")     # change glob sequence
82            test = test.replace("?", r".")      # change glob char
83            if re.match(test, host, re.I):
84                return True
85        return False
86
87    def proxy_bypass(host):  # noqa
88        """Return True, if the host should be bypassed.
89
90        Checks proxy settings gathered from the environment, if specified,
91        or the registry.
92        """
93        if getproxies_environment():
94            return proxy_bypass_environment(host)
95        else:
96            return proxy_bypass_registry(host)
97
98
99def dict_to_sequence(d):
100    """Returns an internal sequence dictionary update."""
101
102    if hasattr(d, 'items'):
103        d = d.items()
104
105    return d
106
107
108def super_len(o):
109    total_length = None
110    current_position = 0
111
112    if hasattr(o, '__len__'):
113        total_length = len(o)
114
115    elif hasattr(o, 'len'):
116        total_length = o.len
117
118    elif hasattr(o, 'fileno'):
119        try:
120            fileno = o.fileno()
121        except io.UnsupportedOperation:
122            pass
123        else:
124            total_length = os.fstat(fileno).st_size
125
126            # Having used fstat to determine the file length, we need to
127            # confirm that this file was opened up in binary mode.
128            if 'b' not in o.mode:
129                warnings.warn((
130                    "Requests has determined the content-length for this "
131                    "request using the binary size of the file: however, the "
132                    "file has been opened in text mode (i.e. without the 'b' "
133                    "flag in the mode). This may lead to an incorrect "
134                    "content-length. In Requests 3.0, support will be removed "
135                    "for files in text mode."),
136                    FileModeWarning
137                )
138
139    if hasattr(o, 'tell'):
140        try:
141            current_position = o.tell()
142        except (OSError, IOError):
143            # This can happen in some weird situations, such as when the file
144            # is actually a special file descriptor like stdin. In this
145            # instance, we don't know what the length is, so set it to zero and
146            # let requests chunk it instead.
147            if total_length is not None:
148                current_position = total_length
149        else:
150            if hasattr(o, 'seek') and total_length is None:
151                # StringIO and BytesIO have seek but no useable fileno
152                try:
153                    # seek to end of file
154                    o.seek(0, 2)
155                    total_length = o.tell()
156
157                    # seek back to current position to support
158                    # partially read file-like objects
159                    o.seek(current_position or 0)
160                except (OSError, IOError):
161                    total_length = 0
162
163    if total_length is None:
164        total_length = 0
165
166    return max(0, total_length - current_position)
167
168
169def get_netrc_auth(url, raise_errors=False):
170    """Returns the Requests tuple auth for a given url from netrc."""
171
172    netrc_file = os.environ.get('NETRC')
173    if netrc_file is not None:
174        netrc_locations = (netrc_file,)
175    else:
176        netrc_locations = ('~/{}'.format(f) for f in NETRC_FILES)
177
178    try:
179        from netrc import netrc, NetrcParseError
180
181        netrc_path = None
182
183        for f in netrc_locations:
184            try:
185                loc = os.path.expanduser(f)
186            except KeyError:
187                # os.path.expanduser can fail when $HOME is undefined and
188                # getpwuid fails. See https://bugs.python.org/issue20164 &
189                # https://github.com/psf/requests/issues/1846
190                return
191
192            if os.path.exists(loc):
193                netrc_path = loc
194                break
195
196        # Abort early if there isn't one.
197        if netrc_path is None:
198            return
199
200        ri = urlparse(url)
201
202        # Strip port numbers from netloc. This weird `if...encode`` dance is
203        # used for Python 3.2, which doesn't support unicode literals.
204        splitstr = b':'
205        if isinstance(url, str):
206            splitstr = splitstr.decode('ascii')
207        host = ri.netloc.split(splitstr)[0]
208
209        try:
210            _netrc = netrc(netrc_path).authenticators(host)
211            if _netrc:
212                # Return with login / password
213                login_i = (0 if _netrc[0] else 1)
214                return (_netrc[login_i], _netrc[2])
215        except (NetrcParseError, IOError):
216            # If there was a parsing error or a permissions issue reading the file,
217            # we'll just skip netrc auth unless explicitly asked to raise errors.
218            if raise_errors:
219                raise
220
221    # App Engine hackiness.
222    except (ImportError, AttributeError):
223        pass
224
225
226def guess_filename(obj):
227    """Tries to guess the filename of the given object."""
228    name = getattr(obj, 'name', None)
229    if (name and isinstance(name, basestring) and name[0] != '<' and
230            name[-1] != '>'):
231        return os.path.basename(name)
232
233
234def extract_zipped_paths(path):
235    """Replace nonexistent paths that look like they refer to a member of a zip
236    archive with the location of an extracted copy of the target, or else
237    just return the provided path unchanged.
238    """
239    if os.path.exists(path):
240        # this is already a valid path, no need to do anything further
241        return path
242
243    # find the first valid part of the provided path and treat that as a zip archive
244    # assume the rest of the path is the name of a member in the archive
245    archive, member = os.path.split(path)
246    while archive and not os.path.exists(archive):
247        archive, prefix = os.path.split(archive)
248        member = '/'.join([prefix, member])
249
250    if not zipfile.is_zipfile(archive):
251        return path
252
253    zip_file = zipfile.ZipFile(archive)
254    if member not in zip_file.namelist():
255        return path
256
257    # we have a valid zip archive and a valid member of that archive
258    tmp = tempfile.gettempdir()
259    extracted_path = os.path.join(tmp, *member.split('/'))
260    if not os.path.exists(extracted_path):
261        extracted_path = zip_file.extract(member, path=tmp)
262
263    return extracted_path
264
265
266def from_key_val_list(value):
267    """Take an object and test to see if it can be represented as a
268    dictionary. Unless it can not be represented as such, return an
269    OrderedDict, e.g.,
270
271    ::
272
273        >>> from_key_val_list([('key', 'val')])
274        OrderedDict([('key', 'val')])
275        >>> from_key_val_list('string')
276        Traceback (most recent call last):
277        ...
278        ValueError: cannot encode objects that are not 2-tuples
279        >>> from_key_val_list({'key': 'val'})
280        OrderedDict([('key', 'val')])
281
282    :rtype: OrderedDict
283    """
284    if value is None:
285        return None
286
287    if isinstance(value, (str, bytes, bool, int)):
288        raise ValueError('cannot encode objects that are not 2-tuples')
289
290    return OrderedDict(value)
291
292
293def to_key_val_list(value):
294    """Take an object and test to see if it can be represented as a
295    dictionary. If it can be, return a list of tuples, e.g.,
296
297    ::
298
299        >>> to_key_val_list([('key', 'val')])
300        [('key', 'val')]
301        >>> to_key_val_list({'key': 'val'})
302        [('key', 'val')]
303        >>> to_key_val_list('string')
304        Traceback (most recent call last):
305        ...
306        ValueError: cannot encode objects that are not 2-tuples
307
308    :rtype: list
309    """
310    if value is None:
311        return None
312
313    if isinstance(value, (str, bytes, bool, int)):
314        raise ValueError('cannot encode objects that are not 2-tuples')
315
316    if isinstance(value, Mapping):
317        value = value.items()
318
319    return list(value)
320
321
322# From mitsuhiko/werkzeug (used with permission).
323def parse_list_header(value):
324    """Parse lists as described by RFC 2068 Section 2.
325
326    In particular, parse comma-separated lists where the elements of
327    the list may include quoted-strings.  A quoted-string could
328    contain a comma.  A non-quoted string could have quotes in the
329    middle.  Quotes are removed automatically after parsing.
330
331    It basically works like :func:`parse_set_header` just that items
332    may appear multiple times and case sensitivity is preserved.
333
334    The return value is a standard :class:`list`:
335
336    >>> parse_list_header('token, "quoted value"')
337    ['token', 'quoted value']
338
339    To create a header from the :class:`list` again, use the
340    :func:`dump_header` function.
341
342    :param value: a string with a list header.
343    :return: :class:`list`
344    :rtype: list
345    """
346    result = []
347    for item in _parse_list_header(value):
348        if item[:1] == item[-1:] == '"':
349            item = unquote_header_value(item[1:-1])
350        result.append(item)
351    return result
352
353
354# From mitsuhiko/werkzeug (used with permission).
355def parse_dict_header(value):
356    """Parse lists of key, value pairs as described by RFC 2068 Section 2 and
357    convert them into a python dict:
358
359    >>> d = parse_dict_header('foo="is a fish", bar="as well"')
360    >>> type(d) is dict
361    True
362    >>> sorted(d.items())
363    [('bar', 'as well'), ('foo', 'is a fish')]
364
365    If there is no value for a key it will be `None`:
366
367    >>> parse_dict_header('key_without_value')
368    {'key_without_value': None}
369
370    To create a header from the :class:`dict` again, use the
371    :func:`dump_header` function.
372
373    :param value: a string with a dict header.
374    :return: :class:`dict`
375    :rtype: dict
376    """
377    result = {}
378    for item in _parse_list_header(value):
379        if '=' not in item:
380            result[item] = None
381            continue
382        name, value = item.split('=', 1)
383        if value[:1] == value[-1:] == '"':
384            value = unquote_header_value(value[1:-1])
385        result[name] = value
386    return result
387
388
389# From mitsuhiko/werkzeug (used with permission).
390def unquote_header_value(value, is_filename=False):
391    r"""Unquotes a header value.  (Reversal of :func:`quote_header_value`).
392    This does not use the real unquoting but what browsers are actually
393    using for quoting.
394
395    :param value: the header value to unquote.
396    :rtype: str
397    """
398    if value and value[0] == value[-1] == '"':
399        # this is not the real unquoting, but fixing this so that the
400        # RFC is met will result in bugs with internet explorer and
401        # probably some other browsers as well.  IE for example is
402        # uploading files with "C:\foo\bar.txt" as filename
403        value = value[1:-1]
404
405        # if this is a filename and the starting characters look like
406        # a UNC path, then just return the value without quotes.  Using the
407        # replace sequence below on a UNC path has the effect of turning
408        # the leading double slash into a single slash and then
409        # _fix_ie_filename() doesn't work correctly.  See #458.
410        if not is_filename or value[:2] != '\\\\':
411            return value.replace('\\\\', '\\').replace('\\"', '"')
412    return value
413
414
415def dict_from_cookiejar(cj):
416    """Returns a key/value dictionary from a CookieJar.
417
418    :param cj: CookieJar object to extract cookies from.
419    :rtype: dict
420    """
421
422    cookie_dict = {}
423
424    for cookie in cj:
425        cookie_dict[cookie.name] = cookie.value
426
427    return cookie_dict
428
429
430def add_dict_to_cookiejar(cj, cookie_dict):
431    """Returns a CookieJar from a key/value dictionary.
432
433    :param cj: CookieJar to insert cookies into.
434    :param cookie_dict: Dict of key/values to insert into CookieJar.
435    :rtype: CookieJar
436    """
437
438    return cookiejar_from_dict(cookie_dict, cj)
439
440
441def get_encodings_from_content(content):
442    """Returns encodings from given content string.
443
444    :param content: bytestring to extract encodings from.
445    """
446    warnings.warn((
447        'In requests 3.0, get_encodings_from_content will be removed. For '
448        'more information, please see the discussion on issue #2266. (This'
449        ' warning should only appear once.)'),
450        DeprecationWarning)
451
452    charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
453    pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
454    xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]')
455
456    return (charset_re.findall(content) +
457            pragma_re.findall(content) +
458            xml_re.findall(content))
459
460
461def _parse_content_type_header(header):
462    """Returns content type and parameters from given header
463
464    :param header: string
465    :return: tuple containing content type and dictionary of
466         parameters
467    """
468
469    tokens = header.split(';')
470    content_type, params = tokens[0].strip(), tokens[1:]
471    params_dict = {}
472    items_to_strip = "\"' "
473
474    for param in params:
475        param = param.strip()
476        if param:
477            key, value = param, True
478            index_of_equals = param.find("=")
479            if index_of_equals != -1:
480                key = param[:index_of_equals].strip(items_to_strip)
481                value = param[index_of_equals + 1:].strip(items_to_strip)
482            params_dict[key.lower()] = value
483    return content_type, params_dict
484
485
486def get_encoding_from_headers(headers):
487    """Returns encodings from given HTTP Header Dict.
488
489    :param headers: dictionary to extract encoding from.
490    :rtype: str
491    """
492
493    content_type = headers.get('content-type')
494
495    if not content_type:
496        return None
497
498    content_type, params = _parse_content_type_header(content_type)
499
500    if 'charset' in params:
501        return params['charset'].strip("'\"")
502
503    if 'text' in content_type:
504        return 'ISO-8859-1'
505
506    if 'application/json' in content_type:
507        # Assume UTF-8 based on RFC 4627: https://www.ietf.org/rfc/rfc4627.txt since the charset was unset
508        return 'utf-8'
509
510
511def stream_decode_response_unicode(iterator, r):
512    """Stream decodes a iterator."""
513
514    if r.encoding is None:
515        for item in iterator:
516            yield item
517        return
518
519    decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace')
520    for chunk in iterator:
521        rv = decoder.decode(chunk)
522        if rv:
523            yield rv
524    rv = decoder.decode(b'', final=True)
525    if rv:
526        yield rv
527
528
529def iter_slices(string, slice_length):
530    """Iterate over slices of a string."""
531    pos = 0
532    if slice_length is None or slice_length <= 0:
533        slice_length = len(string)
534    while pos < len(string):
535        yield string[pos:pos + slice_length]
536        pos += slice_length
537
538
539def get_unicode_from_response(r):
540    """Returns the requested content back in unicode.
541
542    :param r: Response object to get unicode content from.
543
544    Tried:
545
546    1. charset from content-type
547    2. fall back and replace all unicode characters
548
549    :rtype: str
550    """
551    warnings.warn((
552        'In requests 3.0, get_unicode_from_response will be removed. For '
553        'more information, please see the discussion on issue #2266. (This'
554        ' warning should only appear once.)'),
555        DeprecationWarning)
556
557    tried_encodings = []
558
559    # Try charset from content-type
560    encoding = get_encoding_from_headers(r.headers)
561
562    if encoding:
563        try:
564            return str(r.content, encoding)
565        except UnicodeError:
566            tried_encodings.append(encoding)
567
568    # Fall back:
569    try:
570        return str(r.content, encoding, errors='replace')
571    except TypeError:
572        return r.content
573
574
575# The unreserved URI characters (RFC 3986)
576UNRESERVED_SET = frozenset(
577    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789-._~")
578
579
580def unquote_unreserved(uri):
581    """Un-escape any percent-escape sequences in a URI that are unreserved
582    characters. This leaves all reserved, illegal and non-ASCII bytes encoded.
583
584    :rtype: str
585    """
586    parts = uri.split('%')
587    for i in range(1, len(parts)):
588        h = parts[i][0:2]
589        if len(h) == 2 and h.isalnum():
590            try:
591                c = chr(int(h, 16))
592            except ValueError:
593                raise InvalidURL("Invalid percent-escape sequence: '%s'" % h)
594
595            if c in UNRESERVED_SET:
596                parts[i] = c + parts[i][2:]
597            else:
598                parts[i] = '%' + parts[i]
599        else:
600            parts[i] = '%' + parts[i]
601    return ''.join(parts)
602
603
604def requote_uri(uri):
605    """Re-quote the given URI.
606
607    This function passes the given URI through an unquote/quote cycle to
608    ensure that it is fully and consistently quoted.
609
610    :rtype: str
611    """
612    safe_with_percent = "!#$%&'()*+,/:;=?@[]~"
613    safe_without_percent = "!#$&'()*+,/:;=?@[]~"
614    try:
615        # Unquote only the unreserved characters
616        # Then quote only illegal characters (do not quote reserved,
617        # unreserved, or '%')
618        return quote(unquote_unreserved(uri), safe=safe_with_percent)
619    except InvalidURL:
620        # We couldn't unquote the given URI, so let's try quoting it, but
621        # there may be unquoted '%'s in the URI. We need to make sure they're
622        # properly quoted so they do not cause issues elsewhere.
623        return quote(uri, safe=safe_without_percent)
624
625
626def address_in_network(ip, net):
627    """This function allows you to check if an IP belongs to a network subnet
628
629    Example: returns True if ip = 192.168.1.1 and net = 192.168.1.0/24
630             returns False if ip = 192.168.1.1 and net = 192.168.100.0/24
631
632    :rtype: bool
633    """
634    ipaddr = struct.unpack('=L', socket.inet_aton(ip))[0]
635    netaddr, bits = net.split('/')
636    netmask = struct.unpack('=L', socket.inet_aton(dotted_netmask(int(bits))))[0]
637    network = struct.unpack('=L', socket.inet_aton(netaddr))[0] & netmask
638    return (ipaddr & netmask) == (network & netmask)
639
640
641def dotted_netmask(mask):
642    """Converts mask from /xx format to xxx.xxx.xxx.xxx
643
644    Example: if mask is 24 function returns 255.255.255.0
645
646    :rtype: str
647    """
648    bits = 0xffffffff ^ (1 << 32 - mask) - 1
649    return socket.inet_ntoa(struct.pack('>I', bits))
650
651
652def is_ipv4_address(string_ip):
653    """
654    :rtype: bool
655    """
656    try:
657        socket.inet_aton(string_ip)
658    except socket.error:
659        return False
660    return True
661
662
663def is_valid_cidr(string_network):
664    """
665    Very simple check of the cidr format in no_proxy variable.
666
667    :rtype: bool
668    """
669    if string_network.count('/') == 1:
670        try:
671            mask = int(string_network.split('/')[1])
672        except ValueError:
673            return False
674
675        if mask < 1 or mask > 32:
676            return False
677
678        try:
679            socket.inet_aton(string_network.split('/')[0])
680        except socket.error:
681            return False
682    else:
683        return False
684    return True
685
686
687@contextlib.contextmanager
688def set_environ(env_name, value):
689    """Set the environment variable 'env_name' to 'value'
690
691    Save previous value, yield, and then restore the previous value stored in
692    the environment variable 'env_name'.
693
694    If 'value' is None, do nothing"""
695    value_changed = value is not None
696    if value_changed:
697        old_value = os.environ.get(env_name)
698        os.environ[env_name] = value
699    try:
700        yield
701    finally:
702        if value_changed:
703            if old_value is None:
704                del os.environ[env_name]
705            else:
706                os.environ[env_name] = old_value
707
708
709def should_bypass_proxies(url, no_proxy):
710    """
711    Returns whether we should bypass proxies or not.
712
713    :rtype: bool
714    """
715    # Prioritize lowercase environment variables over uppercase
716    # to keep a consistent behaviour with other http projects (curl, wget).
717    get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper())
718
719    # First check whether no_proxy is defined. If it is, check that the URL
720    # we're getting isn't in the no_proxy list.
721    no_proxy_arg = no_proxy
722    if no_proxy is None:
723        no_proxy = get_proxy('no_proxy')
724    parsed = urlparse(url)
725
726    if parsed.hostname is None:
727        # URLs don't always have hostnames, e.g. file:/// urls.
728        return True
729
730    if no_proxy:
731        # We need to check whether we match here. We need to see if we match
732        # the end of the hostname, both with and without the port.
733        no_proxy = (
734            host for host in no_proxy.replace(' ', '').split(',') if host
735        )
736
737        if is_ipv4_address(parsed.hostname):
738            for proxy_ip in no_proxy:
739                if is_valid_cidr(proxy_ip):
740                    if address_in_network(parsed.hostname, proxy_ip):
741                        return True
742                elif parsed.hostname == proxy_ip:
743                    # If no_proxy ip was defined in plain IP notation instead of cidr notation &
744                    # matches the IP of the index
745                    return True
746        else:
747            host_with_port = parsed.hostname
748            if parsed.port:
749                host_with_port += ':{}'.format(parsed.port)
750
751            for host in no_proxy:
752                if parsed.hostname.endswith(host) or host_with_port.endswith(host):
753                    # The URL does match something in no_proxy, so we don't want
754                    # to apply the proxies on this URL.
755                    return True
756
757    with set_environ('no_proxy', no_proxy_arg):
758        # parsed.hostname can be `None` in cases such as a file URI.
759        try:
760            bypass = proxy_bypass(parsed.hostname)
761        except (TypeError, socket.gaierror):
762            bypass = False
763
764    if bypass:
765        return True
766
767    return False
768
769
770def get_environ_proxies(url, no_proxy=None):
771    """
772    Return a dict of environment proxies.
773
774    :rtype: dict
775    """
776    if should_bypass_proxies(url, no_proxy=no_proxy):
777        return {}
778    else:
779        return getproxies()
780
781
782def select_proxy(url, proxies):
783    """Select a proxy for the url, if applicable.
784
785    :param url: The url being for the request
786    :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs
787    """
788    proxies = proxies or {}
789    urlparts = urlparse(url)
790    if urlparts.hostname is None:
791        return proxies.get(urlparts.scheme, proxies.get('all'))
792
793    proxy_keys = [
794        urlparts.scheme + '://' + urlparts.hostname,
795        urlparts.scheme,
796        'all://' + urlparts.hostname,
797        'all',
798    ]
799    proxy = None
800    for proxy_key in proxy_keys:
801        if proxy_key in proxies:
802            proxy = proxies[proxy_key]
803            break
804
805    return proxy
806
807
808def default_user_agent(name="python-requests"):
809    """
810    Return a string representing the default user agent.
811
812    :rtype: str
813    """
814    return '%s/%s' % (name, __version__)
815
816
817def default_headers():
818    """
819    :rtype: requests.structures.CaseInsensitiveDict
820    """
821    return CaseInsensitiveDict({
822        'User-Agent': default_user_agent(),
823        'Accept-Encoding': ', '.join(('gzip', 'deflate')),
824        'Accept': '*/*',
825        'Connection': 'keep-alive',
826    })
827
828
829def parse_header_links(value):
830    """Return a list of parsed link headers proxies.
831
832    i.e. Link: <http:/.../front.jpeg>; rel=front; type="image/jpeg",<http://.../back.jpeg>; rel=back;type="image/jpeg"
833
834    :rtype: list
835    """
836
837    links = []
838
839    replace_chars = ' \'"'
840
841    value = value.strip(replace_chars)
842    if not value:
843        return links
844
845    for val in re.split(', *<', value):
846        try:
847            url, params = val.split(';', 1)
848        except ValueError:
849            url, params = val, ''
850
851        link = {'url': url.strip('<> \'"')}
852
853        for param in params.split(';'):
854            try:
855                key, value = param.split('=')
856            except ValueError:
857                break
858
859            link[key.strip(replace_chars)] = value.strip(replace_chars)
860
861        links.append(link)
862
863    return links
864
865
866# Null bytes; no need to recreate these on each call to guess_json_utf
867_null = '\x00'.encode('ascii')  # encoding to ASCII for Python 3
868_null2 = _null * 2
869_null3 = _null * 3
870
871
872def guess_json_utf(data):
873    """
874    :rtype: str
875    """
876    # JSON always starts with two ASCII characters, so detection is as
877    # easy as counting the nulls and from their location and count
878    # determine the encoding. Also detect a BOM, if present.
879    sample = data[:4]
880    if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
881        return 'utf-32'     # BOM included
882    if sample[:3] == codecs.BOM_UTF8:
883        return 'utf-8-sig'  # BOM included, MS style (discouraged)
884    if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
885        return 'utf-16'     # BOM included
886    nullcount = sample.count(_null)
887    if nullcount == 0:
888        return 'utf-8'
889    if nullcount == 2:
890        if sample[::2] == _null2:   # 1st and 3rd are null
891            return 'utf-16-be'
892        if sample[1::2] == _null2:  # 2nd and 4th are null
893            return 'utf-16-le'
894        # Did not detect 2 valid UTF-16 ascii-range characters
895    if nullcount == 3:
896        if sample[:3] == _null3:
897            return 'utf-32-be'
898        if sample[1:] == _null3:
899            return 'utf-32-le'
900        # Did not detect a valid UTF-32 ascii-range character
901    return None
902
903
904def prepend_scheme_if_needed(url, new_scheme):
905    """Given a URL that may or may not have a scheme, prepend the given scheme.
906    Does not replace a present scheme with the one provided as an argument.
907
908    :rtype: str
909    """
910    scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme)
911
912    # urlparse is a finicky beast, and sometimes decides that there isn't a
913    # netloc present. Assume that it's being over-cautious, and switch netloc
914    # and path if urlparse decided there was no netloc.
915    if not netloc:
916        netloc, path = path, netloc
917
918    return urlunparse((scheme, netloc, path, params, query, fragment))
919
920
921def get_auth_from_url(url):
922    """Given a url with authentication components, extract them into a tuple of
923    username,password.
924
925    :rtype: (str,str)
926    """
927    parsed = urlparse(url)
928
929    try:
930        auth = (unquote(parsed.username), unquote(parsed.password))
931    except (AttributeError, TypeError):
932        auth = ('', '')
933
934    return auth
935
936
937# Moved outside of function to avoid recompile every call
938_CLEAN_HEADER_REGEX_BYTE = re.compile(b'^\\S[^\\r\\n]*$|^$')
939_CLEAN_HEADER_REGEX_STR = re.compile(r'^\S[^\r\n]*$|^$')
940
941
942def check_header_validity(header):
943    """Verifies that header value is a string which doesn't contain
944    leading whitespace or return characters. This prevents unintended
945    header injection.
946
947    :param header: tuple, in the format (name, value).
948    """
949    name, value = header
950
951    if isinstance(value, bytes):
952        pat = _CLEAN_HEADER_REGEX_BYTE
953    else:
954        pat = _CLEAN_HEADER_REGEX_STR
955    try:
956        if not pat.match(value):
957            raise InvalidHeader("Invalid return character or leading space in header: %s" % name)
958    except TypeError:
959        raise InvalidHeader("Value for header {%s: %s} must be of type str or "
960                            "bytes, not %s" % (name, value, type(value)))
961
962
963def urldefragauth(url):
964    """
965    Given a url remove the fragment and the authentication part.
966
967    :rtype: str
968    """
969    scheme, netloc, path, params, query, fragment = urlparse(url)
970
971    # see func:`prepend_scheme_if_needed`
972    if not netloc:
973        netloc, path = path, netloc
974
975    netloc = netloc.rsplit('@', 1)[-1]
976
977    return urlunparse((scheme, netloc, path, params, query, ''))
978
979
980def rewind_body(prepared_request):
981    """Move file pointer back to its recorded starting position
982    so it can be read again on redirect.
983    """
984    body_seek = getattr(prepared_request.body, 'seek', None)
985    if body_seek is not None and isinstance(prepared_request._body_position, integer_types):
986        try:
987            body_seek(prepared_request._body_position)
988        except (IOError, OSError):
989            raise UnrewindableBodyError("An error occurred when rewinding request "
990                                        "body for redirect.")
991    else:
992        raise UnrewindableBodyError("Unable to rewind request body for redirect.")
993