# -*- coding: utf-8 -*-

"""
requests.utils
~~~~~~~~~~~~~~

This module provides utility functions that are used within Requests
that are also useful for external consumption.
"""

import cgi
import codecs
try:
    from collections.abc import Mapping  # Python 3.3+
except ImportError:                      # Python 2
    from collections import Mapping
import contextlib
import io
import os
import platform
import re
import socket
import struct
import warnings

from .__version__ import __version__
from . import certs
# to_native_string is unused here, but imported here for backwards compatibility
from ._internal_utils import to_native_string
from .compat import parse_http_list as _parse_list_header
from .compat import (
    quote, urlparse, bytes, str, OrderedDict, unquote, getproxies,
    proxy_bypass, urlunparse, basestring, integer_types, is_py3,
    proxy_bypass_environment, getproxies_environment)
from .cookies import cookiejar_from_dict
from .structures import CaseInsensitiveDict
from .exceptions import (
    InvalidURL, InvalidHeader, FileModeWarning, UnrewindableBodyError)

NETRC_FILES = ('.netrc', '_netrc')

DEFAULT_CA_BUNDLE_PATH = certs.where()


if platform.system() == 'Windows':
    # provide a proxy_bypass version on Windows without DNS lookups

    def proxy_bypass_registry(host):
        if is_py3:
            import winreg
        else:
            import _winreg as winreg
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0]
            proxyOverride = winreg.QueryValueEx(internetSettings,
                                                'ProxyOverride')[0]
        except OSError:
            return False
        if not proxyEnable or not proxyOverride:
            return False

        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                if '.' not in host:
                    return True
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            if re.match(test, host, re.I):
                return True
        return False
    def proxy_bypass(host):  # noqa
        """Return True if the host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or the registry.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)

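# Illustrative sketch of the registry-glob translation above (the registry
# values here are hypothetical): with ProxyOverride set to
# '192.168.*;<local>', proxy_bypass_registry('192.168.0.5') matches the
# translated pattern r'192\.168\..*' and returns True; a dotless host such
# as 'intranet' matches '<local>' and is bypassed as well.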

def dict_to_sequence(d):
    """Returns an iterable of key/value pairs: ``d.items()`` if ``d`` is a
    mapping, otherwise ``d`` unchanged."""

    if hasattr(d, 'items'):
        d = d.items()

    return d

def super_len(o):
    total_length = None
    current_position = 0

    if hasattr(o, '__len__'):
        total_length = len(o)

    elif hasattr(o, 'len'):
        total_length = o.len

    elif hasattr(o, 'fileno'):
        try:
            fileno = o.fileno()
        except io.UnsupportedOperation:
            pass
        else:
            total_length = os.fstat(fileno).st_size

            # Having used fstat to determine the file length, we need to
            # confirm that this file was opened up in binary mode.
            if 'b' not in o.mode:
                warnings.warn((
                    "Requests has determined the content-length for this "
                    "request using the binary size of the file: however, the "
                    "file has been opened in text mode (i.e. without the 'b' "
                    "flag in the mode). This may lead to an incorrect "
                    "content-length. In Requests 3.0, support will be removed "
                    "for files in text mode."),
                    FileModeWarning
                )

    if hasattr(o, 'tell'):
        try:
            current_position = o.tell()
        except (OSError, IOError):
            # This can happen in some weird situations, such as when the file
            # is actually a special file descriptor like stdin. In this
            # instance, we don't know what the length is, so set it to zero and
            # let requests chunk it instead.
            if total_length is not None:
                current_position = total_length
        else:
            if hasattr(o, 'seek') and total_length is None:
                # StringIO and BytesIO have seek but no usable fileno
                try:
                    # seek to end of file
                    o.seek(0, 2)
                    total_length = o.tell()

                    # seek back to current position to support
                    # partially read file-like objects
                    o.seek(current_position or 0)
                except (OSError, IOError):
                    total_length = 0

    if total_length is None:
        total_length = 0

    return max(0, total_length - current_position)

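# Illustrative usage (a sketch; BytesIO stands in for any file-like object):
#
#     >>> import io
#     >>> super_len(io.BytesIO(b'hello'))
#     5
#     >>> f = io.BytesIO(b'hello')
#     >>> _ = f.read(2)
#     >>> super_len(f)  # only the unread remainder counts
#     3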

def get_netrc_auth(url, raise_errors=False):
    """Returns the Requests tuple auth for a given url from netrc."""

    try:
        from netrc import netrc, NetrcParseError

        netrc_path = None

        for f in NETRC_FILES:
            try:
                loc = os.path.expanduser('~/{0}'.format(f))
            except KeyError:
                # os.path.expanduser can fail when $HOME is undefined and
                # getpwuid fails. See http://bugs.python.org/issue20164 &
                # https://github.com/requests/requests/issues/1846
                return

            if os.path.exists(loc):
                netrc_path = loc
                break

        # Abort early if there isn't one.
        if netrc_path is None:
            return

        ri = urlparse(url)

        # Strip port numbers from netloc. This weird ``if...decode`` dance is
        # used for Python 3.2, which doesn't support unicode literals.
        splitstr = b':'
        if isinstance(url, str):
            splitstr = splitstr.decode('ascii')
        host = ri.netloc.split(splitstr)[0]

        try:
            _netrc = netrc(netrc_path).authenticators(host)
            if _netrc:
                # Return with login / password
                login_i = (0 if _netrc[0] else 1)
                return (_netrc[login_i], _netrc[2])
        except (NetrcParseError, IOError):
            # If there was a parsing error or a permissions issue reading the file,
            # we'll just skip netrc auth unless explicitly asked to raise errors.
            if raise_errors:
                raise

    # AppEngine hackiness.
    except (ImportError, AttributeError):
        pass

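# Illustrative sketch (assumes a ~/.netrc file that you control): given a
# line such as ``machine api.example.com login alice password s3cret``,
# get_netrc_auth('https://api.example.com/v1') would return
# ('alice', 's3cret'); with no matching machine entry it returns None.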

def guess_filename(obj):
    """Tries to guess the filename of the given object."""
    name = getattr(obj, 'name', None)
    if (name and isinstance(name, basestring) and name[0] != '<' and
            name[-1] != '>'):
        return os.path.basename(name)

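# Illustrative sketch (hypothetical objects): anything exposing a string
# ``name`` attribute that isn't wrapped in angle brackets works, e.g. an
# object with ``name = '/tmp/upload.csv'`` yields 'upload.csv', while an
# object whose name is '<stdin>' yields None.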

def from_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. If it can be, return an OrderedDict, e.g.,

    ::

        >>> from_key_val_list([('key', 'val')])
        OrderedDict([('key', 'val')])
        >>> from_key_val_list('string')
        ValueError: cannot encode objects that are not 2-tuples
        >>> from_key_val_list({'key': 'val'})
        OrderedDict([('key', 'val')])

    :rtype: OrderedDict
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    return OrderedDict(value)


def to_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. If it can be, return a list of tuples, e.g.,

    ::

        >>> to_key_val_list([('key', 'val')])
        [('key', 'val')]
        >>> to_key_val_list({'key': 'val'})
        [('key', 'val')]
        >>> to_key_val_list('string')
        ValueError: cannot encode objects that are not 2-tuples

    :rtype: list
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    if isinstance(value, Mapping):
        value = value.items()

    return list(value)


# From mitsuhiko/werkzeug (used with permission).
def parse_list_header(value):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.  A non-quoted string could have quotes in the
    middle.  Quotes are removed automatically after parsing.

    It basically works like :func:`parse_set_header` just that items
    may appear multiple times and case sensitivity is preserved.

    The return value is a standard :class:`list`:

    >>> parse_list_header('token, "quoted value"')
    ['token', 'quoted value']

    To create a header from the :class:`list` again, use the
    :func:`dump_header` function.

    :param value: a string with a list header.
    :return: :class:`list`
    :rtype: list
    """
    result = []
    for item in _parse_list_header(value):
        if item[:1] == item[-1:] == '"':
            item = unquote_header_value(item[1:-1])
        result.append(item)
    return result


# From mitsuhiko/werkzeug (used with permission).
def parse_dict_header(value):
    """Parse lists of key, value pairs as described by RFC 2068 Section 2 and
    convert them into a python dict:

    >>> d = parse_dict_header('foo="is a fish", bar="as well"')
    >>> type(d) is dict
    True
    >>> sorted(d.items())
    [('bar', 'as well'), ('foo', 'is a fish')]

    If there is no value for a key it will be `None`:

    >>> parse_dict_header('key_without_value')
    {'key_without_value': None}

    To create a header from the :class:`dict` again, use the
    :func:`dump_header` function.

    :param value: a string with a dict header.
    :return: :class:`dict`
    :rtype: dict
    """
    result = {}
    for item in _parse_list_header(value):
        if '=' not in item:
            result[item] = None
            continue
        name, value = item.split('=', 1)
        if value[:1] == value[-1:] == '"':
            value = unquote_header_value(value[1:-1])
        result[name] = value
    return result


# From mitsuhiko/werkzeug (used with permission).
def unquote_header_value(value, is_filename=False):
    r"""Unquotes a header value.  (Reversal of :func:`quote_header_value`).
    This does not use the real unquoting but what browsers are actually
    using for quoting.

    :param value: the header value to unquote.
    :rtype: str
    """
    if value and value[0] == value[-1] == '"':
        # this is not the real unquoting, but fixing this so that the
        # RFC is met will result in bugs with internet explorer and
        # probably some other browsers as well.  IE for example is
        # uploading files with "C:\foo\bar.txt" as filename
        value = value[1:-1]

        # if this is a filename and the starting characters look like
        # a UNC path, then just return the value without quotes.  Using the
        # replace sequence below on a UNC path has the effect of turning
        # the leading double slash into a single slash and then
        # _fix_ie_filename() doesn't work correctly.  See #458.
        if not is_filename or value[:2] != '\\\\':
            return value.replace('\\\\', '\\').replace('\\"', '"')
    return value

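# Illustrative usage (a sketch):
#
#     >>> unquote_header_value('"hello world"')
#     'hello world'
#     >>> unquote_header_value('token')  # unquoted values pass through
#     'token'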

def dict_from_cookiejar(cj):
    """Returns a key/value dictionary from a CookieJar.

    :param cj: CookieJar object to extract cookies from.
    :rtype: dict
    """

    cookie_dict = {}

    for cookie in cj:
        cookie_dict[cookie.name] = cookie.value

    return cookie_dict


def add_dict_to_cookiejar(cj, cookie_dict):
    """Returns a CookieJar from a key/value dictionary.

    :param cj: CookieJar to insert cookies into.
    :param cookie_dict: Dict of key/values to insert into CookieJar.
    :rtype: CookieJar
    """

    return cookiejar_from_dict(cookie_dict, cj)

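# Illustrative round trip (a sketch using this module's own helpers):
#
#     >>> jar = cookiejar_from_dict({})
#     >>> jar = add_dict_to_cookiejar(jar, {'session': 'abc123'})
#     >>> dict_from_cookiejar(jar)
#     {'session': 'abc123'}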

def get_encodings_from_content(content):
    """Returns encodings from given content string.

    :param content: bytestring to extract encodings from.
    """
    warnings.warn((
        'In requests 3.0, get_encodings_from_content will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
    pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
    xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]')

    return (charset_re.findall(content) +
            pragma_re.findall(content) +
            xml_re.findall(content))


def get_encoding_from_headers(headers):
    """Returns the encoding from given HTTP Header Dict.

    :param headers: dictionary to extract encoding from.
    :rtype: str
    """

    content_type = headers.get('content-type')

    if not content_type:
        return None

    content_type, params = cgi.parse_header(content_type)

    if 'charset' in params:
        return params['charset'].strip("'\"")

    if 'text' in content_type:
        return 'ISO-8859-1'

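# Illustrative usage (a sketch; any mapping with a lowercase 'content-type'
# key works here, though Requests itself passes a CaseInsensitiveDict):
#
#     >>> get_encoding_from_headers({'content-type': 'text/html; charset=utf-8'})
#     'utf-8'
#     >>> get_encoding_from_headers({'content-type': 'text/plain'})
#     'ISO-8859-1'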

def stream_decode_response_unicode(iterator, r):
    """Stream decodes an iterator."""

    if r.encoding is None:
        for item in iterator:
            yield item
        return

    decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace')
    for chunk in iterator:
        rv = decoder.decode(chunk)
        if rv:
            yield rv
    rv = decoder.decode(b'', final=True)
    if rv:
        yield rv

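# Illustrative sketch (FakeResponse is a hypothetical stand-in; only its
# ``encoding`` attribute is consulted). A multi-byte character split across
# chunks, e.g. [b'caf\xc3', b'\xa9'], is reassembled by the incremental
# decoder: the generator yields u'caf' and then u'\xe9' instead of two
# replacement characters.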

def iter_slices(string, slice_length):
    """Iterate over slices of a string."""
    pos = 0
    if slice_length is None or slice_length <= 0:
        slice_length = len(string)
    while pos < len(string):
        yield string[pos:pos + slice_length]
        pos += slice_length

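# Illustrative usage (a sketch):
#
#     >>> list(iter_slices('abcdef', 4))
#     ['abcd', 'ef']
#     >>> list(iter_slices('abcdef', None))  # None or non-positive: one slice
#     ['abcdef']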

def get_unicode_from_response(r):
    """Returns the response content as unicode.

    :param r: Response object to get unicode content from.

    Encodings are tried in this order:

    1. charset from content-type
    2. fall back and decode, replacing all undecodable characters

    :rtype: str
    """
    warnings.warn((
        'In requests 3.0, get_unicode_from_response will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    tried_encodings = []

    # Try charset from content-type
    encoding = get_encoding_from_headers(r.headers)

    if encoding:
        try:
            return str(r.content, encoding)
        except UnicodeError:
            tried_encodings.append(encoding)

    # Fall back:
    try:
        return str(r.content, encoding, errors='replace')
    except TypeError:
        return r.content


# The unreserved URI characters (RFC 3986)
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789-._~")


def unquote_unreserved(uri):
    """Un-escape any percent-escape sequences in a URI that are unreserved
    characters. This leaves all reserved, illegal and non-ASCII bytes encoded.

    :rtype: str
    """
    parts = uri.split('%')
    for i in range(1, len(parts)):
        h = parts[i][0:2]
        if len(h) == 2 and h.isalnum():
            try:
                c = chr(int(h, 16))
            except ValueError:
                raise InvalidURL("Invalid percent-escape sequence: '%s'" % h)

            if c in UNRESERVED_SET:
                parts[i] = c + parts[i][2:]
            else:
                parts[i] = '%' + parts[i]
        else:
            parts[i] = '%' + parts[i]
    return ''.join(parts)

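# Illustrative usage (a sketch): unreserved characters are unescaped while
# reserved ones stay encoded:
#
#     >>> unquote_unreserved('%7Euser%2Fdocs')
#     '~user%2Fdocs'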

def requote_uri(uri):
    """Re-quote the given URI.

    This function passes the given URI through an unquote/quote cycle to
    ensure that it is fully and consistently quoted.

    :rtype: str
    """
    safe_with_percent = "!#$%&'()*+,/:;=?@[]~"
    safe_without_percent = "!#$&'()*+,/:;=?@[]~"
    try:
        # Unquote only the unreserved characters
        # Then quote only illegal characters (do not quote reserved,
        # unreserved, or '%')
        return quote(unquote_unreserved(uri), safe=safe_with_percent)
    except InvalidURL:
        # We couldn't unquote the given URI, so let's try quoting it, but
        # there may be unquoted '%'s in the URI. We need to make sure they're
        # properly quoted so they do not cause issues elsewhere.
        return quote(uri, safe=safe_without_percent)

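# Illustrative usage (a sketch): quoting is idempotent, so an already-encoded
# URI passes through unchanged while a bare space gets encoded:
#
#     >>> requote_uri('http://example.com/a b')
#     'http://example.com/a%20b'
#     >>> requote_uri('http://example.com/a%20b')
#     'http://example.com/a%20b'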

def address_in_network(ip, net):
    """This function allows you to check if an IP belongs to a network subnet.

    Example: returns True if ip = 192.168.1.1 and net = 192.168.1.0/24
             returns False if ip = 192.168.1.1 and net = 192.168.100.0/24

    :rtype: bool
    """
    ipaddr = struct.unpack('=L', socket.inet_aton(ip))[0]
    netaddr, bits = net.split('/')
    netmask = struct.unpack('=L', socket.inet_aton(dotted_netmask(int(bits))))[0]
    network = struct.unpack('=L', socket.inet_aton(netaddr))[0] & netmask
    return (ipaddr & netmask) == (network & netmask)


def dotted_netmask(mask):
    """Converts mask from /xx format to xxx.xxx.xxx.xxx

    Example: if mask is 24 function returns 255.255.255.0

    :rtype: str
    """
    bits = 0xffffffff ^ ((1 << (32 - mask)) - 1)
    return socket.inet_ntoa(struct.pack('>I', bits))

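# Illustrative usage (a sketch, complementing the docstring's /24 example):
#
#     >>> dotted_netmask(20)
#     '255.255.240.0'
#     >>> dotted_netmask(8)
#     '255.0.0.0'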

def is_ipv4_address(string_ip):
    """
    :rtype: bool
    """
    try:
        socket.inet_aton(string_ip)
    except socket.error:
        return False
    return True


def is_valid_cidr(string_network):
    """
    Very simple check of the CIDR format in the no_proxy variable.

    :rtype: bool
    """
    if string_network.count('/') == 1:
        try:
            mask = int(string_network.split('/')[1])
        except ValueError:
            return False

        if mask < 1 or mask > 32:
            return False

        try:
            socket.inet_aton(string_network.split('/')[0])
        except socket.error:
            return False
    else:
        return False
    return True

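# Illustrative usage (a sketch):
#
#     >>> is_valid_cidr('192.168.1.0/24')
#     True
#     >>> is_valid_cidr('192.168.1.0')     # plain address, no mask
#     False
#     >>> is_valid_cidr('192.168.1.0/33')  # mask out of range
#     False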

@contextlib.contextmanager
def set_environ(env_name, value):
    """Set the environment variable 'env_name' to 'value'.

    Save previous value, yield, and then restore the previous value stored in
    the environment variable 'env_name'.

    If 'value' is None, do nothing."""
    value_changed = value is not None
    if value_changed:
        old_value = os.environ.get(env_name)
        os.environ[env_name] = value
    try:
        yield
    finally:
        if value_changed:
            if old_value is None:
                del os.environ[env_name]
            else:
                os.environ[env_name] = old_value

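# Illustrative sketch: ``with set_environ('NO_PROXY', 'localhost'): ...``
# exposes os.environ['NO_PROXY'] == 'localhost' inside the block and
# restores the previous value (or removes the key) on exit; passing
# value=None turns the context manager into a no-op.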

def should_bypass_proxies(url, no_proxy):
    """
    Returns whether we should bypass proxies or not.

    :rtype: bool
    """
    get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper())

    # First check whether no_proxy is defined. If it is, check that the URL
    # we're getting isn't in the no_proxy list.
    no_proxy_arg = no_proxy
    if no_proxy is None:
        no_proxy = get_proxy('no_proxy')
    netloc = urlparse(url).netloc

    if no_proxy:
        # We need to check whether we match here. We need to see if we match
        # the end of the netloc, both with and without the port.
        no_proxy = (
            host for host in no_proxy.replace(' ', '').split(',') if host
        )

        ip = netloc.split(':')[0]
        if is_ipv4_address(ip):
            for proxy_ip in no_proxy:
                if is_valid_cidr(proxy_ip):
                    if address_in_network(ip, proxy_ip):
                        return True
                elif ip == proxy_ip:
                    # If the no_proxy entry was defined as a plain IP address
                    # instead of CIDR notation and it matches the URL's host
                    return True
        else:
            for host in no_proxy:
                if netloc.endswith(host) or netloc.split(':')[0].endswith(host):
                    # The URL does match something in no_proxy, so we don't want
                    # to apply the proxies on this URL.
                    return True

    # If the system proxy settings indicate that this URL should be bypassed,
    # don't proxy.
    # The proxy_bypass function is incredibly buggy on OS X in early versions
    # of Python 2.6, so allow this call to fail. Only catch the specific
    # exceptions we've seen, though: this call failing in other ways can reveal
    # legitimate problems.
    with set_environ('no_proxy', no_proxy_arg):
        try:
            bypass = proxy_bypass(netloc)
        except (TypeError, socket.gaierror):
            bypass = False

    if bypass:
        return True

    return False

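# Illustrative usage (a sketch; passing no_proxy explicitly keeps the result
# independent of this machine's environment and system proxy settings):
#
#     >>> should_bypass_proxies('http://192.168.0.5/', no_proxy='192.168.0.0/24')
#     True
#     >>> should_bypass_proxies('http://internal.example.com/', no_proxy='example.com')
#     True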

def get_environ_proxies(url, no_proxy=None):
    """
    Return a dict of environment proxies.

    :rtype: dict
    """
    if should_bypass_proxies(url, no_proxy=no_proxy):
        return {}
    else:
        return getproxies()


def select_proxy(url, proxies):
    """Select a proxy for the url, if applicable.

    :param url: The url of the request
    :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs
    """
    proxies = proxies or {}
    urlparts = urlparse(url)
    if urlparts.hostname is None:
        return proxies.get(urlparts.scheme, proxies.get('all'))

    proxy_keys = [
        urlparts.scheme + '://' + urlparts.hostname,
        urlparts.scheme,
        'all://' + urlparts.hostname,
        'all',
    ]
    proxy = None
    for proxy_key in proxy_keys:
        if proxy_key in proxies:
            proxy = proxies[proxy_key]
            break

    return proxy

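# Illustrative usage (a sketch): more specific scheme://host keys win over
# bare scheme keys, which win over 'all':
#
#     >>> proxies = {'http': 'http://proxy1:3128', 'all': 'http://proxy2:3128'}
#     >>> select_proxy('http://example.com/page', proxies)
#     'http://proxy1:3128'
#     >>> select_proxy('ftp://example.com/file', proxies)
#     'http://proxy2:3128'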

def default_user_agent(name="python-requests"):
    """
    Return a string representing the default user agent.

    :rtype: str
    """
    return '%s/%s' % (name, __version__)


def default_headers():
    """
    :rtype: requests.structures.CaseInsensitiveDict
    """
    return CaseInsensitiveDict({
        'User-Agent': default_user_agent(),
        'Accept-Encoding': ', '.join(('gzip', 'deflate')),
        'Accept': '*/*',
        'Connection': 'keep-alive',
    })


def parse_header_links(value):
    """Return a list of parsed link headers.

    i.e. Link: <http://.../front.jpeg>; rel=front; type="image/jpeg",<http://.../back.jpeg>; rel=back;type="image/jpeg"

    :rtype: list
    """

    links = []

    replace_chars = ' \'"'

    for val in re.split(', *<', value):
        try:
            url, params = val.split(';', 1)
        except ValueError:
            url, params = val, ''

        link = {'url': url.strip('<> \'"')}

        for param in params.split(';'):
            try:
                key, value = param.split('=')
            except ValueError:
                break

            link[key.strip(replace_chars)] = value.strip(replace_chars)

        links.append(link)

    return links

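# Illustrative sketch: parse_header_links('<http://example.com/page2>; rel="next"')
# returns a one-element list whose dict carries 'url' set to
# 'http://example.com/page2' and 'rel' set to 'next'.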

# Null bytes; no need to recreate these on each call to guess_json_utf
_null = '\x00'.encode('ascii')  # encoding to ASCII for Python 3
_null2 = _null * 2
_null3 = _null * 3


def guess_json_utf(data):
    """Guess the encoding of a JSON bytestring.

    :rtype: str
    """
    # JSON always starts with two ASCII characters, so detection is as
    # easy as counting the nulls; their location and count determine the
    # encoding. Also detect a BOM, if present.
    sample = data[:4]
    if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        return 'utf-32'     # BOM included
    if sample[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'  # BOM included, MS style (discouraged)
    if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
        return 'utf-16'     # BOM included
    nullcount = sample.count(_null)
    if nullcount == 0:
        return 'utf-8'
    if nullcount == 2:
        if sample[::2] == _null2:   # 1st and 3rd are null
            return 'utf-16-be'
        if sample[1::2] == _null2:  # 2nd and 4th are null
            return 'utf-16-le'
        # Did not detect 2 valid UTF-16 ascii-range characters
    if nullcount == 3:
        if sample[:3] == _null3:
            return 'utf-32-be'
        if sample[1:] == _null3:
            return 'utf-32-le'
        # Did not detect a valid UTF-32 ascii-range character
    return None

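# Illustrative usage (a sketch):
#
#     >>> guess_json_utf(b'{"a": 1}')
#     'utf-8'
#     >>> guess_json_utf('{"a": 1}'.encode('utf-16-le'))
#     'utf-16-le'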

def prepend_scheme_if_needed(url, new_scheme):
    """Given a URL that may or may not have a scheme, prepend the given scheme.
    Does not replace a present scheme with the one provided as an argument.

    :rtype: str
    """
    scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme)

    # urlparse is a finicky beast, and sometimes decides that there isn't a
    # netloc present. Assume that it's being over-cautious, and switch netloc
    # and path if urlparse decided there was no netloc.
    if not netloc:
        netloc, path = path, netloc

    return urlunparse((scheme, netloc, path, params, query, fragment))

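# Illustrative usage (a sketch):
#
#     >>> prepend_scheme_if_needed('example.com/path', 'http')
#     'http://example.com/path'
#     >>> prepend_scheme_if_needed('https://example.com/path', 'http')
#     'https://example.com/path'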

def get_auth_from_url(url):
    """Given a url with authentication components, extract them into a tuple of
    username,password.

    :rtype: (str,str)
    """
    parsed = urlparse(url)

    try:
        auth = (unquote(parsed.username), unquote(parsed.password))
    except (AttributeError, TypeError):
        auth = ('', '')

    return auth

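# Illustrative usage (a sketch); percent-encoded credentials are unquoted,
# and URLs without credentials yield empty strings:
#
#     >>> get_auth_from_url('http://user:p%40ss@example.com/')
#     ('user', 'p@ss')
#     >>> get_auth_from_url('http://example.com/')
#     ('', '')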

# Moved outside of function to avoid recompiling it on every call
_CLEAN_HEADER_REGEX_BYTE = re.compile(b'^\\S[^\\r\\n]*$|^$')
_CLEAN_HEADER_REGEX_STR = re.compile(r'^\S[^\r\n]*$|^$')


def check_header_validity(header):
    """Verifies that header value is a string which doesn't contain
    leading whitespace or return characters. This prevents unintended
    header injection.

    :param header: tuple, in the format (name, value).
    """
    name, value = header

    if isinstance(value, bytes):
        pat = _CLEAN_HEADER_REGEX_BYTE
    else:
        pat = _CLEAN_HEADER_REGEX_STR
    try:
        if not pat.match(value):
            raise InvalidHeader("Invalid return character or leading space in header: %s" % name)
    except TypeError:
        raise InvalidHeader("Value for header {%s: %s} must be of type str or "
                            "bytes, not %s" % (name, value, type(value)))

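# Illustrative sketch: a well-formed pair passes silently, while a value with
# a leading space or an embedded return character (both header-injection
# vectors) raises InvalidHeader:
#
#     >>> check_header_validity(('X-Token', 'abc123'))   # no exception
#     >>> check_header_validity(('X-Token', ' abc123'))  # raises InvalidHeader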

def urldefragauth(url):
    """
    Given a url, remove the fragment and the authentication part.

    :rtype: str
    """
    scheme, netloc, path, params, query, fragment = urlparse(url)

    # see func:`prepend_scheme_if_needed`
    if not netloc:
        netloc, path = path, netloc

    netloc = netloc.rsplit('@', 1)[-1]

    return urlunparse((scheme, netloc, path, params, query, ''))

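# Illustrative usage (a sketch):
#
#     >>> urldefragauth('http://user:pass@example.com/path#section')
#     'http://example.com/path'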

def rewind_body(prepared_request):
    """Move file pointer back to its recorded starting position
    so it can be read again on redirect.
    """
    body_seek = getattr(prepared_request.body, 'seek', None)
    if body_seek is not None and isinstance(prepared_request._body_position, integer_types):
        try:
            body_seek(prepared_request._body_position)
        except (IOError, OSError):
            raise UnrewindableBodyError("An error occurred when rewinding request "
                                        "body for redirect.")
    else:
        raise UnrewindableBodyError("Unable to rewind request body for redirect.")

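# Illustrative sketch (FakePrepared is a hypothetical stand-in exposing the
# two attributes rewind_body touches):
#
#     >>> class FakePrepared(object):
#     ...     body = io.BytesIO(b'payload')
#     ...     _body_position = 0
#     >>> _ = FakePrepared.body.read()   # simulate the body being consumed
#     >>> rewind_body(FakePrepared)      # seeks back to position 0
#     >>> FakePrepared.body.read() == b'payload'
#     True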