# -*- coding: utf-8 -*-

"""
requests.utils
~~~~~~~~~~~~~~

This module provides utility functions that are used within Requests
that are also useful for external consumption.
"""

import cgi
import codecs
import collections
import contextlib
import io
import os
import platform
import re
import socket
import struct
import warnings

from .__version__ import __version__
from . import certs
# to_native_string is unused here, but imported here for backwards compatibility
from ._internal_utils import to_native_string
from .compat import parse_http_list as _parse_list_header
from .compat import (
    quote, urlparse, bytes, str, OrderedDict, unquote, getproxies,
    proxy_bypass, urlunparse, basestring, integer_types, is_py3,
    proxy_bypass_environment, getproxies_environment)
from .cookies import cookiejar_from_dict
from .structures import CaseInsensitiveDict
from .exceptions import (
    InvalidURL, InvalidHeader, FileModeWarning, UnrewindableBodyError)

NETRC_FILES = ('.netrc', '_netrc')

DEFAULT_CA_BUNDLE_PATH = certs.where()


if platform.system() == 'Windows':
    # provide a proxy_bypass version on Windows without DNS lookups

    def proxy_bypass_registry(host):
        if is_py3:
            import winreg
        else:
            import _winreg as winreg
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0]
            proxyOverride = winreg.QueryValueEx(internetSettings,
                                                'ProxyOverride')[0]
        except OSError:
            return False
        if not proxyEnable or not proxyOverride:
            return False

        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                if '.' not in host:
                    return True
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            if re.match(test, host, re.I):
                return True
        return False

    def proxy_bypass(host):  # noqa
        """Return True if the host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or the registry.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)


def dict_to_sequence(d):
    """Returns a key/value sequence for a dict-like object."""

    if hasattr(d, 'items'):
        d = d.items()

    return d


def super_len(o):
    total_length = None
    current_position = 0

    if hasattr(o, '__len__'):
        total_length = len(o)

    elif hasattr(o, 'len'):
        total_length = o.len

    elif hasattr(o, 'fileno'):
        try:
            fileno = o.fileno()
        except io.UnsupportedOperation:
            pass
        else:
            total_length = os.fstat(fileno).st_size

            # Having used fstat to determine the file length, we need to
            # confirm that this file was opened up in binary mode.
            if 'b' not in o.mode:
                warnings.warn((
                    "Requests has determined the content-length for this "
                    "request using the binary size of the file: however, the "
                    "file has been opened in text mode (i.e. without the 'b' "
                    "flag in the mode). This may lead to an incorrect "
                    "content-length. In Requests 3.0, support will be removed "
                    "for files in text mode."),
                    FileModeWarning
                )

    if hasattr(o, 'tell'):
        try:
            current_position = o.tell()
        except (OSError, IOError):
            # This can happen in some weird situations, such as when the file
            # is actually a special file descriptor like stdin. In this
            # instance, we don't know what the length is, so set it to zero
            # and let requests chunk it instead.
            if total_length is not None:
                current_position = total_length
        else:
            if hasattr(o, 'seek') and total_length is None:
                # StringIO and BytesIO have seek but no usable fileno
                try:
                    # seek to end of file
                    o.seek(0, 2)
                    total_length = o.tell()

                    # seek back to current position to support
                    # partially read file-like objects
                    o.seek(current_position or 0)
                except (OSError, IOError):
                    total_length = 0

    if total_length is None:
        total_length = 0

    return max(0, total_length - current_position)
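# A minimal sketch of how super_len accounts for a partially read file-like
# object (illustrative only, not part of the library API):
#
#     import io
#     buf = io.BytesIO(b'abcdef')
#     buf.read(2)         # current position is now 2
#     super_len(buf)      # -> 4, the bytes remaining from here
#
# This is why a rewound or fresh stream reports its full size, while a
# stream that has already been consumed reports only what is left.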
def get_netrc_auth(url, raise_errors=False):
    """Returns the Requests tuple auth for a given url from netrc."""

    try:
        from netrc import netrc, NetrcParseError

        netrc_path = None

        for f in NETRC_FILES:
            try:
                loc = os.path.expanduser('~/{0}'.format(f))
            except KeyError:
                # os.path.expanduser can fail when $HOME is undefined and
                # getpwuid fails. See http://bugs.python.org/issue20164 &
                # https://github.com/requests/requests/issues/1846
                return

            if os.path.exists(loc):
                netrc_path = loc
                break

        # Abort early if there isn't one.
        if netrc_path is None:
            return

        ri = urlparse(url)

        # Strip port numbers from netloc. This weird `if...encode` dance is
        # used for Python 3.2, which doesn't support unicode literals.
        splitstr = b':'
        if isinstance(url, str):
            splitstr = splitstr.decode('ascii')
        host = ri.netloc.split(splitstr)[0]

        try:
            _netrc = netrc(netrc_path).authenticators(host)
            if _netrc:
                # Return with login / password
                login_i = (0 if _netrc[0] else 1)
                return (_netrc[login_i], _netrc[2])
        except (NetrcParseError, IOError):
            # If there was a parsing error or a permissions issue reading the
            # file, we'll just skip netrc auth unless explicitly asked to
            # raise errors.
            if raise_errors:
                raise

    # AppEngine hackiness.
    except (ImportError, AttributeError):
        pass


def guess_filename(obj):
    """Tries to guess the filename of the given object."""
    name = getattr(obj, 'name', None)
    if (name and isinstance(name, basestring) and name[0] != '<' and
            name[-1] != '>'):
        return os.path.basename(name)


def from_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. If it can be, return an OrderedDict, e.g.,

    ::

        >>> from_key_val_list([('key', 'val')])
        OrderedDict([('key', 'val')])
        >>> from_key_val_list('string')
        ValueError: cannot encode objects that are not 2-tuples
        >>> from_key_val_list({'key': 'val'})
        OrderedDict([('key', 'val')])

    :rtype: OrderedDict
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    return OrderedDict(value)


def to_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. If it can be, return a list of tuples, e.g.,

    ::

        >>> to_key_val_list([('key', 'val')])
        [('key', 'val')]
        >>> to_key_val_list({'key': 'val'})
        [('key', 'val')]
        >>> to_key_val_list('string')
        ValueError: cannot encode objects that are not 2-tuples

    :rtype: list
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    if isinstance(value, collections.Mapping):
        value = value.items()

    return list(value)
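# A hedged illustration of what get_netrc_auth does. Assuming a ~/.netrc
# (or _netrc on Windows) containing the hypothetical entry:
#
#     machine example.com
#         login alice
#         password s3cret
#
# then, with that file in place:
#
#     get_netrc_auth('https://example.com/api')  # -> ('alice', 's3cret')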
# From mitsuhiko/werkzeug (used with permission).
def parse_list_header(value):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings. A quoted-string could
    contain a comma. A non-quoted string could have quotes in the
    middle. Quotes are removed automatically after parsing.

    It basically works like :func:`parse_set_header` just that items
    may appear multiple times and case sensitivity is preserved.

    The return value is a standard :class:`list`:

    >>> parse_list_header('token, "quoted value"')
    ['token', 'quoted value']

    To create a header from the :class:`list` again, use the
    :func:`dump_header` function.

    :param value: a string with a list header.
    :return: :class:`list`
    :rtype: list
    """
    result = []
    for item in _parse_list_header(value):
        if item[:1] == item[-1:] == '"':
            item = unquote_header_value(item[1:-1])
        result.append(item)
    return result


# From mitsuhiko/werkzeug (used with permission).
def parse_dict_header(value):
    """Parse lists of key, value pairs as described by RFC 2068 Section 2 and
    convert them into a python dict:

    >>> d = parse_dict_header('foo="is a fish", bar="as well"')
    >>> type(d) is dict
    True
    >>> sorted(d.items())
    [('bar', 'as well'), ('foo', 'is a fish')]

    If there is no value for a key it will be `None`:

    >>> parse_dict_header('key_without_value')
    {'key_without_value': None}

    To create a header from the :class:`dict` again, use the
    :func:`dump_header` function.

    :param value: a string with a dict header.
    :return: :class:`dict`
    :rtype: dict
    """
    result = {}
    for item in _parse_list_header(value):
        if '=' not in item:
            result[item] = None
            continue
        name, value = item.split('=', 1)
        if value[:1] == value[-1:] == '"':
            value = unquote_header_value(value[1:-1])
        result[name] = value
    return result


# From mitsuhiko/werkzeug (used with permission).
def unquote_header_value(value, is_filename=False):
    r"""Unquotes a header value. (Reversal of :func:`quote_header_value`).
    This does not use the real unquoting but what browsers are actually
    using for quoting.

    :param value: the header value to unquote.
    :rtype: str
    """
    if value and value[0] == value[-1] == '"':
        # this is not the real unquoting, but fixing this so that the
        # RFC is met will result in bugs with internet explorer and
        # probably some other browsers as well. IE for example is
        # uploading files with "C:\foo\bar.txt" as filename
        value = value[1:-1]

        # if this is a filename and the starting characters look like
        # a UNC path, then just return the value without quotes. Using the
        # replace sequence below on a UNC path has the effect of turning
        # the leading double slash into a single slash and then
        # _fix_ie_filename() doesn't work correctly. See #458.
        if not is_filename or value[:2] != '\\\\':
            return value.replace('\\\\', '\\').replace('\\"', '"')
    return value
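# For illustration, parse_dict_header handles headers shaped like a Digest
# auth challenge (values below are hypothetical):
#
#     parse_dict_header('realm="example", qop="auth", nonce="abc123"')
#     # -> {'realm': 'example', 'qop': 'auth', 'nonce': 'abc123'}
#
# Quoted values are unquoted via unquote_header_value; bare tokens keep
# their text, and a key with no '=' maps to None.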
def dict_from_cookiejar(cj):
    """Returns a key/value dictionary from a CookieJar.

    :param cj: CookieJar object to extract cookies from.
    :rtype: dict
    """

    cookie_dict = {}

    for cookie in cj:
        cookie_dict[cookie.name] = cookie.value

    return cookie_dict


def add_dict_to_cookiejar(cj, cookie_dict):
    """Returns a CookieJar from a key/value dictionary.

    :param cj: CookieJar to insert cookies into.
    :param cookie_dict: Dict of key/values to insert into CookieJar.
    :rtype: CookieJar
    """

    return cookiejar_from_dict(cookie_dict, cj)


def get_encodings_from_content(content):
    """Returns encodings from given content string.

    :param content: bytestring to extract encodings from.
    """
    warnings.warn((
        'In requests 3.0, get_encodings_from_content will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
    pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
    xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]')

    return (charset_re.findall(content) +
            pragma_re.findall(content) +
            xml_re.findall(content))


def get_encoding_from_headers(headers):
    """Returns encodings from given HTTP Header Dict.

    :param headers: dictionary to extract encoding from.
    :rtype: str
    """

    content_type = headers.get('content-type')

    if not content_type:
        return None

    content_type, params = cgi.parse_header(content_type)

    if 'charset' in params:
        return params['charset'].strip("'\"")

    if 'text' in content_type:
        return 'ISO-8859-1'
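# A brief worked example (header values are hypothetical):
#
#     get_encoding_from_headers({'content-type': 'text/html; charset=UTF-8'})
#     # -> 'UTF-8'
#     get_encoding_from_headers({'content-type': 'text/html'})
#     # -> 'ISO-8859-1', the RFC 2616 default for text/* media types
#     get_encoding_from_headers({'content-type': 'application/json'})
#     # -> None: no charset parameter and not a text/* type
#
# In practice Requests passes its CaseInsensitiveDict of response headers
# here, so the 'content-type' lookup is case-insensitive.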
def stream_decode_response_unicode(iterator, r):
    """Stream decodes an iterator."""

    if r.encoding is None:
        for item in iterator:
            yield item
        return

    decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace')
    for chunk in iterator:
        rv = decoder.decode(chunk)
        if rv:
            yield rv
    rv = decoder.decode(b'', final=True)
    if rv:
        yield rv


def iter_slices(string, slice_length):
    """Iterate over slices of a string."""
    pos = 0
    if slice_length is None or slice_length <= 0:
        slice_length = len(string)
    while pos < len(string):
        yield string[pos:pos + slice_length]
        pos += slice_length


def get_unicode_from_response(r):
    """Returns the requested content back in unicode.

    :param r: Response object to get unicode content from.

    Tries:

    1. charset from content-type
    2. fall back and replace all unicode characters

    :rtype: str
    """
    warnings.warn((
        'In requests 3.0, get_unicode_from_response will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    tried_encodings = []

    # Try charset from content-type
    encoding = get_encoding_from_headers(r.headers)

    if encoding:
        try:
            return str(r.content, encoding)
        except UnicodeError:
            tried_encodings.append(encoding)

    # Fall back:
    try:
        return str(r.content, encoding, errors='replace')
    except TypeError:
        return r.content


# The unreserved URI characters (RFC 3986)
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789-._~")


def unquote_unreserved(uri):
    """Un-escape any percent-escape sequences in a URI that are unreserved
    characters. This leaves all reserved, illegal and non-ASCII bytes encoded.

    :rtype: str
    """
    parts = uri.split('%')
    for i in range(1, len(parts)):
        h = parts[i][0:2]
        if len(h) == 2 and h.isalnum():
            try:
                c = chr(int(h, 16))
            except ValueError:
                raise InvalidURL("Invalid percent-escape sequence: '%s'" % h)

            if c in UNRESERVED_SET:
                parts[i] = c + parts[i][2:]
            else:
                parts[i] = '%' + parts[i]
        else:
            parts[i] = '%' + parts[i]
    return ''.join(parts)


def requote_uri(uri):
    """Re-quote the given URI.

    This function passes the given URI through an unquote/quote cycle to
    ensure that it is fully and consistently quoted.

    :rtype: str
    """
    safe_with_percent = "!#$%&'()*+,/:;=?@[]~"
    safe_without_percent = "!#$&'()*+,/:;=?@[]~"
    try:
        # Unquote only the unreserved characters
        # Then quote only illegal characters (do not quote reserved,
        # unreserved, or '%')
        return quote(unquote_unreserved(uri), safe=safe_with_percent)
    except InvalidURL:
        # We couldn't unquote the given URI, so let's try quoting it, but
        # there may be unquoted '%'s in the URI. We need to make sure they're
        # properly quoted so they do not cause issues elsewhere.
        return quote(uri, safe=safe_without_percent)


def address_in_network(ip, net):
    """This function allows you to check if an IP belongs to a network subnet

    Example: returns True if ip = 192.168.1.1 and net = 192.168.1.0/24
             returns False if ip = 192.168.1.1 and net = 192.168.100.0/24

    :rtype: bool
    """
    ipaddr = struct.unpack('=L', socket.inet_aton(ip))[0]
    netaddr, bits = net.split('/')
    netmask = struct.unpack('=L', socket.inet_aton(dotted_netmask(int(bits))))[0]
    network = struct.unpack('=L', socket.inet_aton(netaddr))[0] & netmask
    return (ipaddr & netmask) == (network & netmask)


def dotted_netmask(mask):
    """Converts mask from /xx format to xxx.xxx.xxx.xxx

    Example: if mask is 24 function returns 255.255.255.0

    :rtype: str
    """
    bits = 0xffffffff ^ (1 << 32 - mask) - 1
    return socket.inet_ntoa(struct.pack('>I', bits))


def is_ipv4_address(string_ip):
    """
    :rtype: bool
    """
    try:
        socket.inet_aton(string_ip)
    except socket.error:
        return False
    return True


def is_valid_cidr(string_network):
    """
    Very simple check of the cidr format in no_proxy variable.

    :rtype: bool
    """
    if string_network.count('/') == 1:
        try:
            mask = int(string_network.split('/')[1])
        except ValueError:
            return False

        if mask < 1 or mask > 32:
            return False

        try:
            socket.inet_aton(string_network.split('/')[0])
        except socket.error:
            return False
    else:
        return False
    return True
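# Sketch of the netmask arithmetic in dotted_netmask, for a /24 mask (worked
# arithmetic only, not additional API):
#
#     (1 << (32 - 24)) - 1   ==  0x000000ff     # the 8 host bits
#     0xffffffff ^ 0x000000ff  ==  0xffffff00   # keep the 24 network bits
#     socket.inet_ntoa(struct.pack('>I', 0xffffff00))  ==  '255.255.255.0'
#
# address_in_network then compares ip & netmask against network & netmask,
# so '192.168.1.1' tests True against '192.168.1.0/24'.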
@contextlib.contextmanager
def set_environ(env_name, value):
    """Set the environment variable 'env_name' to 'value'

    Save previous value, yield, and then restore the previous value stored in
    the environment variable 'env_name'.

    If 'value' is None, do nothing"""
    value_changed = value is not None
    if value_changed:
        old_value = os.environ.get(env_name)
        os.environ[env_name] = value
    try:
        yield
    finally:
        if value_changed:
            if old_value is None:
                del os.environ[env_name]
            else:
                os.environ[env_name] = old_value


def should_bypass_proxies(url, no_proxy):
    """
    Returns whether we should bypass proxies or not.

    :rtype: bool
    """
    get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper())

    # First check whether no_proxy is defined. If it is, check that the URL
    # we're getting isn't in the no_proxy list.
    no_proxy_arg = no_proxy
    if no_proxy is None:
        no_proxy = get_proxy('no_proxy')
    netloc = urlparse(url).netloc

    if no_proxy:
        # We need to check whether we match here. We need to see if we match
        # the end of the netloc, both with and without the port.
        no_proxy = (
            host for host in no_proxy.replace(' ', '').split(',') if host
        )

        ip = netloc.split(':')[0]
        if is_ipv4_address(ip):
            for proxy_ip in no_proxy:
                if is_valid_cidr(proxy_ip):
                    if address_in_network(ip, proxy_ip):
                        return True
                elif ip == proxy_ip:
                    # If the no_proxy entry was defined in plain IP notation
                    # instead of CIDR notation and matches the IP of the URL
                    return True
        else:
            for host in no_proxy:
                if netloc.endswith(host) or netloc.split(':')[0].endswith(host):
                    # The URL does match something in no_proxy, so we don't
                    # want to apply the proxies on this URL.
                    return True

    # If the system proxy settings indicate that this URL should be bypassed,
    # don't proxy.
    # The proxy_bypass function is incredibly buggy on OS X in early versions
    # of Python 2.6, so allow this call to fail. Only catch the specific
    # exceptions we've seen, though: this call failing in other ways can reveal
    # legitimate problems.
    with set_environ('no_proxy', no_proxy_arg):
        try:
            bypass = proxy_bypass(netloc)
        except (TypeError, socket.gaierror):
            bypass = False

    if bypass:
        return True

    return False


def get_environ_proxies(url, no_proxy=None):
    """
    Return a dict of environment proxies.

    :rtype: dict
    """
    if should_bypass_proxies(url, no_proxy=no_proxy):
        return {}
    else:
        return getproxies()
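# Illustrative no_proxy matching (hostnames and addresses are hypothetical):
#
#     should_bypass_proxies('http://192.168.1.20/', no_proxy='192.168.1.0/24')
#     # -> True: the address falls inside the CIDR block
#     should_bypass_proxies('http://internal.example.com/', no_proxy='example.com')
#     # -> True: the netloc ends with the listed suffix
#     should_bypass_proxies('http://example.org/', no_proxy='example.com')
#     # -> False, assuming no system-level bypass rule applies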
def select_proxy(url, proxies):
    """Select a proxy for the url, if applicable.

    :param url: The URL being requested
    :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs
    """
    proxies = proxies or {}
    urlparts = urlparse(url)
    if urlparts.hostname is None:
        return proxies.get(urlparts.scheme, proxies.get('all'))

    proxy_keys = [
        urlparts.scheme + '://' + urlparts.hostname,
        urlparts.scheme,
        'all://' + urlparts.hostname,
        'all',
    ]
    proxy = None
    for proxy_key in proxy_keys:
        if proxy_key in proxies:
            proxy = proxies[proxy_key]
            break

    return proxy


def default_user_agent(name="python-requests"):
    """
    Return a string representing the default user agent.

    :rtype: str
    """
    return '%s/%s' % (name, __version__)


def default_headers():
    """
    :rtype: requests.structures.CaseInsensitiveDict
    """
    return CaseInsensitiveDict({
        'User-Agent': default_user_agent(),
        'Accept-Encoding': ', '.join(('gzip', 'deflate')),
        'Accept': '*/*',
        'Connection': 'keep-alive',
    })


def parse_header_links(value):
    """Return a list of parsed link headers, e.g.:

    Link: <http:/.../front.jpeg>; rel=front; type="image/jpeg",<http://.../back.jpeg>; rel=back;type="image/jpeg"

    :rtype: list
    """

    links = []

    replace_chars = ' \'"'

    for val in re.split(', *<', value):
        try:
            url, params = val.split(';', 1)
        except ValueError:
            url, params = val, ''

        link = {'url': url.strip('<> \'"')}

        for param in params.split(';'):
            try:
                key, value = param.split('=')
            except ValueError:
                break

            link[key.strip(replace_chars)] = value.strip(replace_chars)

        links.append(link)

    return links


# Null bytes; no need to recreate these on each call to guess_json_utf
_null = '\x00'.encode('ascii')  # encoding to ASCII for Python 3
_null2 = _null * 2
_null3 = _null * 3


def guess_json_utf(data):
    """
    :rtype: str
    """
    # JSON always starts with two ASCII characters, so detection is as
    # easy as counting the nulls and from their location and count
    # determine the encoding. Also detect a BOM, if present.
    sample = data[:4]
    if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        return 'utf-32'     # BOM included
    if sample[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'  # BOM included, MS style (discouraged)
    if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
        return 'utf-16'     # BOM included
    nullcount = sample.count(_null)
    if nullcount == 0:
        return 'utf-8'
    if nullcount == 2:
        if sample[::2] == _null2:   # 1st and 3rd are null
            return 'utf-16-be'
        if sample[1::2] == _null2:  # 2nd and 4th are null
            return 'utf-16-le'
        # Did not detect 2 valid UTF-16 ascii-range characters
    if nullcount == 3:
        if sample[:3] == _null3:
            return 'utf-32-be'
        if sample[1:] == _null3:
            return 'utf-32-le'
        # Did not detect a valid UTF-32 ascii-range character
    return None
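# How the null-count heuristic above plays out on the first four bytes of a
# JSON document in each encoding (worked examples, not extra API):
#
#     guess_json_utf(b'{"a": 1}')       # -> 'utf-8'     (no null bytes)
#     guess_json_utf(b'{\x00"\x00')     # -> 'utf-16-le' (2nd and 4th null)
#     guess_json_utf(b'\x00{\x00"')     # -> 'utf-16-be' (1st and 3rd null)
#     guess_json_utf(b'{\x00\x00\x00')  # -> 'utf-32-le' (3 trailing nulls)
#     guess_json_utf(b'\x00\x00\x00{')  # -> 'utf-32-be' (3 leading nulls)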
def prepend_scheme_if_needed(url, new_scheme):
    """Given a URL that may or may not have a scheme, prepend the given scheme.
    Does not replace a present scheme with the one provided as an argument.

    :rtype: str
    """
    scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme)

    # urlparse is a finicky beast, and sometimes decides that there isn't a
    # netloc present. Assume that it's being over-cautious, and switch netloc
    # and path if urlparse decided there was no netloc.
    if not netloc:
        netloc, path = path, netloc

    return urlunparse((scheme, netloc, path, params, query, fragment))


def get_auth_from_url(url):
    """Given a url with authentication components, extract them into a tuple
    of username, password.

    :rtype: (str,str)
    """
    parsed = urlparse(url)

    try:
        auth = (unquote(parsed.username), unquote(parsed.password))
    except (AttributeError, TypeError):
        auth = ('', '')

    return auth


# Moved outside of function to avoid recompile every call
_CLEAN_HEADER_REGEX_BYTE = re.compile(b'^\\S[^\\r\\n]*$|^$')
_CLEAN_HEADER_REGEX_STR = re.compile(r'^\S[^\r\n]*$|^$')


def check_header_validity(header):
    """Verifies that header value is a string which doesn't contain
    leading whitespace or return characters. This prevents unintended
    header injection.

    :param header: tuple, in the format (name, value).
    """
    name, value = header

    if isinstance(value, bytes):
        pat = _CLEAN_HEADER_REGEX_BYTE
    else:
        pat = _CLEAN_HEADER_REGEX_STR
    try:
        if not pat.match(value):
            raise InvalidHeader("Invalid return character or leading space in header: %s" % name)
    except TypeError:
        raise InvalidHeader("Value for header {%s: %s} must be of type str or "
                            "bytes, not %s" % (name, value, type(value)))


def urldefragauth(url):
    """
    Given a URL, remove the fragment and the authentication part.

    :rtype: str
    """
    scheme, netloc, path, params, query, fragment = urlparse(url)

    # see func:`prepend_scheme_if_needed`
    if not netloc:
        netloc, path = path, netloc

    netloc = netloc.rsplit('@', 1)[-1]

    return urlunparse((scheme, netloc, path, params, query, ''))


def rewind_body(prepared_request):
    """Move file pointer back to its recorded starting position
    so it can be read again on redirect.
    """
    body_seek = getattr(prepared_request.body, 'seek', None)
    if body_seek is not None and isinstance(prepared_request._body_position, integer_types):
        try:
            body_seek(prepared_request._body_position)
        except (IOError, OSError):
            raise UnrewindableBodyError("An error occurred when rewinding request "
                                        "body for redirect.")
    else:
        raise UnrewindableBodyError("Unable to rewind request body for redirect.")
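# Worked examples for the URL helpers above (all URLs are hypothetical):
#
#     prepend_scheme_if_needed('example.com/path', 'http')
#     # -> 'http://example.com/path'
#     get_auth_from_url('https://user:p%40ss@example.com/')
#     # -> ('user', 'p@ss'); components are percent-decoded via unquote
#     urldefragauth('https://user:pass@example.com/path#section')
#     # -> 'https://example.com/path'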