# -*- coding: utf-8 -*-

"""
requests.session
~~~~~~~~~~~~~~~~

This module provides a Session object to manage and persist settings across
requests (cookies, auth, proxies).
"""
import os
from datetime import datetime

try:
    # The ABC aliases in ``collections`` were removed in Python 3.10; the
    # canonical location is ``collections.abc``.
    from collections.abc import Mapping
except ImportError:  # Python 2
    from collections import Mapping

from .auth import _basic_auth_str
from .compat import cookielib, OrderedDict, urljoin, urlparse
from .cookies import (
    cookiejar_from_dict, extract_cookies_to_jar, RequestsCookieJar, merge_cookies)
from .models import Request, PreparedRequest, DEFAULT_REDIRECT_LIMIT
from .hooks import default_hooks, dispatch_hook
from ._internal_utils import to_native_string
from .utils import to_key_val_list, default_headers
from .exceptions import (
    TooManyRedirects, InvalidSchema, ChunkedEncodingError, ContentDecodingError)
from .packages.urllib3._collections import RecentlyUsedContainer
from .structures import CaseInsensitiveDict

from .adapters import HTTPAdapter

from .utils import (
    requote_uri, get_environ_proxies, get_netrc_auth, should_bypass_proxies,
    get_auth_from_url, rewind_body
)

from .status_codes import codes

# formerly defined here, reexposed here for backward compatibility
from .models import REDIRECT_STATI

REDIRECT_CACHE_SIZE = 1000


def merge_setting(request_setting, session_setting, dict_class=OrderedDict):
    """Determines appropriate setting for a given request, taking into account
    the explicit setting on that request, and the setting in the session. If a
    setting is a dictionary, they will be merged together using `dict_class`.

    :param request_setting: value supplied for the individual request.
    :param session_setting: value configured on the session.
    :param dict_class: mapping type used to build the merged result.
    :return: the request value, the session value, or a new `dict_class`
        holding both (request entries win; entries whose value is ``None``
        are dropped).
    """

    if session_setting is None:
        return request_setting

    if request_setting is None:
        return session_setting

    # Bypass if not a dictionary (e.g. verify)
    if not (
            isinstance(session_setting, Mapping) and
            isinstance(request_setting, Mapping)
    ):
        return request_setting

    merged_setting = dict_class(to_key_val_list(session_setting))
    merged_setting.update(to_key_val_list(request_setting))

    # Remove keys that are set to None. Extract keys first to avoid altering
    # the dictionary during iteration.
    none_keys = [k for (k, v) in merged_setting.items() if v is None]
    for key in none_keys:
        del merged_setting[key]

    return merged_setting


def merge_hooks(request_hooks, session_hooks, dict_class=OrderedDict):
    """Properly merges both requests and session hooks.

    This is necessary because when request_hooks == {'response': []}, the
    merge breaks Session hooks entirely.

    :param request_hooks: hooks mapping supplied for the individual request.
    :param session_hooks: hooks mapping configured on the session.
    :param dict_class: mapping type handed to :func:`merge_setting`.
    """
    if session_hooks is None or session_hooks.get('response') == []:
        return request_hooks

    if request_hooks is None or request_hooks.get('response') == []:
        return session_hooks

    return merge_setting(request_hooks, session_hooks, dict_class)


class SessionRedirectMixin(object):
    def resolve_redirects(self, resp, req, stream=False, timeout=None,
                          verify=True, cert=None, proxies=None, **adapter_kwargs):
        """Receives a Response. Returns a generator of Responses.

        Each iteration builds a follow-up request from ``req`` and the
        ``location`` header of ``resp``, sends it with ``allow_redirects``
        disabled and yields the new response.

        :raises TooManyRedirects: once ``self.max_redirects`` hops were made.
        """

        i = 0
        hist = []  # keep track of history

        while resp.is_redirect:
            prepared_request = req.copy()

            if i > 0:
                # Update history and keep track of redirects.
                hist.append(resp)
                new_hist = list(hist)
                resp.history = new_hist

            try:
                resp.content  # Consume socket so it can be released
            except (ChunkedEncodingError, ContentDecodingError, RuntimeError):
                resp.raw.read(decode_content=False)

            if i >= self.max_redirects:
                raise TooManyRedirects('Exceeded %s redirects.' % self.max_redirects, response=resp)

            # Release the connection back into the pool.
            resp.close()

            url = resp.headers['location']

            # Handle redirection without scheme (see: RFC 1808 Section 4)
            if url.startswith('//'):
                parsed_rurl = urlparse(resp.url)
                url = '%s:%s' % (parsed_rurl.scheme, url)

            # The scheme should be lower case...
            parsed = urlparse(url)
            url = parsed.geturl()

            # Facilitate relative 'location' headers, as allowed by RFC 7231.
            # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource')
            # Compliant with RFC3986, we percent encode the url.
            if not parsed.netloc:
                url = urljoin(resp.url, requote_uri(url))
            else:
                url = requote_uri(url)

            prepared_request.url = to_native_string(url)
            # Cache the url, unless it redirects to itself.
            if resp.is_permanent_redirect and req.url != prepared_request.url:
                self.redirect_cache[req.url] = prepared_request.url

            self.rebuild_method(prepared_request, resp)

            # https://github.com/kennethreitz/requests/issues/1084
            if resp.status_code not in (codes.temporary_redirect, codes.permanent_redirect):
                # https://github.com/kennethreitz/requests/issues/3490
                purged_headers = ('Content-Length', 'Content-Type', 'Transfer-Encoding')
                for header in purged_headers:
                    prepared_request.headers.pop(header, None)
                prepared_request.body = None

            headers = prepared_request.headers
            # The Cookie header is regenerated from the jar below.
            headers.pop('Cookie', None)

            # Extract any cookies sent on the response to the cookiejar
            # in the new request. Because we've mutated our copied prepared
            # request, use the old one that we haven't yet touched.
            extract_cookies_to_jar(prepared_request._cookies, req, resp.raw)
            merge_cookies(prepared_request._cookies, self.cookies)
            prepared_request.prepare_cookies(prepared_request._cookies)

            # Rebuild auth and proxy information.
            proxies = self.rebuild_proxies(prepared_request, proxies)
            self.rebuild_auth(prepared_request, resp)

            # A failed tell() sets `_body_position` to `object()`. This non-None
            # value ensures `rewindable` will be True, allowing us to raise an
            # UnrewindableBodyError, instead of hanging the connection.
            rewindable = (
                prepared_request._body_position is not None and
                ('Content-Length' in headers or 'Transfer-Encoding' in headers)
            )

            # Attempt to rewind consumed file-like object.
            if rewindable:
                rewind_body(prepared_request)

            # Override the original request.
            req = prepared_request

            resp = self.send(
                req,
                stream=stream,
                timeout=timeout,
                verify=verify,
                cert=cert,
                proxies=proxies,
                allow_redirects=False,
                **adapter_kwargs
            )

            extract_cookies_to_jar(self.cookies, prepared_request, resp.raw)

            i += 1
            yield resp

    def rebuild_auth(self, prepared_request, response):
        """When being redirected we may want to strip authentication from the
        request to avoid leaking credentials. This method intelligently removes
        and reapplies authentication where possible to avoid credential loss.

        The ``Authorization`` header is dropped when the redirect targets a
        different hostname, or when it downgrades the connection from
        ``https`` to another scheme.
        """
        headers = prepared_request.headers
        url = prepared_request.url

        if 'Authorization' in headers:
            original_parsed = urlparse(response.request.url)
            redirect_parsed = urlparse(url)

            # If we get redirected to a new host, we should strip out any
            # authentication headers.
            changed_host = original_parsed.hostname != redirect_parsed.hostname
            # Never replay credentials that were sent over TLS on a
            # cleartext connection, even to the same host.
            downgraded = (original_parsed.scheme == 'https' and
                          redirect_parsed.scheme != 'https')

            if changed_host or downgraded:
                del headers['Authorization']

        # .netrc might have more auth for us on our new host.
        new_auth = get_netrc_auth(url) if self.trust_env else None
        if new_auth is not None:
            prepared_request.prepare_auth(new_auth)

    def rebuild_proxies(self, prepared_request, proxies):
        """This method re-evaluates the proxy configuration by considering the
        environment variables. If we are redirected to a URL covered by
        NO_PROXY, we strip the proxy configuration. Otherwise, we set missing
        proxy keys for this URL (in case they were stripped by a previous
        redirect).

        This method also replaces the Proxy-Authorization header where
        necessary.

        :rtype: dict
        """
        headers = prepared_request.headers
        url = prepared_request.url
        scheme = urlparse(url).scheme
        # Work on a copy so the mapping passed in is left untouched.
        new_proxies = proxies.copy() if proxies is not None else {}

        if self.trust_env and not should_bypass_proxies(url):
            environ_proxies = get_environ_proxies(url)

            proxy = environ_proxies.get(scheme, environ_proxies.get('all'))

            if proxy:
                new_proxies.setdefault(scheme, proxy)

        if 'Proxy-Authorization' in headers:
            del headers['Proxy-Authorization']

        try:
            username, password = get_auth_from_url(new_proxies[scheme])
        except KeyError:
            # No proxy configured for this scheme.
            username, password = None, None

        if username and password:
            headers['Proxy-Authorization'] = _basic_auth_str(username, password)

        return new_proxies

    def rebuild_method(self, prepared_request, response):
        """When being redirected we may want to change the method of the request
        based on certain specs or browser behavior.
        """
        method = prepared_request.method

        # http://tools.ietf.org/html/rfc7231#section-6.4.4
        if response.status_code == codes.see_other and method != 'HEAD':
            method = 'GET'

        # Do what the browsers do, despite standards...
        # First, turn 302s into GETs.
        if response.status_code == codes.found and method != 'HEAD':
            method = 'GET'

        # Second, if a POST is responded to with a 301, turn it into a GET.
        # This bizarre behaviour is explained in Issue 1704.
        if response.status_code == codes.moved and method == 'POST':
            method = 'GET'

        prepared_request.method = method


class Session(SessionRedirectMixin):
    """A Requests session.

    Provides cookie persistence, connection-pooling, and configuration.

    Basic Usage::

      >>> import requests
      >>> s = requests.Session()
      >>> s.get('http://httpbin.org/get')
      <Response [200]>

    Or as a context manager::

      >>> with requests.Session() as s:
      >>>     s.get('http://httpbin.org/get')
      <Response [200]>
    """

    # Attributes captured by __getstate__ / restored by __setstate__.
    __attrs__ = [
        'headers', 'cookies', 'auth', 'proxies', 'hooks', 'params', 'verify',
        'cert', 'prefetch', 'adapters', 'stream', 'trust_env',
        'max_redirects',
    ]

    def __init__(self):

        #: A case-insensitive dictionary of headers to be sent on each
        #: :class:`Request <Request>` sent from this
        #: :class:`Session <Session>`.
        self.headers = default_headers()

        #: Default Authentication tuple or object to attach to
        #: :class:`Request <Request>`.
        self.auth = None

        #: Dictionary mapping protocol or protocol and host to the URL of the proxy
        #: (e.g. {'http': 'foo.bar:3128', 'http://host.name': 'foo.bar:4012'}) to
        #: be used on each :class:`Request <Request>`.
        self.proxies = {}

        #: Event-handling hooks.
        self.hooks = default_hooks()

        #: Dictionary of querystring data to attach to each
        #: :class:`Request <Request>`. The dictionary values may be lists for
        #: representing multivalued query parameters.
        self.params = {}

        #: Stream response content default.
        self.stream = False

        #: SSL Verification default.
        self.verify = True

        #: SSL client certificate default.
        self.cert = None

        #: Maximum number of redirects allowed. If the request exceeds this
        #: limit, a :class:`TooManyRedirects` exception is raised.
        #: This defaults to requests.models.DEFAULT_REDIRECT_LIMIT, which is
        #: 30.
        self.max_redirects = DEFAULT_REDIRECT_LIMIT

        #: Trust environment settings for proxy configuration, default
        #: authentication and similar.
        self.trust_env = True

        #: A CookieJar containing all currently outstanding cookies set on this
        #: session. By default it is a
        #: :class:`RequestsCookieJar <requests.cookies.RequestsCookieJar>`, but
        #: may be any other ``cookielib.CookieJar`` compatible object.
        self.cookies = cookiejar_from_dict({})

        # Default connection adapters.
        self.adapters = OrderedDict()
        self.mount('https://', HTTPAdapter())
        self.mount('http://', HTTPAdapter())

        # Only store 1000 redirects to prevent using infinite memory
        self.redirect_cache = RecentlyUsedContainer(REDIRECT_CACHE_SIZE)

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    def prepare_request(self, request):
        """Constructs a :class:`PreparedRequest <PreparedRequest>` for
        transmission and returns it. The :class:`PreparedRequest` has settings
        merged from the :class:`Request <Request>` instance and those of the
        :class:`Session`.

        :param request: :class:`Request` instance to prepare with this
            session's settings.
        :rtype: requests.PreparedRequest
        """
        cookies = request.cookies or {}

        # Bootstrap CookieJar.
        if not isinstance(cookies, cookielib.CookieJar):
            cookies = cookiejar_from_dict(cookies)

        # Merge with session cookies
        merged_cookies = merge_cookies(
            merge_cookies(RequestsCookieJar(), self.cookies), cookies)

        # Set environment's basic authentication if not explicitly set.
        auth = request.auth
        if self.trust_env and not auth and not self.auth:
            auth = get_netrc_auth(request.url)

        p = PreparedRequest()
        p.prepare(
            method=request.method.upper(),
            url=request.url,
            files=request.files,
            data=request.data,
            json=request.json,
            headers=merge_setting(request.headers, self.headers, dict_class=CaseInsensitiveDict),
            params=merge_setting(request.params, self.params),
            auth=merge_setting(auth, self.auth),
            cookies=merged_cookies,
            hooks=merge_hooks(request.hooks, self.hooks),
        )
        return p

    def request(self, method, url,
                params=None,
                data=None,
                headers=None,
                cookies=None,
                files=None,
                auth=None,
                timeout=None,
                allow_redirects=True,
                proxies=None,
                hooks=None,
                stream=None,
                verify=None,
                cert=None,
                json=None):
        """Constructs a :class:`Request <Request>`, prepares it and sends it.
        Returns :class:`Response <Response>` object.

        :param method: method for the new :class:`Request` object.
        :param url: URL for the new :class:`Request` object.
        :param params: (optional) Dictionary or bytes to be sent in the query
            string for the :class:`Request`.
        :param data: (optional) Dictionary, bytes, or file-like object to send
            in the body of the :class:`Request`.
        :param json: (optional) json to send in the body of the
            :class:`Request`.
        :param headers: (optional) Dictionary of HTTP Headers to send with the
            :class:`Request`.
        :param cookies: (optional) Dict or CookieJar object to send with the
            :class:`Request`.
        :param files: (optional) Dictionary of ``'filename': file-like-objects``
            for multipart encoding upload.
        :param auth: (optional) Auth tuple or callable to enable
            Basic/Digest/Custom HTTP Auth.
        :param timeout: (optional) How long to wait for the server to send
            data before giving up, as a float, or a :ref:`(connect timeout,
            read timeout) <timeouts>` tuple.
        :type timeout: float or tuple
        :param allow_redirects: (optional) Set to True by default.
        :type allow_redirects: bool
        :param proxies: (optional) Dictionary mapping protocol or protocol and
            hostname to the URL of the proxy.
        :param hooks: (optional) hooks mapping handed to the
            :class:`Request`.
        :param stream: (optional) whether to immediately download the response
            content. Defaults to ``False``.
        :param verify: (optional) whether the SSL cert will be verified.
            A CA_BUNDLE path can also be provided. Defaults to ``True``.
        :param cert: (optional) if String, path to ssl client cert file (.pem).
            If Tuple, ('cert', 'key') pair.
        :rtype: requests.Response
        """
        # Create the Request.
        req = Request(
            method=method.upper(),
            url=url,
            headers=headers,
            files=files,
            data=data or {},
            json=json,
            params=params or {},
            auth=auth,
            cookies=cookies,
            hooks=hooks,
        )
        prep = self.prepare_request(req)

        proxies = proxies or {}

        settings = self.merge_environment_settings(
            prep.url, proxies, stream, verify, cert
        )

        # Send the request.
        send_kwargs = {
            'timeout': timeout,
            'allow_redirects': allow_redirects,
        }
        send_kwargs.update(settings)
        resp = self.send(prep, **send_kwargs)

        return resp

    def get(self, url, **kwargs):
        r"""Sends a GET request. Returns :class:`Response` object.

        :param url: URL for the new :class:`Request` object.
        :param \*\*kwargs: Optional arguments that ``request`` takes.
        :rtype: requests.Response
        """

        kwargs.setdefault('allow_redirects', True)
        return self.request('GET', url, **kwargs)

    def options(self, url, **kwargs):
        r"""Sends a OPTIONS request. Returns :class:`Response` object.

        :param url: URL for the new :class:`Request` object.
        :param \*\*kwargs: Optional arguments that ``request`` takes.
        :rtype: requests.Response
        """

        kwargs.setdefault('allow_redirects', True)
        return self.request('OPTIONS', url, **kwargs)

    def head(self, url, **kwargs):
        r"""Sends a HEAD request. Returns :class:`Response` object.

        Unlike the other verbs, redirects are not followed unless
        ``allow_redirects`` is passed explicitly.

        :param url: URL for the new :class:`Request` object.
        :param \*\*kwargs: Optional arguments that ``request`` takes.
        :rtype: requests.Response
        """

        kwargs.setdefault('allow_redirects', False)
        return self.request('HEAD', url, **kwargs)

    def post(self, url, data=None, json=None, **kwargs):
        r"""Sends a POST request. Returns :class:`Response` object.

        :param url: URL for the new :class:`Request` object.
        :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
        :param json: (optional) json to send in the body of the :class:`Request`.
        :param \*\*kwargs: Optional arguments that ``request`` takes.
        :rtype: requests.Response
        """

        return self.request('POST', url, data=data, json=json, **kwargs)

    def put(self, url, data=None, **kwargs):
        r"""Sends a PUT request. Returns :class:`Response` object.

        :param url: URL for the new :class:`Request` object.
        :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
        :param \*\*kwargs: Optional arguments that ``request`` takes.
        :rtype: requests.Response
        """

        return self.request('PUT', url, data=data, **kwargs)

    def patch(self, url, data=None, **kwargs):
        r"""Sends a PATCH request. Returns :class:`Response` object.

        :param url: URL for the new :class:`Request` object.
        :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
        :param \*\*kwargs: Optional arguments that ``request`` takes.
        :rtype: requests.Response
        """

        return self.request('PATCH', url, data=data, **kwargs)

    def delete(self, url, **kwargs):
        r"""Sends a DELETE request. Returns :class:`Response` object.

        :param url: URL for the new :class:`Request` object.
        :param \*\*kwargs: Optional arguments that ``request`` takes.
        :rtype: requests.Response
        """

        return self.request('DELETE', url, **kwargs)

    def send(self, request, **kwargs):
        """
        Send a given PreparedRequest.

        :param request: the :class:`PreparedRequest` to transmit.
        :raises ValueError: if an unprepared :class:`Request` is passed.
        :rtype: requests.Response
        """
        # Set defaults that the hooks can utilize to ensure they always have
        # the correct parameters to reproduce the previous request.
        kwargs.setdefault('stream', self.stream)
        kwargs.setdefault('verify', self.verify)
        kwargs.setdefault('cert', self.cert)
        kwargs.setdefault('proxies', self.proxies)

        # It's possible that users might accidentally send a Request object.
        # Guard against that specific failure case.
        if isinstance(request, Request):
            raise ValueError('You can only send PreparedRequests.')

        # Set up variables needed for resolve_redirects and dispatching of hooks
        allow_redirects = kwargs.pop('allow_redirects', True)
        stream = kwargs.get('stream')
        hooks = request.hooks

        # Resolve URL in redirect cache, if available.
        if allow_redirects:
            # Remember visited URLs so a cycle in the cache cannot loop forever.
            checked_urls = set()
            while request.url in self.redirect_cache:
                checked_urls.add(request.url)
                new_url = self.redirect_cache.get(request.url)
                if new_url in checked_urls:
                    break
                request.url = new_url

        # Get the appropriate adapter to use
        adapter = self.get_adapter(url=request.url)

        # Start time (approximately) of the request
        start = datetime.utcnow()

        # Send the request
        r = adapter.send(request, **kwargs)

        # Total elapsed time of the request (approximately)
        r.elapsed = datetime.utcnow() - start

        # Response manipulation hooks
        r = dispatch_hook('response', hooks, r, **kwargs)

        # Persist cookies
        if r.history:

            # If the hooks create history then we want those cookies too
            for resp in r.history:
                extract_cookies_to_jar(self.cookies, resp.request, resp.raw)

        extract_cookies_to_jar(self.cookies, request, r.raw)

        # Redirect resolving generator.
        gen = self.resolve_redirects(r, request, **kwargs)

        # Resolve redirects if allowed.
        history = list(gen) if allow_redirects else []

        # Shuffle things around if there's history.
        if history:
            # Insert the first (original) request at the start
            history.insert(0, r)
            # Get the last request made
            r = history.pop()
            r.history = history

        if not stream:
            # Accessing .content loads the body now.
            r.content

        return r

    def merge_environment_settings(self, url, proxies, stream, verify, cert):
        """
        Check the environment and merge it with some settings.

        The ``proxies`` mapping passed in is not modified.

        :rtype: dict
        """
        # Gather clues from the surrounding environment.
        if self.trust_env:
            # Copy first: environment proxies depend on ``url`` and must not
            # leak into the caller's dict, which may be reused for a URL that
            # should bypass the proxy.
            proxies = dict(proxies) if proxies is not None else {}

            # Set environment's proxies.
            env_proxies = get_environ_proxies(url) or {}
            for (k, v) in env_proxies.items():
                proxies.setdefault(k, v)

            # Look for requests environment configuration and be compatible
            # with cURL.
            if verify is True or verify is None:
                verify = (os.environ.get('REQUESTS_CA_BUNDLE') or
                          os.environ.get('CURL_CA_BUNDLE'))

        # Merge all the kwargs.
        proxies = merge_setting(proxies, self.proxies)
        stream = merge_setting(stream, self.stream)
        verify = merge_setting(verify, self.verify)
        cert = merge_setting(cert, self.cert)

        return {'verify': verify, 'proxies': proxies, 'stream': stream,
                'cert': cert}

    def get_adapter(self, url):
        """
        Returns the appropriate connection adapter for the given URL.

        Prefixes are compared case-insensitively, in the order the adapters
        are stored.

        :raises InvalidSchema: if no mounted prefix matches ``url``.
        :rtype: requests.adapters.BaseAdapter
        """
        lowered_url = url.lower()
        for (prefix, adapter) in self.adapters.items():

            if lowered_url.startswith(prefix.lower()):
                return adapter

        # Nothing matches :-/
        raise InvalidSchema("No connection adapters were found for '%s'" % url)

    def close(self):
        """Closes all adapters and as such the session"""
        for v in self.adapters.values():
            v.close()

    def mount(self, prefix, adapter):
        """Registers a connection adapter to a prefix.

        Adapters are sorted in descending order by key length.
        """
        self.adapters[prefix] = adapter
        # Re-insert every shorter prefix so it ends up after the new one.
        keys_to_move = [k for k in self.adapters if len(k) < len(prefix)]

        for key in keys_to_move:
            self.adapters[key] = self.adapters.pop(key)

    def __getstate__(self):
        state = dict((attr, getattr(self, attr, None)) for attr in self.__attrs__)
        # Store the redirect cache as a plain dict.
        state['redirect_cache'] = dict(self.redirect_cache)
        return state

    def __setstate__(self, state):
        redirect_cache = state.pop('redirect_cache', {})
        for attr, value in state.items():
            setattr(self, attr, value)

        # Rebuild the bounded cache from the stored plain dict.
        self.redirect_cache = RecentlyUsedContainer(REDIRECT_CACHE_SIZE)
        for redirect, to in redirect_cache.items():
            self.redirect_cache[redirect] = to


def session():
    """
    Returns a :class:`Session` for context-management.

    :rtype: Session
    """

    return Session()