1"""Blocking and non-blocking HTTP client interfaces. 2 3This module defines a common interface shared by two implementations, 4``simple_httpclient`` and ``curl_httpclient``. Applications may either 5instantiate their chosen implementation class directly or use the 6`AsyncHTTPClient` class from this module, which selects an implementation 7that can be overridden with the `AsyncHTTPClient.configure` method. 8 9The default implementation is ``simple_httpclient``, and this is expected 10to be suitable for most users' needs. However, some applications may wish 11to switch to ``curl_httpclient`` for reasons such as the following: 12 13* ``curl_httpclient`` has some features not found in ``simple_httpclient``, 14 including support for HTTP proxies and the ability to use a specified 15 network interface. 16 17* ``curl_httpclient`` is more likely to be compatible with sites that are 18 not-quite-compliant with the HTTP spec, or sites that use little-exercised 19 features of HTTP. 20 21* ``curl_httpclient`` is faster. 22 23* ``curl_httpclient`` was the default prior to Tornado 2.0. 24 25Note that if you are using ``curl_httpclient``, it is highly 26recommended that you use a recent version of ``libcurl`` and 27``pycurl``. Currently the minimum supported version of libcurl is 287.22.0, and the minimum version of pycurl is 7.18.2. It is highly 29recommended that your ``libcurl`` installation is built with 30asynchronous DNS resolver (threaded or c-ares), otherwise you may 31encounter various problems with request timeouts (for more 32information, see 33http://curl.haxx.se/libcurl/c/curl_easy_setopt.html#CURLOPTCONNECTTIMEOUTMS 34and comments in curl_httpclient.py). 35 36To select ``curl_httpclient``, call `AsyncHTTPClient.configure` at startup:: 37 38 AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient") 39""" 40# pylint: skip-file 41 42from __future__ import absolute_import, division, print_function 43 44import functools 45import time 46import weakref 47 48from salt.ext.tornado.concurrent import TracebackFuture 49from salt.ext.tornado.escape import utf8, native_str 50from salt.ext.tornado import httputil, stack_context 51from salt.ext.tornado.ioloop import IOLoop 52from salt.ext.tornado.util import Configurable 53 54 55class HTTPClient(object): 56 """A blocking HTTP client. 57 58 This interface is provided for convenience and testing; most applications 59 that are running an IOLoop will want to use `AsyncHTTPClient` instead. 60 Typical usage looks like this:: 61 62 http_client = httpclient.HTTPClient() 63 try: 64 response = http_client.fetch("http://www.google.com/") 65 print(response.body) 66 except httpclient.HTTPError as e: 67 # HTTPError is raised for non-200 responses; the response 68 # can be found in e.response. 69 print("Error: " + str(e)) 70 except Exception as e: 71 # Other errors are possible, such as IOError. 72 print("Error: " + str(e)) 73 http_client.close() 74 """ 75 def __init__(self, async_client_class=None, **kwargs): 76 self._io_loop = IOLoop(make_current=False) 77 if async_client_class is None: 78 async_client_class = AsyncHTTPClient 79 self._async_client = async_client_class(self._io_loop, **kwargs) 80 self._closed = False 81 82 def __del__(self): 83 self.close() 84 85 def close(self): 86 """Closes the HTTPClient, freeing any resources used.""" 87 if not self._closed: 88 self._async_client.close() 89 self._io_loop.close() 90 self._closed = True 91 92 def fetch(self, request, **kwargs): 93 """Executes a request, returning an `HTTPResponse`. 94 95 The request may be either a string URL or an `HTTPRequest` object. 96 If it is a string, we construct an `HTTPRequest` using any additional 97 kwargs: ``HTTPRequest(request, **kwargs)`` 98 99 If an error occurs during the fetch, we raise an `HTTPError` unless 100 the ``raise_error`` keyword argument is set to False. 101 """ 102 response = self._io_loop.run_sync(functools.partial( 103 self._async_client.fetch, request, **kwargs)) 104 return response 105 106 107class AsyncHTTPClient(Configurable): 108 """An non-blocking HTTP client. 109 110 Example usage:: 111 112 def handle_response(response): 113 if response.error: 114 print("Error: %s" % response.error) 115 else: 116 print(response.body) 117 118 http_client = AsyncHTTPClient() 119 http_client.fetch("http://www.google.com/", handle_response) 120 121 The constructor for this class is magic in several respects: It 122 actually creates an instance of an implementation-specific 123 subclass, and instances are reused as a kind of pseudo-singleton 124 (one per `.IOLoop`). The keyword argument ``force_instance=True`` 125 can be used to suppress this singleton behavior. Unless 126 ``force_instance=True`` is used, no arguments other than 127 ``io_loop`` should be passed to the `AsyncHTTPClient` constructor. 128 The implementation subclass as well as arguments to its 129 constructor can be set with the static method `configure()` 130 131 All `AsyncHTTPClient` implementations support a ``defaults`` 132 keyword argument, which can be used to set default values for 133 `HTTPRequest` attributes. For example:: 134 135 AsyncHTTPClient.configure( 136 None, defaults=dict(user_agent="MyUserAgent")) 137 # or with force_instance: 138 client = AsyncHTTPClient(force_instance=True, 139 defaults=dict(user_agent="MyUserAgent")) 140 141 .. versionchanged:: 4.1 142 The ``io_loop`` argument is deprecated. 143 """ 144 @classmethod 145 def configurable_base(cls): 146 return AsyncHTTPClient 147 148 @classmethod 149 def configurable_default(cls): 150 from salt.ext.tornado.simple_httpclient import SimpleAsyncHTTPClient 151 return SimpleAsyncHTTPClient 152 153 @classmethod 154 def _async_clients(cls): 155 attr_name = '_async_client_dict_' + cls.__name__ 156 if not hasattr(cls, attr_name): 157 setattr(cls, attr_name, weakref.WeakKeyDictionary()) 158 return getattr(cls, attr_name) 159 160 def __new__(cls, io_loop=None, force_instance=False, **kwargs): 161 io_loop = io_loop or IOLoop.current() 162 if force_instance: 163 instance_cache = None 164 else: 165 instance_cache = cls._async_clients() 166 if instance_cache is not None and io_loop in instance_cache: 167 return instance_cache[io_loop] 168 instance = super(AsyncHTTPClient, cls).__new__(cls, io_loop=io_loop, 169 **kwargs) 170 # Make sure the instance knows which cache to remove itself from. 171 # It can't simply call _async_clients() because we may be in 172 # __new__(AsyncHTTPClient) but instance.__class__ may be 173 # SimpleAsyncHTTPClient. 174 instance._instance_cache = instance_cache 175 if instance_cache is not None: 176 instance_cache[instance.io_loop] = instance 177 return instance 178 179 def initialize(self, io_loop, defaults=None): 180 self.io_loop = io_loop 181 self.defaults = dict(HTTPRequest._DEFAULTS) 182 if defaults is not None: 183 self.defaults.update(defaults) 184 self._closed = False 185 186 def close(self): 187 """Destroys this HTTP client, freeing any file descriptors used. 188 189 This method is **not needed in normal use** due to the way 190 that `AsyncHTTPClient` objects are transparently reused. 191 ``close()`` is generally only necessary when either the 192 `.IOLoop` is also being closed, or the ``force_instance=True`` 193 argument was used when creating the `AsyncHTTPClient`. 194 195 No other methods may be called on the `AsyncHTTPClient` after 196 ``close()``. 197 198 """ 199 if self._closed: 200 return 201 self._closed = True 202 if self._instance_cache is not None: 203 if self._instance_cache.get(self.io_loop) is not self: 204 raise RuntimeError("inconsistent AsyncHTTPClient cache") 205 del self._instance_cache[self.io_loop] 206 207 def fetch(self, request, callback=None, raise_error=True, **kwargs): 208 """Executes a request, asynchronously returning an `HTTPResponse`. 209 210 The request may be either a string URL or an `HTTPRequest` object. 211 If it is a string, we construct an `HTTPRequest` using any additional 212 kwargs: ``HTTPRequest(request, **kwargs)`` 213 214 This method returns a `.Future` whose result is an 215 `HTTPResponse`. By default, the ``Future`` will raise an 216 `HTTPError` if the request returned a non-200 response code 217 (other errors may also be raised if the server could not be 218 contacted). Instead, if ``raise_error`` is set to False, the 219 response will always be returned regardless of the response 220 code. 221 222 If a ``callback`` is given, it will be invoked with the `HTTPResponse`. 223 In the callback interface, `HTTPError` is not automatically raised. 224 Instead, you must check the response's ``error`` attribute or 225 call its `~HTTPResponse.rethrow` method. 226 """ 227 if self._closed: 228 raise RuntimeError("fetch() called on closed AsyncHTTPClient") 229 if not isinstance(request, HTTPRequest): 230 request = HTTPRequest(url=request, **kwargs) 231 else: 232 if kwargs: 233 raise ValueError("kwargs can't be used if request is an HTTPRequest object") 234 # We may modify this (to add Host, Accept-Encoding, etc), 235 # so make sure we don't modify the caller's object. This is also 236 # where normal dicts get converted to HTTPHeaders objects. 237 request.headers = httputil.HTTPHeaders(request.headers) 238 request = _RequestProxy(request, self.defaults) 239 future = TracebackFuture() 240 if callback is not None: 241 callback = stack_context.wrap(callback) 242 243 def handle_future(future): 244 exc = future.exception() 245 if isinstance(exc, HTTPError) and exc.response is not None: 246 response = exc.response 247 elif exc is not None: 248 response = HTTPResponse( 249 request, 599, error=exc, 250 request_time=time.time() - request.start_time) 251 else: 252 response = future.result() 253 self.io_loop.add_callback(callback, response) 254 future.add_done_callback(handle_future) 255 256 def handle_response(response): 257 if raise_error and response.error: 258 future.set_exception(response.error) 259 else: 260 future.set_result(response) 261 self.fetch_impl(request, handle_response) 262 return future 263 264 def fetch_impl(self, request, callback): 265 raise NotImplementedError() 266 267 @classmethod 268 def configure(cls, impl, **kwargs): 269 """Configures the `AsyncHTTPClient` subclass to use. 270 271 ``AsyncHTTPClient()`` actually creates an instance of a subclass. 272 This method may be called with either a class object or the 273 fully-qualified name of such a class (or ``None`` to use the default, 274 ``SimpleAsyncHTTPClient``) 275 276 If additional keyword arguments are given, they will be passed 277 to the constructor of each subclass instance created. The 278 keyword argument ``max_clients`` determines the maximum number 279 of simultaneous `~AsyncHTTPClient.fetch()` operations that can 280 execute in parallel on each `.IOLoop`. Additional arguments 281 may be supported depending on the implementation class in use. 282 283 Example:: 284 285 AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient") 286 """ 287 super(AsyncHTTPClient, cls).configure(impl, **kwargs) 288 289 290class HTTPRequest(object): 291 """HTTP client request object.""" 292 293 # Default values for HTTPRequest parameters. 294 # Merged with the values on the request object by AsyncHTTPClient 295 # implementations. 296 _DEFAULTS = dict( 297 connect_timeout=20.0, 298 request_timeout=20.0, 299 follow_redirects=True, 300 max_redirects=5, 301 decompress_response=True, 302 proxy_password='', 303 allow_nonstandard_methods=False, 304 validate_cert=True) 305 306 def __init__(self, url, method="GET", headers=None, body=None, 307 auth_username=None, auth_password=None, auth_mode=None, 308 connect_timeout=None, request_timeout=None, 309 if_modified_since=None, follow_redirects=None, 310 max_redirects=None, user_agent=None, use_gzip=None, 311 network_interface=None, streaming_callback=None, 312 header_callback=None, prepare_curl_callback=None, 313 proxy_host=None, proxy_port=None, proxy_username=None, 314 proxy_password=None, proxy_auth_mode=None, 315 allow_nonstandard_methods=None, validate_cert=None, 316 ca_certs=None, allow_ipv6=None, client_key=None, 317 client_cert=None, body_producer=None, 318 expect_100_continue=False, decompress_response=None, 319 ssl_options=None): 320 r"""All parameters except ``url`` are optional. 321 322 :arg string url: URL to fetch 323 :arg string method: HTTP method, e.g. "GET" or "POST" 324 :arg headers: Additional HTTP headers to pass on the request 325 :type headers: `~tornado.httputil.HTTPHeaders` or `dict` 326 :arg body: HTTP request body as a string (byte or unicode; if unicode 327 the utf-8 encoding will be used) 328 :arg body_producer: Callable used for lazy/asynchronous request bodies. 329 It is called with one argument, a ``write`` function, and should 330 return a `.Future`. It should call the write function with new 331 data as it becomes available. The write function returns a 332 `.Future` which can be used for flow control. 333 Only one of ``body`` and ``body_producer`` may 334 be specified. ``body_producer`` is not supported on 335 ``curl_httpclient``. When using ``body_producer`` it is recommended 336 to pass a ``Content-Length`` in the headers as otherwise chunked 337 encoding will be used, and many servers do not support chunked 338 encoding on requests. New in Tornado 4.0 339 :arg string auth_username: Username for HTTP authentication 340 :arg string auth_password: Password for HTTP authentication 341 :arg string auth_mode: Authentication mode; default is "basic". 342 Allowed values are implementation-defined; ``curl_httpclient`` 343 supports "basic" and "digest"; ``simple_httpclient`` only supports 344 "basic" 345 :arg float connect_timeout: Timeout for initial connection in seconds, 346 default 20 seconds 347 :arg float request_timeout: Timeout for entire request in seconds, 348 default 20 seconds 349 :arg if_modified_since: Timestamp for ``If-Modified-Since`` header 350 :type if_modified_since: `datetime` or `float` 351 :arg bool follow_redirects: Should redirects be followed automatically 352 or return the 3xx response? Default True. 353 :arg int max_redirects: Limit for ``follow_redirects``, default 5. 354 :arg string user_agent: String to send as ``User-Agent`` header 355 :arg bool decompress_response: Request a compressed response from 356 the server and decompress it after downloading. Default is True. 357 New in Tornado 4.0. 358 :arg bool use_gzip: Deprecated alias for ``decompress_response`` 359 since Tornado 4.0. 360 :arg string network_interface: Network interface to use for request. 361 ``curl_httpclient`` only; see note below. 362 :arg callable streaming_callback: If set, ``streaming_callback`` will 363 be run with each chunk of data as it is received, and 364 ``HTTPResponse.body`` and ``HTTPResponse.buffer`` will be empty in 365 the final response. 366 :arg callable header_callback: If set, ``header_callback`` will 367 be run with each header line as it is received (including the 368 first line, e.g. ``HTTP/1.0 200 OK\r\n``, and a final line 369 containing only ``\r\n``. All lines include the trailing newline 370 characters). ``HTTPResponse.headers`` will be empty in the final 371 response. This is most useful in conjunction with 372 ``streaming_callback``, because it's the only way to get access to 373 header data while the request is in progress. 374 :arg callable prepare_curl_callback: If set, will be called with 375 a ``pycurl.Curl`` object to allow the application to make additional 376 ``setopt`` calls. 377 :arg string proxy_host: HTTP proxy hostname. To use proxies, 378 ``proxy_host`` and ``proxy_port`` must be set; ``proxy_username``, 379 ``proxy_pass`` and ``proxy_auth_mode`` are optional. Proxies are 380 currently only supported with ``curl_httpclient``. 381 :arg int proxy_port: HTTP proxy port 382 :arg string proxy_username: HTTP proxy username 383 :arg string proxy_password: HTTP proxy password 384 :arg string proxy_auth_mode: HTTP proxy Authentication mode; 385 default is "basic". supports "basic" and "digest" 386 :arg bool allow_nonstandard_methods: Allow unknown values for ``method`` 387 argument? Default is False. 388 :arg bool validate_cert: For HTTPS requests, validate the server's 389 certificate? Default is True. 390 :arg string ca_certs: filename of CA certificates in PEM format, 391 or None to use defaults. See note below when used with 392 ``curl_httpclient``. 393 :arg string client_key: Filename for client SSL key, if any. See 394 note below when used with ``curl_httpclient``. 395 :arg string client_cert: Filename for client SSL certificate, if any. 396 See note below when used with ``curl_httpclient``. 397 :arg ssl.SSLContext ssl_options: `ssl.SSLContext` object for use in 398 ``simple_httpclient`` (unsupported by ``curl_httpclient``). 399 Overrides ``validate_cert``, ``ca_certs``, ``client_key``, 400 and ``client_cert``. 401 :arg bool allow_ipv6: Use IPv6 when available? Default is true. 402 :arg bool expect_100_continue: If true, send the 403 ``Expect: 100-continue`` header and wait for a continue response 404 before sending the request body. Only supported with 405 simple_httpclient. 406 407 .. note:: 408 409 When using ``curl_httpclient`` certain options may be 410 inherited by subsequent fetches because ``pycurl`` does 411 not allow them to be cleanly reset. This applies to the 412 ``ca_certs``, ``client_key``, ``client_cert``, and 413 ``network_interface`` arguments. If you use these 414 options, you should pass them on every request (you don't 415 have to always use the same values, but it's not possible 416 to mix requests that specify these options with ones that 417 use the defaults). 418 419 .. versionadded:: 3.1 420 The ``auth_mode`` argument. 421 422 .. versionadded:: 4.0 423 The ``body_producer`` and ``expect_100_continue`` arguments. 424 425 .. versionadded:: 4.2 426 The ``ssl_options`` argument. 427 428 .. versionadded:: 4.5 429 The ``proxy_auth_mode`` argument. 430 """ 431 # Note that some of these attributes go through property setters 432 # defined below. 433 self.headers = headers 434 if if_modified_since: 435 self.headers["If-Modified-Since"] = httputil.format_timestamp( 436 if_modified_since) 437 self.proxy_host = proxy_host 438 self.proxy_port = proxy_port 439 self.proxy_username = proxy_username 440 self.proxy_password = proxy_password 441 self.proxy_auth_mode = proxy_auth_mode 442 self.url = url 443 self.method = method 444 self.body = body 445 self.body_producer = body_producer 446 self.auth_username = auth_username 447 self.auth_password = auth_password 448 self.auth_mode = auth_mode 449 self.connect_timeout = connect_timeout 450 self.request_timeout = request_timeout 451 self.follow_redirects = follow_redirects 452 self.max_redirects = max_redirects 453 self.user_agent = user_agent 454 if decompress_response is not None: 455 self.decompress_response = decompress_response 456 else: 457 self.decompress_response = use_gzip 458 self.network_interface = network_interface 459 self.streaming_callback = streaming_callback 460 self.header_callback = header_callback 461 self.prepare_curl_callback = prepare_curl_callback 462 self.allow_nonstandard_methods = allow_nonstandard_methods 463 self.validate_cert = validate_cert 464 self.ca_certs = ca_certs 465 self.allow_ipv6 = allow_ipv6 466 self.client_key = client_key 467 self.client_cert = client_cert 468 self.ssl_options = ssl_options 469 self.expect_100_continue = expect_100_continue 470 self.start_time = time.time() 471 472 @property 473 def headers(self): 474 return self._headers 475 476 @headers.setter 477 def headers(self, value): 478 if value is None: 479 self._headers = httputil.HTTPHeaders() 480 else: 481 self._headers = value 482 483 @property 484 def body(self): 485 return self._body 486 487 @body.setter 488 def body(self, value): 489 self._body = utf8(value) 490 491 @property 492 def body_producer(self): 493 return self._body_producer 494 495 @body_producer.setter 496 def body_producer(self, value): 497 self._body_producer = stack_context.wrap(value) 498 499 @property 500 def streaming_callback(self): 501 return self._streaming_callback 502 503 @streaming_callback.setter 504 def streaming_callback(self, value): 505 self._streaming_callback = stack_context.wrap(value) 506 507 @property 508 def header_callback(self): 509 return self._header_callback 510 511 @header_callback.setter 512 def header_callback(self, value): 513 self._header_callback = stack_context.wrap(value) 514 515 @property 516 def prepare_curl_callback(self): 517 return self._prepare_curl_callback 518 519 @prepare_curl_callback.setter 520 def prepare_curl_callback(self, value): 521 self._prepare_curl_callback = stack_context.wrap(value) 522 523 524class HTTPResponse(object): 525 """HTTP Response object. 526 527 Attributes: 528 529 * request: HTTPRequest object 530 531 * code: numeric HTTP status code, e.g. 200 or 404 532 533 * reason: human-readable reason phrase describing the status code 534 535 * headers: `tornado.httputil.HTTPHeaders` object 536 537 * effective_url: final location of the resource after following any 538 redirects 539 540 * buffer: ``cStringIO`` object for response body 541 542 * body: response body as bytes (created on demand from ``self.buffer``) 543 544 * error: Exception object, if any 545 546 * request_time: seconds from request start to finish 547 548 * time_info: dictionary of diagnostic timing information from the request. 549 Available data are subject to change, but currently uses timings 550 available from http://curl.haxx.se/libcurl/c/curl_easy_getinfo.html, 551 plus ``queue``, which is the delay (if any) introduced by waiting for 552 a slot under `AsyncHTTPClient`'s ``max_clients`` setting. 553 """ 554 def __init__(self, request, code, headers=None, buffer=None, 555 effective_url=None, error=None, request_time=None, 556 time_info=None, reason=None): 557 if isinstance(request, _RequestProxy): 558 self.request = request.request 559 else: 560 self.request = request 561 self.code = code 562 self.reason = reason or httputil.responses.get(code, "Unknown") 563 if headers is not None: 564 self.headers = headers 565 else: 566 self.headers = httputil.HTTPHeaders() 567 self.buffer = buffer 568 self._body = None 569 if effective_url is None: 570 self.effective_url = request.url 571 else: 572 self.effective_url = effective_url 573 if error is None: 574 if self.code < 200 or self.code >= 300: 575 self.error = HTTPError(self.code, message=self.reason, 576 response=self) 577 else: 578 self.error = None 579 else: 580 self.error = error 581 self.request_time = request_time 582 self.time_info = time_info or {} 583 584 @property 585 def body(self): 586 if self.buffer is None: 587 return None 588 elif self._body is None: 589 self._body = self.buffer.getvalue() 590 591 return self._body 592 593 def rethrow(self): 594 """If there was an error on the request, raise an `HTTPError`.""" 595 if self.error: 596 raise self.error 597 598 def __repr__(self): 599 args = ",".join("%s=%r" % i for i in sorted(self.__dict__.items())) 600 return "%s(%s)" % (self.__class__.__name__, args) 601 602 603class HTTPError(Exception): 604 """Exception thrown for an unsuccessful HTTP request. 605 606 Attributes: 607 608 * ``code`` - HTTP error integer error code, e.g. 404. Error code 599 is 609 used when no HTTP response was received, e.g. for a timeout. 610 611 * ``response`` - `HTTPResponse` object, if any. 612 613 Note that if ``follow_redirects`` is False, redirects become HTTPErrors, 614 and you can look at ``error.response.headers['Location']`` to see the 615 destination of the redirect. 616 """ 617 def __init__(self, code, message=None, response=None): 618 self.code = code 619 self.message = message or httputil.responses.get(code, "Unknown") 620 self.response = response 621 super(HTTPError, self).__init__(code, message, response) 622 623 def __str__(self): 624 return "HTTP %d: %s" % (self.code, self.message) 625 626 # There is a cyclic reference between self and self.response, 627 # which breaks the default __repr__ implementation. 628 # (especially on pypy, which doesn't have the same recursion 629 # detection as cpython). 630 __repr__ = __str__ 631 632 633class _RequestProxy(object): 634 """Combines an object with a dictionary of defaults. 635 636 Used internally by AsyncHTTPClient implementations. 637 """ 638 def __init__(self, request, defaults): 639 self.request = request 640 self.defaults = defaults 641 642 def __getattr__(self, name): 643 request_attr = getattr(self.request, name) 644 if request_attr is not None: 645 return request_attr 646 elif self.defaults is not None: 647 return self.defaults.get(name, None) 648 else: 649 return None 650 651 652def main(): 653 from salt.ext.tornado.options import define, options, parse_command_line 654 define("print_headers", type=bool, default=False) 655 define("print_body", type=bool, default=True) 656 define("follow_redirects", type=bool, default=True) 657 define("validate_cert", type=bool, default=True) 658 args = parse_command_line() 659 client = HTTPClient() 660 for arg in args: 661 try: 662 response = client.fetch(arg, 663 follow_redirects=options.follow_redirects, 664 validate_cert=options.validate_cert, 665 ) 666 except HTTPError as e: 667 if e.response is not None: 668 response = e.response 669 else: 670 raise 671 if options.print_headers: 672 print(response.headers) 673 if options.print_body: 674 print(native_str(response.body)) 675 client.close() 676 677 678if __name__ == "__main__": 679 main() 680