1"""HTTP/1.1 client library 2 3A backport of the Python 3.3 http/client.py module for python-future. 4 5<intro stuff goes here> 6<other stuff, too> 7 8HTTPConnection goes through a number of "states", which define when a client 9may legally make another request or fetch the response for a particular 10request. This diagram details these state transitions: 11 12 (null) 13 | 14 | HTTPConnection() 15 v 16 Idle 17 | 18 | putrequest() 19 v 20 Request-started 21 | 22 | ( putheader() )* endheaders() 23 v 24 Request-sent 25 | 26 | response = getresponse() 27 v 28 Unread-response [Response-headers-read] 29 |\____________________ 30 | | 31 | response.read() | putrequest() 32 v v 33 Idle Req-started-unread-response 34 ______/| 35 / | 36 response.read() | | ( putheader() )* endheaders() 37 v v 38 Request-started Req-sent-unread-response 39 | 40 | response.read() 41 v 42 Request-sent 43 44This diagram presents the following rules: 45 -- a second request may not be started until {response-headers-read} 46 -- a response [object] cannot be retrieved until {request-sent} 47 -- there is no differentiation between an unread response body and a 48 partially read response body 49 50Note: this enforcement is applied by the HTTPConnection class. The 51 HTTPResponse class does not enforce this state machine, which 52 implies sophisticated clients may accelerate the request/response 53 pipeline. Caution should be taken, though: accelerating the states 54 beyond the above pattern may imply knowledge of the server's 55 connection-close behavior for certain requests. For example, it 56 is impossible to tell whether the server will close the connection 57 UNTIL the response headers have been read; this means that further 58 requests cannot be placed into the pipeline until it is known that 59 the server will NOT be closing the connection. 60 61Logical State __state __response 62------------- ------- ---------- 63Idle _CS_IDLE None 64Request-started _CS_REQ_STARTED None 65Request-sent _CS_REQ_SENT None 66Unread-response _CS_IDLE <response_class> 67Req-started-unread-response _CS_REQ_STARTED <response_class> 68Req-sent-unread-response _CS_REQ_SENT <response_class> 69""" 70 71from __future__ import (absolute_import, division, 72 print_function, unicode_literals) 73from future.builtins import bytes, int, str, super 74from future.utils import PY2 75 76from future.backports.email import parser as email_parser 77from future.backports.email import message as email_message 78from future.backports.misc import create_connection as socket_create_connection 79import io 80import os 81import socket 82from future.backports.urllib.parse import urlsplit 83import warnings 84from array import array 85 86if PY2: 87 from collections import Iterable 88else: 89 from collections.abc import Iterable 90 91__all__ = ["HTTPResponse", "HTTPConnection", 92 "HTTPException", "NotConnected", "UnknownProtocol", 93 "UnknownTransferEncoding", "UnimplementedFileMode", 94 "IncompleteRead", "InvalidURL", "ImproperConnectionState", 95 "CannotSendRequest", "CannotSendHeader", "ResponseNotReady", 96 "BadStatusLine", "error", "responses"] 97 98HTTP_PORT = 80 99HTTPS_PORT = 443 100 101_UNKNOWN = 'UNKNOWN' 102 103# connection states 104_CS_IDLE = 'Idle' 105_CS_REQ_STARTED = 'Request-started' 106_CS_REQ_SENT = 'Request-sent' 107 108# status codes 109# informational 110CONTINUE = 100 111SWITCHING_PROTOCOLS = 101 112PROCESSING = 102 113 114# successful 115OK = 200 116CREATED = 201 117ACCEPTED = 202 118NON_AUTHORITATIVE_INFORMATION = 203 119NO_CONTENT = 204 120RESET_CONTENT = 205 121PARTIAL_CONTENT = 206 122MULTI_STATUS = 207 123IM_USED = 226 124 125# redirection 126MULTIPLE_CHOICES = 300 127MOVED_PERMANENTLY = 301 128FOUND = 302 129SEE_OTHER = 303 130NOT_MODIFIED = 304 131USE_PROXY = 305 132TEMPORARY_REDIRECT = 307 133 134# client error 135BAD_REQUEST = 400 136UNAUTHORIZED = 401 137PAYMENT_REQUIRED = 402 138FORBIDDEN = 403 139NOT_FOUND = 404 140METHOD_NOT_ALLOWED = 405 141NOT_ACCEPTABLE = 406 142PROXY_AUTHENTICATION_REQUIRED = 407 143REQUEST_TIMEOUT = 408 144CONFLICT = 409 145GONE = 410 146LENGTH_REQUIRED = 411 147PRECONDITION_FAILED = 412 148REQUEST_ENTITY_TOO_LARGE = 413 149REQUEST_URI_TOO_LONG = 414 150UNSUPPORTED_MEDIA_TYPE = 415 151REQUESTED_RANGE_NOT_SATISFIABLE = 416 152EXPECTATION_FAILED = 417 153UNPROCESSABLE_ENTITY = 422 154LOCKED = 423 155FAILED_DEPENDENCY = 424 156UPGRADE_REQUIRED = 426 157PRECONDITION_REQUIRED = 428 158TOO_MANY_REQUESTS = 429 159REQUEST_HEADER_FIELDS_TOO_LARGE = 431 160 161# server error 162INTERNAL_SERVER_ERROR = 500 163NOT_IMPLEMENTED = 501 164BAD_GATEWAY = 502 165SERVICE_UNAVAILABLE = 503 166GATEWAY_TIMEOUT = 504 167HTTP_VERSION_NOT_SUPPORTED = 505 168INSUFFICIENT_STORAGE = 507 169NOT_EXTENDED = 510 170NETWORK_AUTHENTICATION_REQUIRED = 511 171 172# Mapping status codes to official W3C names 173responses = { 174 100: 'Continue', 175 101: 'Switching Protocols', 176 177 200: 'OK', 178 201: 'Created', 179 202: 'Accepted', 180 203: 'Non-Authoritative Information', 181 204: 'No Content', 182 205: 'Reset Content', 183 206: 'Partial Content', 184 185 300: 'Multiple Choices', 186 301: 'Moved Permanently', 187 302: 'Found', 188 303: 'See Other', 189 304: 'Not Modified', 190 305: 'Use Proxy', 191 306: '(Unused)', 192 307: 'Temporary Redirect', 193 194 400: 'Bad Request', 195 401: 'Unauthorized', 196 402: 'Payment Required', 197 403: 'Forbidden', 198 404: 'Not Found', 199 405: 'Method Not Allowed', 200 406: 'Not Acceptable', 201 407: 'Proxy Authentication Required', 202 408: 'Request Timeout', 203 409: 'Conflict', 204 410: 'Gone', 205 411: 'Length Required', 206 412: 'Precondition Failed', 207 413: 'Request Entity Too Large', 208 414: 'Request-URI Too Long', 209 415: 'Unsupported Media Type', 210 416: 'Requested Range Not Satisfiable', 211 417: 'Expectation Failed', 212 428: 'Precondition Required', 213 429: 'Too Many Requests', 214 431: 'Request Header Fields Too Large', 215 216 500: 'Internal Server Error', 217 501: 'Not Implemented', 218 502: 'Bad Gateway', 219 503: 'Service Unavailable', 220 504: 'Gateway Timeout', 221 505: 'HTTP Version Not Supported', 222 511: 'Network Authentication Required', 223} 224 225# maximal amount of data to read at one time in _safe_read 226MAXAMOUNT = 1048576 227 228# maximal line length when calling readline(). 229_MAXLINE = 65536 230_MAXHEADERS = 100 231 232 233class HTTPMessage(email_message.Message): 234 # XXX The only usage of this method is in 235 # http.server.CGIHTTPRequestHandler. Maybe move the code there so 236 # that it doesn't need to be part of the public API. The API has 237 # never been defined so this could cause backwards compatibility 238 # issues. 239 240 def getallmatchingheaders(self, name): 241 """Find all header lines matching a given header name. 242 243 Look through the list of headers and find all lines matching a given 244 header name (and their continuation lines). A list of the lines is 245 returned, without interpretation. If the header does not occur, an 246 empty list is returned. If the header occurs multiple times, all 247 occurrences are returned. Case is not important in the header name. 248 249 """ 250 name = name.lower() + ':' 251 n = len(name) 252 lst = [] 253 hit = 0 254 for line in self.keys(): 255 if line[:n].lower() == name: 256 hit = 1 257 elif not line[:1].isspace(): 258 hit = 0 259 if hit: 260 lst.append(line) 261 return lst 262 263def parse_headers(fp, _class=HTTPMessage): 264 """Parses only RFC2822 headers from a file pointer. 265 266 email Parser wants to see strings rather than bytes. 267 But a TextIOWrapper around self.rfile would buffer too many bytes 268 from the stream, bytes which we later need to read as bytes. 269 So we read the correct bytes here, as bytes, for email Parser 270 to parse. 271 272 """ 273 headers = [] 274 while True: 275 line = fp.readline(_MAXLINE + 1) 276 if len(line) > _MAXLINE: 277 raise LineTooLong("header line") 278 headers.append(line) 279 if len(headers) > _MAXHEADERS: 280 raise HTTPException("got more than %d headers" % _MAXHEADERS) 281 if line in (b'\r\n', b'\n', b''): 282 break 283 hstring = bytes(b'').join(headers).decode('iso-8859-1') 284 return email_parser.Parser(_class=_class).parsestr(hstring) 285 286 287_strict_sentinel = object() 288 289class HTTPResponse(io.RawIOBase): 290 291 # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details. 292 293 # The bytes from the socket object are iso-8859-1 strings. 294 # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded 295 # text following RFC 2047. The basic status line parsing only 296 # accepts iso-8859-1. 297 298 def __init__(self, sock, debuglevel=0, strict=_strict_sentinel, method=None, url=None): 299 # If the response includes a content-length header, we need to 300 # make sure that the client doesn't read more than the 301 # specified number of bytes. If it does, it will block until 302 # the server times out and closes the connection. This will 303 # happen if a self.fp.read() is done (without a size) whether 304 # self.fp is buffered or not. So, no self.fp.read() by 305 # clients unless they know what they are doing. 306 self.fp = sock.makefile("rb") 307 self.debuglevel = debuglevel 308 if strict is not _strict_sentinel: 309 warnings.warn("the 'strict' argument isn't supported anymore; " 310 "http.client now always assumes HTTP/1.x compliant servers.", 311 DeprecationWarning, 2) 312 self._method = method 313 314 # The HTTPResponse object is returned via urllib. The clients 315 # of http and urllib expect different attributes for the 316 # headers. headers is used here and supports urllib. msg is 317 # provided as a backwards compatibility layer for http 318 # clients. 319 320 self.headers = self.msg = None 321 322 # from the Status-Line of the response 323 self.version = _UNKNOWN # HTTP-Version 324 self.status = _UNKNOWN # Status-Code 325 self.reason = _UNKNOWN # Reason-Phrase 326 327 self.chunked = _UNKNOWN # is "chunked" being used? 328 self.chunk_left = _UNKNOWN # bytes left to read in current chunk 329 self.length = _UNKNOWN # number of bytes left in response 330 self.will_close = _UNKNOWN # conn will close at end of response 331 332 def _read_status(self): 333 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1") 334 if len(line) > _MAXLINE: 335 raise LineTooLong("status line") 336 if self.debuglevel > 0: 337 print("reply:", repr(line)) 338 if not line: 339 # Presumably, the server closed the connection before 340 # sending a valid response. 341 raise BadStatusLine(line) 342 try: 343 version, status, reason = line.split(None, 2) 344 except ValueError: 345 try: 346 version, status = line.split(None, 1) 347 reason = "" 348 except ValueError: 349 # empty version will cause next test to fail. 350 version = "" 351 if not version.startswith("HTTP/"): 352 self._close_conn() 353 raise BadStatusLine(line) 354 355 # The status code is a three-digit number 356 try: 357 status = int(status) 358 if status < 100 or status > 999: 359 raise BadStatusLine(line) 360 except ValueError: 361 raise BadStatusLine(line) 362 return version, status, reason 363 364 def begin(self): 365 if self.headers is not None: 366 # we've already started reading the response 367 return 368 369 # read until we get a non-100 response 370 while True: 371 version, status, reason = self._read_status() 372 if status != CONTINUE: 373 break 374 # skip the header from the 100 response 375 while True: 376 skip = self.fp.readline(_MAXLINE + 1) 377 if len(skip) > _MAXLINE: 378 raise LineTooLong("header line") 379 skip = skip.strip() 380 if not skip: 381 break 382 if self.debuglevel > 0: 383 print("header:", skip) 384 385 self.code = self.status = status 386 self.reason = reason.strip() 387 if version in ("HTTP/1.0", "HTTP/0.9"): 388 # Some servers might still return "0.9", treat it as 1.0 anyway 389 self.version = 10 390 elif version.startswith("HTTP/1."): 391 self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1 392 else: 393 raise UnknownProtocol(version) 394 395 self.headers = self.msg = parse_headers(self.fp) 396 397 if self.debuglevel > 0: 398 for hdr in self.headers: 399 print("header:", hdr, end=" ") 400 401 # are we using the chunked-style of transfer encoding? 402 tr_enc = self.headers.get("transfer-encoding") 403 if tr_enc and tr_enc.lower() == "chunked": 404 self.chunked = True 405 self.chunk_left = None 406 else: 407 self.chunked = False 408 409 # will the connection close at the end of the response? 410 self.will_close = self._check_close() 411 412 # do we have a Content-Length? 413 # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked" 414 self.length = None 415 length = self.headers.get("content-length") 416 417 # are we using the chunked-style of transfer encoding? 418 tr_enc = self.headers.get("transfer-encoding") 419 if length and not self.chunked: 420 try: 421 self.length = int(length) 422 except ValueError: 423 self.length = None 424 else: 425 if self.length < 0: # ignore nonsensical negative lengths 426 self.length = None 427 else: 428 self.length = None 429 430 # does the body have a fixed length? (of zero) 431 if (status == NO_CONTENT or status == NOT_MODIFIED or 432 100 <= status < 200 or # 1xx codes 433 self._method == "HEAD"): 434 self.length = 0 435 436 # if the connection remains open, and we aren't using chunked, and 437 # a content-length was not provided, then assume that the connection 438 # WILL close. 439 if (not self.will_close and 440 not self.chunked and 441 self.length is None): 442 self.will_close = True 443 444 def _check_close(self): 445 conn = self.headers.get("connection") 446 if self.version == 11: 447 # An HTTP/1.1 proxy is assumed to stay open unless 448 # explicitly closed. 449 conn = self.headers.get("connection") 450 if conn and "close" in conn.lower(): 451 return True 452 return False 453 454 # Some HTTP/1.0 implementations have support for persistent 455 # connections, using rules different than HTTP/1.1. 456 457 # For older HTTP, Keep-Alive indicates persistent connection. 458 if self.headers.get("keep-alive"): 459 return False 460 461 # At least Akamai returns a "Connection: Keep-Alive" header, 462 # which was supposed to be sent by the client. 463 if conn and "keep-alive" in conn.lower(): 464 return False 465 466 # Proxy-Connection is a netscape hack. 467 pconn = self.headers.get("proxy-connection") 468 if pconn and "keep-alive" in pconn.lower(): 469 return False 470 471 # otherwise, assume it will close 472 return True 473 474 def _close_conn(self): 475 fp = self.fp 476 self.fp = None 477 fp.close() 478 479 def close(self): 480 super().close() # set "closed" flag 481 if self.fp: 482 self._close_conn() 483 484 # These implementations are for the benefit of io.BufferedReader. 485 486 # XXX This class should probably be revised to act more like 487 # the "raw stream" that BufferedReader expects. 488 489 def flush(self): 490 super().flush() 491 if self.fp: 492 self.fp.flush() 493 494 def readable(self): 495 return True 496 497 # End of "raw stream" methods 498 499 def isclosed(self): 500 """True if the connection is closed.""" 501 # NOTE: it is possible that we will not ever call self.close(). This 502 # case occurs when will_close is TRUE, length is None, and we 503 # read up to the last byte, but NOT past it. 504 # 505 # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be 506 # called, meaning self.isclosed() is meaningful. 507 return self.fp is None 508 509 def read(self, amt=None): 510 if self.fp is None: 511 return bytes(b"") 512 513 if self._method == "HEAD": 514 self._close_conn() 515 return bytes(b"") 516 517 if amt is not None: 518 # Amount is given, so call base class version 519 # (which is implemented in terms of self.readinto) 520 return bytes(super(HTTPResponse, self).read(amt)) 521 else: 522 # Amount is not given (unbounded read) so we must check self.length 523 # and self.chunked 524 525 if self.chunked: 526 return self._readall_chunked() 527 528 if self.length is None: 529 s = self.fp.read() 530 else: 531 try: 532 s = self._safe_read(self.length) 533 except IncompleteRead: 534 self._close_conn() 535 raise 536 self.length = 0 537 self._close_conn() # we read everything 538 return bytes(s) 539 540 def readinto(self, b): 541 if self.fp is None: 542 return 0 543 544 if self._method == "HEAD": 545 self._close_conn() 546 return 0 547 548 if self.chunked: 549 return self._readinto_chunked(b) 550 551 if self.length is not None: 552 if len(b) > self.length: 553 # clip the read to the "end of response" 554 b = memoryview(b)[0:self.length] 555 556 # we do not use _safe_read() here because this may be a .will_close 557 # connection, and the user is reading more bytes than will be provided 558 # (for example, reading in 1k chunks) 559 560 if PY2: 561 data = self.fp.read(len(b)) 562 n = len(data) 563 b[:n] = data 564 else: 565 n = self.fp.readinto(b) 566 567 if not n and b: 568 # Ideally, we would raise IncompleteRead if the content-length 569 # wasn't satisfied, but it might break compatibility. 570 self._close_conn() 571 elif self.length is not None: 572 self.length -= n 573 if not self.length: 574 self._close_conn() 575 return n 576 577 def _read_next_chunk_size(self): 578 # Read the next chunk size from the file 579 line = self.fp.readline(_MAXLINE + 1) 580 if len(line) > _MAXLINE: 581 raise LineTooLong("chunk size") 582 i = line.find(b";") 583 if i >= 0: 584 line = line[:i] # strip chunk-extensions 585 try: 586 return int(line, 16) 587 except ValueError: 588 # close the connection as protocol synchronisation is 589 # probably lost 590 self._close_conn() 591 raise 592 593 def _read_and_discard_trailer(self): 594 # read and discard trailer up to the CRLF terminator 595 ### note: we shouldn't have any trailers! 596 while True: 597 line = self.fp.readline(_MAXLINE + 1) 598 if len(line) > _MAXLINE: 599 raise LineTooLong("trailer line") 600 if not line: 601 # a vanishingly small number of sites EOF without 602 # sending the trailer 603 break 604 if line in (b'\r\n', b'\n', b''): 605 break 606 607 def _readall_chunked(self): 608 assert self.chunked != _UNKNOWN 609 chunk_left = self.chunk_left 610 value = [] 611 while True: 612 if chunk_left is None: 613 try: 614 chunk_left = self._read_next_chunk_size() 615 if chunk_left == 0: 616 break 617 except ValueError: 618 raise IncompleteRead(bytes(b'').join(value)) 619 value.append(self._safe_read(chunk_left)) 620 621 # we read the whole chunk, get another 622 self._safe_read(2) # toss the CRLF at the end of the chunk 623 chunk_left = None 624 625 self._read_and_discard_trailer() 626 627 # we read everything; close the "file" 628 self._close_conn() 629 630 return bytes(b'').join(value) 631 632 def _readinto_chunked(self, b): 633 assert self.chunked != _UNKNOWN 634 chunk_left = self.chunk_left 635 636 total_bytes = 0 637 mvb = memoryview(b) 638 while True: 639 if chunk_left is None: 640 try: 641 chunk_left = self._read_next_chunk_size() 642 if chunk_left == 0: 643 break 644 except ValueError: 645 raise IncompleteRead(bytes(b[0:total_bytes])) 646 647 if len(mvb) < chunk_left: 648 n = self._safe_readinto(mvb) 649 self.chunk_left = chunk_left - n 650 return total_bytes + n 651 elif len(mvb) == chunk_left: 652 n = self._safe_readinto(mvb) 653 self._safe_read(2) # toss the CRLF at the end of the chunk 654 self.chunk_left = None 655 return total_bytes + n 656 else: 657 temp_mvb = mvb[0:chunk_left] 658 n = self._safe_readinto(temp_mvb) 659 mvb = mvb[n:] 660 total_bytes += n 661 662 # we read the whole chunk, get another 663 self._safe_read(2) # toss the CRLF at the end of the chunk 664 chunk_left = None 665 666 self._read_and_discard_trailer() 667 668 # we read everything; close the "file" 669 self._close_conn() 670 671 return total_bytes 672 673 def _safe_read(self, amt): 674 """Read the number of bytes requested, compensating for partial reads. 675 676 Normally, we have a blocking socket, but a read() can be interrupted 677 by a signal (resulting in a partial read). 678 679 Note that we cannot distinguish between EOF and an interrupt when zero 680 bytes have been read. IncompleteRead() will be raised in this 681 situation. 682 683 This function should be used when <amt> bytes "should" be present for 684 reading. If the bytes are truly not available (due to EOF), then the 685 IncompleteRead exception can be used to detect the problem. 686 """ 687 s = [] 688 while amt > 0: 689 chunk = self.fp.read(min(amt, MAXAMOUNT)) 690 if not chunk: 691 raise IncompleteRead(bytes(b'').join(s), amt) 692 s.append(chunk) 693 amt -= len(chunk) 694 return bytes(b"").join(s) 695 696 def _safe_readinto(self, b): 697 """Same as _safe_read, but for reading into a buffer.""" 698 total_bytes = 0 699 mvb = memoryview(b) 700 while total_bytes < len(b): 701 if MAXAMOUNT < len(mvb): 702 temp_mvb = mvb[0:MAXAMOUNT] 703 if PY2: 704 data = self.fp.read(len(temp_mvb)) 705 n = len(data) 706 temp_mvb[:n] = data 707 else: 708 n = self.fp.readinto(temp_mvb) 709 else: 710 if PY2: 711 data = self.fp.read(len(mvb)) 712 n = len(data) 713 mvb[:n] = data 714 else: 715 n = self.fp.readinto(mvb) 716 if not n: 717 raise IncompleteRead(bytes(mvb[0:total_bytes]), len(b)) 718 mvb = mvb[n:] 719 total_bytes += n 720 return total_bytes 721 722 def fileno(self): 723 return self.fp.fileno() 724 725 def getheader(self, name, default=None): 726 if self.headers is None: 727 raise ResponseNotReady() 728 headers = self.headers.get_all(name) or default 729 if isinstance(headers, str) or not hasattr(headers, '__iter__'): 730 return headers 731 else: 732 return ', '.join(headers) 733 734 def getheaders(self): 735 """Return list of (header, value) tuples.""" 736 if self.headers is None: 737 raise ResponseNotReady() 738 return list(self.headers.items()) 739 740 # We override IOBase.__iter__ so that it doesn't check for closed-ness 741 742 def __iter__(self): 743 return self 744 745 # For compatibility with old-style urllib responses. 746 747 def info(self): 748 return self.headers 749 750 def geturl(self): 751 return self.url 752 753 def getcode(self): 754 return self.status 755 756class HTTPConnection(object): 757 758 _http_vsn = 11 759 _http_vsn_str = 'HTTP/1.1' 760 761 response_class = HTTPResponse 762 default_port = HTTP_PORT 763 auto_open = 1 764 debuglevel = 0 765 766 def __init__(self, host, port=None, strict=_strict_sentinel, 767 timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None): 768 if strict is not _strict_sentinel: 769 warnings.warn("the 'strict' argument isn't supported anymore; " 770 "http.client now always assumes HTTP/1.x compliant servers.", 771 DeprecationWarning, 2) 772 self.timeout = timeout 773 self.source_address = source_address 774 self.sock = None 775 self._buffer = [] 776 self.__response = None 777 self.__state = _CS_IDLE 778 self._method = None 779 self._tunnel_host = None 780 self._tunnel_port = None 781 self._tunnel_headers = {} 782 783 self._set_hostport(host, port) 784 785 def set_tunnel(self, host, port=None, headers=None): 786 """ Sets up the host and the port for the HTTP CONNECT Tunnelling. 787 788 The headers argument should be a mapping of extra HTTP headers 789 to send with the CONNECT request. 790 """ 791 self._tunnel_host = host 792 self._tunnel_port = port 793 if headers: 794 self._tunnel_headers = headers 795 else: 796 self._tunnel_headers.clear() 797 798 def _set_hostport(self, host, port): 799 if port is None: 800 i = host.rfind(':') 801 j = host.rfind(']') # ipv6 addresses have [...] 802 if i > j: 803 try: 804 port = int(host[i+1:]) 805 except ValueError: 806 if host[i+1:] == "": # http://foo.com:/ == http://foo.com/ 807 port = self.default_port 808 else: 809 raise InvalidURL("nonnumeric port: '%s'" % host[i+1:]) 810 host = host[:i] 811 else: 812 port = self.default_port 813 if host and host[0] == '[' and host[-1] == ']': 814 host = host[1:-1] 815 self.host = host 816 self.port = port 817 818 def set_debuglevel(self, level): 819 self.debuglevel = level 820 821 def _tunnel(self): 822 self._set_hostport(self._tunnel_host, self._tunnel_port) 823 connect_str = "CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port) 824 connect_bytes = connect_str.encode("ascii") 825 self.send(connect_bytes) 826 for header, value in self._tunnel_headers.items(): 827 header_str = "%s: %s\r\n" % (header, value) 828 header_bytes = header_str.encode("latin-1") 829 self.send(header_bytes) 830 self.send(bytes(b'\r\n')) 831 832 response = self.response_class(self.sock, method=self._method) 833 (version, code, message) = response._read_status() 834 835 if code != 200: 836 self.close() 837 raise socket.error("Tunnel connection failed: %d %s" % (code, 838 message.strip())) 839 while True: 840 line = response.fp.readline(_MAXLINE + 1) 841 if len(line) > _MAXLINE: 842 raise LineTooLong("header line") 843 if not line: 844 # for sites which EOF without sending a trailer 845 break 846 if line in (b'\r\n', b'\n', b''): 847 break 848 849 def connect(self): 850 """Connect to the host and port specified in __init__.""" 851 self.sock = socket_create_connection((self.host,self.port), 852 self.timeout, self.source_address) 853 if self._tunnel_host: 854 self._tunnel() 855 856 def close(self): 857 """Close the connection to the HTTP server.""" 858 if self.sock: 859 self.sock.close() # close it manually... there may be other refs 860 self.sock = None 861 if self.__response: 862 self.__response.close() 863 self.__response = None 864 self.__state = _CS_IDLE 865 866 def send(self, data): 867 """Send `data' to the server. 868 ``data`` can be a string object, a bytes object, an array object, a 869 file-like object that supports a .read() method, or an iterable object. 870 """ 871 872 if self.sock is None: 873 if self.auto_open: 874 self.connect() 875 else: 876 raise NotConnected() 877 878 if self.debuglevel > 0: 879 print("send:", repr(data)) 880 blocksize = 8192 881 # Python 2.7 array objects have a read method which is incompatible 882 # with the 2-arg calling syntax below. 883 if hasattr(data, "read") and not isinstance(data, array): 884 if self.debuglevel > 0: 885 print("sendIng a read()able") 886 encode = False 887 try: 888 mode = data.mode 889 except AttributeError: 890 # io.BytesIO and other file-like objects don't have a `mode` 891 # attribute. 892 pass 893 else: 894 if "b" not in mode: 895 encode = True 896 if self.debuglevel > 0: 897 print("encoding file using iso-8859-1") 898 while 1: 899 datablock = data.read(blocksize) 900 if not datablock: 901 break 902 if encode: 903 datablock = datablock.encode("iso-8859-1") 904 self.sock.sendall(datablock) 905 return 906 try: 907 self.sock.sendall(data) 908 except TypeError: 909 if isinstance(data, Iterable): 910 for d in data: 911 self.sock.sendall(d) 912 else: 913 raise TypeError("data should be a bytes-like object " 914 "or an iterable, got %r" % type(data)) 915 916 def _output(self, s): 917 """Add a line of output to the current request buffer. 918 919 Assumes that the line does *not* end with \\r\\n. 920 """ 921 self._buffer.append(s) 922 923 def _send_output(self, message_body=None): 924 """Send the currently buffered request and clear the buffer. 925 926 Appends an extra \\r\\n to the buffer. 927 A message_body may be specified, to be appended to the request. 928 """ 929 self._buffer.extend((bytes(b""), bytes(b""))) 930 msg = bytes(b"\r\n").join(self._buffer) 931 del self._buffer[:] 932 # If msg and message_body are sent in a single send() call, 933 # it will avoid performance problems caused by the interaction 934 # between delayed ack and the Nagle algorithm. 935 if isinstance(message_body, bytes): 936 msg += message_body 937 message_body = None 938 self.send(msg) 939 if message_body is not None: 940 # message_body was not a string (i.e. it is a file), and 941 # we must run the risk of Nagle. 942 self.send(message_body) 943 944 def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0): 945 """Send a request to the server. 946 947 `method' specifies an HTTP request method, e.g. 'GET'. 948 `url' specifies the object being requested, e.g. '/index.html'. 949 `skip_host' if True does not add automatically a 'Host:' header 950 `skip_accept_encoding' if True does not add automatically an 951 'Accept-Encoding:' header 952 """ 953 954 # if a prior response has been completed, then forget about it. 955 if self.__response and self.__response.isclosed(): 956 self.__response = None 957 958 959 # in certain cases, we cannot issue another request on this connection. 960 # this occurs when: 961 # 1) we are in the process of sending a request. (_CS_REQ_STARTED) 962 # 2) a response to a previous request has signalled that it is going 963 # to close the connection upon completion. 964 # 3) the headers for the previous response have not been read, thus 965 # we cannot determine whether point (2) is true. (_CS_REQ_SENT) 966 # 967 # if there is no prior response, then we can request at will. 968 # 969 # if point (2) is true, then we will have passed the socket to the 970 # response (effectively meaning, "there is no prior response"), and 971 # will open a new one when a new request is made. 972 # 973 # Note: if a prior response exists, then we *can* start a new request. 974 # We are not allowed to begin fetching the response to this new 975 # request, however, until that prior response is complete. 976 # 977 if self.__state == _CS_IDLE: 978 self.__state = _CS_REQ_STARTED 979 else: 980 raise CannotSendRequest(self.__state) 981 982 # Save the method we use, we need it later in the response phase 983 self._method = method 984 if not url: 985 url = '/' 986 request = '%s %s %s' % (method, url, self._http_vsn_str) 987 988 # Non-ASCII characters should have been eliminated earlier 989 self._output(request.encode('ascii')) 990 991 if self._http_vsn == 11: 992 # Issue some standard headers for better HTTP/1.1 compliance 993 994 if not skip_host: 995 # this header is issued *only* for HTTP/1.1 996 # connections. more specifically, this means it is 997 # only issued when the client uses the new 998 # HTTPConnection() class. backwards-compat clients 999 # will be using HTTP/1.0 and those clients may be 1000 # issuing this header themselves. we should NOT issue 1001 # it twice; some web servers (such as Apache) barf 1002 # when they see two Host: headers 1003 1004 # If we need a non-standard port,include it in the 1005 # header. If the request is going through a proxy, 1006 # but the host of the actual URL, not the host of the 1007 # proxy. 1008 1009 netloc = '' 1010 if url.startswith('http'): 1011 nil, netloc, nil, nil, nil = urlsplit(url) 1012 1013 if netloc: 1014 try: 1015 netloc_enc = netloc.encode("ascii") 1016 except UnicodeEncodeError: 1017 netloc_enc = netloc.encode("idna") 1018 self.putheader('Host', netloc_enc) 1019 else: 1020 try: 1021 host_enc = self.host.encode("ascii") 1022 except UnicodeEncodeError: 1023 host_enc = self.host.encode("idna") 1024 1025 # As per RFC 273, IPv6 address should be wrapped with [] 1026 # when used as Host header 1027 1028 if self.host.find(':') >= 0: 1029 host_enc = bytes(b'[' + host_enc + b']') 1030 1031 if self.port == self.default_port: 1032 self.putheader('Host', host_enc) 1033 else: 1034 host_enc = host_enc.decode("ascii") 1035 self.putheader('Host', "%s:%s" % (host_enc, self.port)) 1036 1037 # note: we are assuming that clients will not attempt to set these 1038 # headers since *this* library must deal with the 1039 # consequences. this also means that when the supporting 1040 # libraries are updated to recognize other forms, then this 1041 # code should be changed (removed or updated). 1042 1043 # we only want a Content-Encoding of "identity" since we don't 1044 # support encodings such as x-gzip or x-deflate. 1045 if not skip_accept_encoding: 1046 self.putheader('Accept-Encoding', 'identity') 1047 1048 # we can accept "chunked" Transfer-Encodings, but no others 1049 # NOTE: no TE header implies *only* "chunked" 1050 #self.putheader('TE', 'chunked') 1051 1052 # if TE is supplied in the header, then it must appear in a 1053 # Connection header. 1054 #self.putheader('Connection', 'TE') 1055 1056 else: 1057 # For HTTP/1.0, the server will assume "not chunked" 1058 pass 1059 1060 def putheader(self, header, *values): 1061 """Send a request header line to the server. 1062 1063 For example: h.putheader('Accept', 'text/html') 1064 """ 1065 if self.__state != _CS_REQ_STARTED: 1066 raise CannotSendHeader() 1067 1068 if hasattr(header, 'encode'): 1069 header = header.encode('ascii') 1070 values = list(values) 1071 for i, one_value in enumerate(values): 1072 if hasattr(one_value, 'encode'): 1073 values[i] = one_value.encode('latin-1') 1074 elif isinstance(one_value, int): 1075 values[i] = str(one_value).encode('ascii') 1076 value = bytes(b'\r\n\t').join(values) 1077 header = header + bytes(b': ') + value 1078 self._output(header) 1079 1080 def endheaders(self, message_body=None): 1081 """Indicate that the last header line has been sent to the server. 1082 1083 This method sends the request to the server. The optional message_body 1084 argument can be used to pass a message body associated with the 1085 request. The message body will be sent in the same packet as the 1086 message headers if it is a string, otherwise it is sent as a separate 1087 packet. 1088 """ 1089 if self.__state == _CS_REQ_STARTED: 1090 self.__state = _CS_REQ_SENT 1091 else: 1092 raise CannotSendHeader() 1093 self._send_output(message_body) 1094 1095 def request(self, method, url, body=None, headers={}): 1096 """Send a complete request to the server.""" 1097 self._send_request(method, url, body, headers) 1098 1099 def _set_content_length(self, body): 1100 # Set the content-length based on the body. 1101 thelen = None 1102 try: 1103 thelen = str(len(body)) 1104 except TypeError as te: 1105 # If this is a file-like object, try to 1106 # fstat its file descriptor 1107 try: 1108 thelen = str(os.fstat(body.fileno()).st_size) 1109 except (AttributeError, OSError): 1110 # Don't send a length if this failed 1111 if self.debuglevel > 0: print("Cannot stat!!") 1112 1113 if thelen is not None: 1114 self.putheader('Content-Length', thelen) 1115 1116 def _send_request(self, method, url, body, headers): 1117 # Honor explicitly requested Host: and Accept-Encoding: headers. 1118 header_names = dict.fromkeys([k.lower() for k in headers]) 1119 skips = {} 1120 if 'host' in header_names: 1121 skips['skip_host'] = 1 1122 if 'accept-encoding' in header_names: 1123 skips['skip_accept_encoding'] = 1 1124 1125 self.putrequest(method, url, **skips) 1126 1127 if body is not None and ('content-length' not in header_names): 1128 self._set_content_length(body) 1129 for hdr, value in headers.items(): 1130 self.putheader(hdr, value) 1131 if isinstance(body, str): 1132 # RFC 2616 Section 3.7.1 says that text default has a 1133 # default charset of iso-8859-1. 1134 body = body.encode('iso-8859-1') 1135 self.endheaders(body) 1136 1137 def getresponse(self): 1138 """Get the response from the server. 1139 1140 If the HTTPConnection is in the correct state, returns an 1141 instance of HTTPResponse or of whatever object is returned by 1142 class the response_class variable. 1143 1144 If a request has not been sent or if a previous response has 1145 not be handled, ResponseNotReady is raised. If the HTTP 1146 response indicates that the connection should be closed, then 1147 it will be closed before the response is returned. When the 1148 connection is closed, the underlying socket is closed. 1149 """ 1150 1151 # if a prior response has been completed, then forget about it. 1152 if self.__response and self.__response.isclosed(): 1153 self.__response = None 1154 1155 # if a prior response exists, then it must be completed (otherwise, we 1156 # cannot read this response's header to determine the connection-close 1157 # behavior) 1158 # 1159 # note: if a prior response existed, but was connection-close, then the 1160 # socket and response were made independent of this HTTPConnection 1161 # object since a new request requires that we open a whole new 1162 # connection 1163 # 1164 # this means the prior response had one of two states: 1165 # 1) will_close: this connection was reset and the prior socket and 1166 # response operate independently 1167 # 2) persistent: the response was retained and we await its 1168 # isclosed() status to become true. 1169 # 1170 if self.__state != _CS_REQ_SENT or self.__response: 1171 raise ResponseNotReady(self.__state) 1172 1173 if self.debuglevel > 0: 1174 response = self.response_class(self.sock, self.debuglevel, 1175 method=self._method) 1176 else: 1177 response = self.response_class(self.sock, method=self._method) 1178 1179 response.begin() 1180 assert response.will_close != _UNKNOWN 1181 self.__state = _CS_IDLE 1182 1183 if response.will_close: 1184 # this effectively passes the connection to the response 1185 self.close() 1186 else: 1187 # remember this, so we can tell when it is complete 1188 self.__response = response 1189 1190 return response 1191 1192try: 1193 import ssl 1194 from ssl import SSLContext 1195except ImportError: 1196 pass 1197else: 1198 class HTTPSConnection(HTTPConnection): 1199 "This class allows communication via SSL." 1200 1201 default_port = HTTPS_PORT 1202 1203 # XXX Should key_file and cert_file be deprecated in favour of context? 1204 1205 def __init__(self, host, port=None, key_file=None, cert_file=None, 1206 strict=_strict_sentinel, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, 1207 source_address=None, **_3to2kwargs): 1208 if 'check_hostname' in _3to2kwargs: check_hostname = _3to2kwargs['check_hostname']; del _3to2kwargs['check_hostname'] 1209 else: check_hostname = None 1210 if 'context' in _3to2kwargs: context = _3to2kwargs['context']; del _3to2kwargs['context'] 1211 else: context = None 1212 super(HTTPSConnection, self).__init__(host, port, strict, timeout, 1213 source_address) 1214 self.key_file = key_file 1215 self.cert_file = cert_file 1216 if context is None: 1217 # Some reasonable defaults 1218 context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) 1219 context.options |= ssl.OP_NO_SSLv2 1220 will_verify = context.verify_mode != ssl.CERT_NONE 1221 if check_hostname is None: 1222 check_hostname = will_verify 1223 elif check_hostname and not will_verify: 1224 raise ValueError("check_hostname needs a SSL context with " 1225 "either CERT_OPTIONAL or CERT_REQUIRED") 1226 if key_file or cert_file: 1227 context.load_cert_chain(cert_file, key_file) 1228 self._context = context 1229 self._check_hostname = check_hostname 1230 1231 def connect(self): 1232 "Connect to a host on a given (SSL) port." 1233 1234 sock = socket_create_connection((self.host, self.port), 1235 self.timeout, self.source_address) 1236 1237 if self._tunnel_host: 1238 self.sock = sock 1239 self._tunnel() 1240 1241 server_hostname = self.host if ssl.HAS_SNI else None 1242 self.sock = self._context.wrap_socket(sock, 1243 server_hostname=server_hostname) 1244 try: 1245 if self._check_hostname: 1246 ssl.match_hostname(self.sock.getpeercert(), self.host) 1247 except Exception: 1248 self.sock.shutdown(socket.SHUT_RDWR) 1249 self.sock.close() 1250 raise 1251 1252 __all__.append("HTTPSConnection") 1253 1254 1255 # ###################################### 1256 # # We use the old HTTPSConnection class from Py2.7, because ssl.SSLContext 1257 # # doesn't exist in the Py2.7 stdlib 1258 # class HTTPSConnection(HTTPConnection): 1259 # "This class allows communication via SSL." 1260 1261 # default_port = HTTPS_PORT 1262 1263 # def __init__(self, host, port=None, key_file=None, cert_file=None, 1264 # strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, 1265 # source_address=None): 1266 # HTTPConnection.__init__(self, host, port, strict, timeout, 1267 # source_address) 1268 # self.key_file = key_file 1269 # self.cert_file = cert_file 1270 1271 # def connect(self): 1272 # "Connect to a host on a given (SSL) port." 1273 1274 # sock = socket_create_connection((self.host, self.port), 1275 # self.timeout, self.source_address) 1276 # if self._tunnel_host: 1277 # self.sock = sock 1278 # self._tunnel() 1279 # self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file) 1280 1281 # __all__.append("HTTPSConnection") 1282 # ###################################### 1283 1284 1285class HTTPException(Exception): 1286 # Subclasses that define an __init__ must call Exception.__init__ 1287 # or define self.args. Otherwise, str() will fail. 1288 pass 1289 1290class NotConnected(HTTPException): 1291 pass 1292 1293class InvalidURL(HTTPException): 1294 pass 1295 1296class UnknownProtocol(HTTPException): 1297 def __init__(self, version): 1298 self.args = version, 1299 self.version = version 1300 1301class UnknownTransferEncoding(HTTPException): 1302 pass 1303 1304class UnimplementedFileMode(HTTPException): 1305 pass 1306 1307class IncompleteRead(HTTPException): 1308 def __init__(self, partial, expected=None): 1309 self.args = partial, 1310 self.partial = partial 1311 self.expected = expected 1312 def __repr__(self): 1313 if self.expected is not None: 1314 e = ', %i more expected' % self.expected 1315 else: 1316 e = '' 1317 return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), e) 1318 def __str__(self): 1319 return repr(self) 1320 1321class ImproperConnectionState(HTTPException): 1322 pass 1323 1324class CannotSendRequest(ImproperConnectionState): 1325 pass 1326 1327class CannotSendHeader(ImproperConnectionState): 1328 pass 1329 1330class ResponseNotReady(ImproperConnectionState): 1331 pass 1332 1333class BadStatusLine(HTTPException): 1334 def __init__(self, line): 1335 if not line: 1336 line = repr(line) 1337 self.args = line, 1338 self.line = line 1339 1340class LineTooLong(HTTPException): 1341 def __init__(self, line_type): 1342 HTTPException.__init__(self, "got more than %d bytes when reading %s" 1343 % (_MAXLINE, line_type)) 1344 1345# for backwards compatibility 1346error = HTTPException 1347