1"""HTTP/1.1 client library
2
3A backport of the Python 3.3 http/client.py module for python-future.
4
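This module implements the client side of the HTTP/1.1 protocol.
HTTPConnection owns the socket and enforces the request state machine
described below; HTTPResponse parses the status line and headers of a
reply and reads its body.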
7
8HTTPConnection goes through a number of "states", which define when a client
9may legally make another request or fetch the response for a particular
10request. This diagram details these state transitions:
11
12    (null)
13      |
14      | HTTPConnection()
15      v
16    Idle
17      |
18      | putrequest()
19      v
20    Request-started
21      |
22      | ( putheader() )*  endheaders()
23      v
24    Request-sent
25      |
26      | response = getresponse()
27      v
28    Unread-response   [Response-headers-read]
29      |\____________________
30      |                     |
31      | response.read()     | putrequest()
32      v                     v
33    Idle                  Req-started-unread-response
34                     ______/|
35                   /        |
36   response.read() |        | ( putheader() )*  endheaders()
37                   v        v
38       Request-started    Req-sent-unread-response
39                            |
40                            | response.read()
41                            v
42                          Request-sent
43
44This diagram presents the following rules:
45  -- a second request may not be started until {response-headers-read}
46  -- a response [object] cannot be retrieved until {request-sent}
47  -- there is no differentiation between an unread response body and a
48     partially read response body
49
50Note: this enforcement is applied by the HTTPConnection class. The
51      HTTPResponse class does not enforce this state machine, which
52      implies sophisticated clients may accelerate the request/response
53      pipeline. Caution should be taken, though: accelerating the states
54      beyond the above pattern may imply knowledge of the server's
55      connection-close behavior for certain requests. For example, it
56      is impossible to tell whether the server will close the connection
57      UNTIL the response headers have been read; this means that further
58      requests cannot be placed into the pipeline until it is known that
59      the server will NOT be closing the connection.
60
61Logical State                  __state            __response
62-------------                  -------            ----------
63Idle                           _CS_IDLE           None
64Request-started                _CS_REQ_STARTED    None
65Request-sent                   _CS_REQ_SENT       None
66Unread-response                _CS_IDLE           <response_class>
67Req-started-unread-response    _CS_REQ_STARTED    <response_class>
68Req-sent-unread-response       _CS_REQ_SENT       <response_class>
69"""
70
71from __future__ import (absolute_import, division,
72                        print_function, unicode_literals)
73from future.builtins import bytes, int, str, super
74from future.utils import PY2
75
76from future.backports.email import parser as email_parser
77from future.backports.email import message as email_message
78from future.backports.misc import create_connection as socket_create_connection
79import io
80import os
81import socket
82from future.backports.urllib.parse import urlsplit
83import warnings
84from array import array
85
86if PY2:
87    from collections import Iterable
88else:
89    from collections.abc import Iterable
90
# Public API of this module: the names bound by ``from <module> import *``.
__all__ = ["HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "error", "responses"]
97
# Default TCP ports.  HTTP_PORT is the ``default_port`` of HTTPConnection.
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder stored in HTTPResponse attributes (version, status, length,
# ...) until begin() has parsed the response.
_UNKNOWN = 'UNKNOWN'

# connection states
# (values of HTTPConnection.__state; see the table in the module docstring)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

# status codes
# informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426
PRECONDITION_REQUIRED = 428
TOO_MANY_REQUESTS = 429
REQUEST_HEADER_FIELDS_TOO_LARGE = 431

# server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510
NETWORK_AUTHENTICATION_REQUIRED = 511
171
# Mapping of status codes to their standard reason phrases.
#
# Every status-code constant defined by this module has an entry here, so
# ``responses[SOME_CONSTANT]`` never raises KeyError.  306 is listed as
# well even though it has no constant: the code is reserved but unused.
responses = {
    100: 'Continue',
    101: 'Switching Protocols',
    102: 'Processing',

    200: 'OK',
    201: 'Created',
    202: 'Accepted',
    203: 'Non-Authoritative Information',
    204: 'No Content',
    205: 'Reset Content',
    206: 'Partial Content',
    207: 'Multi-Status',
    226: 'IM Used',

    300: 'Multiple Choices',
    301: 'Moved Permanently',
    302: 'Found',
    303: 'See Other',
    304: 'Not Modified',
    305: 'Use Proxy',
    306: '(Unused)',
    307: 'Temporary Redirect',

    400: 'Bad Request',
    401: 'Unauthorized',
    402: 'Payment Required',
    403: 'Forbidden',
    404: 'Not Found',
    405: 'Method Not Allowed',
    406: 'Not Acceptable',
    407: 'Proxy Authentication Required',
    408: 'Request Timeout',
    409: 'Conflict',
    410: 'Gone',
    411: 'Length Required',
    412: 'Precondition Failed',
    413: 'Request Entity Too Large',
    414: 'Request-URI Too Long',
    415: 'Unsupported Media Type',
    416: 'Requested Range Not Satisfiable',
    417: 'Expectation Failed',
    422: 'Unprocessable Entity',
    423: 'Locked',
    424: 'Failed Dependency',
    426: 'Upgrade Required',
    428: 'Precondition Required',
    429: 'Too Many Requests',
    431: 'Request Header Fields Too Large',

    500: 'Internal Server Error',
    501: 'Not Implemented',
    502: 'Bad Gateway',
    503: 'Service Unavailable',
    504: 'Gateway Timeout',
    505: 'HTTP Version Not Supported',
    507: 'Insufficient Storage',
    510: 'Not Extended',
    511: 'Network Authentication Required',
}
224
# maximal amount of data to read at one time in _safe_read
# (_safe_readinto applies the same cap to each individual read)
MAXAMOUNT = 1048576

# maximal line length when calling readline().
_MAXLINE = 65536
# maximal number of lines parse_headers() accepts in one header block; the
# terminating blank line is counted as well.
_MAXHEADERS = 100
231
232
class HTTPMessage(email_message.Message):
    """Header block of an HTTP message, stored as an email-style Message."""

    # XXX The only usage of this method is in
    # http.server.CGIHTTPRequestHandler.  Maybe move the code there so
    # that it doesn't need to be part of the public API.  The API has
    # never been defined so this could cause backwards compatibility
    # issues.

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Returns a list with one ``"Name: value"`` string per occurrence of
        the header, in the order the headers are stored.  The name is
        written with the capitalisation it was stored with; matching is
        case-insensitive.  If the header does not occur, an empty list is
        returned.

        The underlying Message keeps (name, value) pairs rather than raw
        lines, so the lines are rebuilt from those pairs.  A folded header
        is returned as a single entry whose value is whatever the message
        stored for it.
        """
        # Compare against the bare header name.  The previous code compared
        # "name:" with the keys of the message, which never contain the
        # colon, so it could not match anything and always returned [].
        wanted = name.lower()
        return ["%s: %s" % (key, value)
                for key, value in self.items()
                if key.lower() == wanted]
262
def parse_headers(fp, _class=HTTPMessage):
    """Read one RFC 2822 header block from *fp* and return it parsed.

    *fp* is a binary file object.  Lines are read one at a time, as bytes,
    up to and including the blank line that ends the header block (or up
    to end of file), so nothing beyond the headers is consumed from the
    stream.  The collected bytes are decoded as iso-8859-1 and handed to
    the email parser, which only works on text; the result is an instance
    of *_class*.

    Raises LineTooLong if a line exceeds _MAXLINE bytes, and HTTPException
    if more than _MAXHEADERS lines are read.
    """
    end_markers = (b'\r\n', b'\n', b'')
    raw_lines = []
    line = None
    while line not in end_markers:
        line = fp.readline(_MAXLINE + 1)
        if len(line) > _MAXLINE:
            raise LineTooLong("header line")
        raw_lines.append(line)
        if len(raw_lines) > _MAXHEADERS:
            raise HTTPException("got more than %d headers" % _MAXHEADERS)
    text = bytes(b'').join(raw_lines).decode('iso-8859-1')
    parser = email_parser.Parser(_class=_class)
    return parser.parsestr(text)
285
286
# Unique default for the deprecated ``strict`` argument of HTTPResponse and
# HTTPConnection: an identity test against it tells "argument omitted"
# apart from any value a caller could pass, including None.
_strict_sentinel = object()
288
class HTTPResponse(io.RawIOBase):
    """A single HTTP response read from a socket.

    The constructor only wraps the socket in a binary file object; begin()
    must be called to parse the status line and headers.  The body is then
    read with read() or readinto(), following the framing announced by the
    server: chunked transfer encoding, a Content-Length, or "everything
    until the connection closes".

    Attributes filled in by begin():
        status, code -- the status code as an integer
        reason       -- the reason phrase, stripped of whitespace
        version      -- 10 for HTTP/1.0 and HTTP/0.9, 11 for HTTP/1.x
        headers, msg -- the parsed header block (one object, two names)
        chunked      -- True if the body uses chunked transfer encoding
        length       -- body bytes still to read, or None if unknown
        will_close   -- True if the connection closes after this response
    """

    # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.

    # The bytes from the socket object are iso-8859-1 strings.
    # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded
    # text following RFC 2047.  The basic status line parsing only
    # accepts iso-8859-1.

    def __init__(self, sock, debuglevel=0, strict=_strict_sentinel, method=None, url=None):
        """Wrap *sock* for reading; nothing is read until begin().

        debuglevel -- values above 0 make the parser print what it reads
        strict     -- deprecated and ignored; passing it emits a
                      DeprecationWarning
        method     -- the request method; "HEAD" means there is no body
        url        -- the URL of the request, returned by geturl()
        """
        # If the response includes a content-length header, we need to
        # make sure that the client doesn't read more than the
        # specified number of bytes.  If it does, it will block until
        # the server times out and closes the connection.  This will
        # happen if a self.fp.read() is done (without a size) whether
        # self.fp is buffered or not.  So, no self.fp.read() by
        # clients unless they know what they are doing.
        self.fp = sock.makefile("rb")
        self.debuglevel = debuglevel
        if strict is not _strict_sentinel:
            warnings.warn("the 'strict' argument isn't supported anymore; "
                "http.client now always assumes HTTP/1.x compliant servers.",
                DeprecationWarning, 2)
        self._method = method
        # Keep the URL so that geturl() works on a response that nobody
        # assigned ``.url`` to afterwards; callers may still overwrite it.
        self.url = url

        # The HTTPResponse object is returned via urllib.  The clients
        # of http and urllib expect different attributes for the
        # headers.  headers is used here and supports urllib.  msg is
        # provided as a backwards compatibility layer for http
        # clients.

        self.headers = self.msg = None

        # from the Status-Line of the response
        self.version = _UNKNOWN # HTTP-Version
        self.status = _UNKNOWN  # Status-Code
        self.reason = _UNKNOWN  # Reason-Phrase

        self.chunked = _UNKNOWN         # is "chunked" being used?
        self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
        self.length = _UNKNOWN          # number of bytes left in response
        self.will_close = _UNKNOWN      # conn will close at end of response

    def _read_status(self):
        """Read the status line and return (version, status, reason).

        *status* is returned as an integer; *version* and *reason* are the
        raw text fields (the reason is "" when the line has only two
        fields).

        Raises LineTooLong if the line exceeds _MAXLINE, and BadStatusLine
        if the line is empty, does not start with "HTTP/", or carries a
        status code that is not a number between 100 and 999.
        """
        line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
        if len(line) > _MAXLINE:
            raise LineTooLong("status line")
        if self.debuglevel > 0:
            print("reply:", repr(line))
        if not line:
            # Presumably, the server closed the connection before
            # sending a valid response.
            raise BadStatusLine(line)
        try:
            version, status, reason = line.split(None, 2)
        except ValueError:
            try:
                version, status = line.split(None, 1)
                reason = ""
            except ValueError:
                # empty version will cause next test to fail.
                version = ""
        if not version.startswith("HTTP/"):
            self._close_conn()
            raise BadStatusLine(line)

        # The status code is a three-digit number
        try:
            status = int(status)
            if status < 100 or status > 999:
                raise BadStatusLine(line)
        except ValueError:
            raise BadStatusLine(line)
        return version, status, reason

    def begin(self):
        """Parse the status line and headers and work out the body framing.

        Does nothing if the headers have already been read.  Any number of
        "100 Continue" responses, together with their headers, are skipped
        first.  Raises UnknownProtocol for a version other than HTTP/0.9
        or HTTP/1.x, plus whatever _read_status() and parse_headers()
        raise.
        """
        if self.headers is not None:
            # we've already started reading the response
            return

        # read until we get a non-100 response
        while True:
            version, status, reason = self._read_status()
            if status != CONTINUE:
                break
            # skip the header from the 100 response
            while True:
                skip = self.fp.readline(_MAXLINE + 1)
                if len(skip) > _MAXLINE:
                    raise LineTooLong("header line")
                skip = skip.strip()
                if not skip:
                    break
                if self.debuglevel > 0:
                    print("header:", skip)

        self.code = self.status = status
        self.reason = reason.strip()
        if version in ("HTTP/1.0", "HTTP/0.9"):
            # Some servers might still return "0.9", treat it as 1.0 anyway
            self.version = 10
        elif version.startswith("HTTP/1."):
            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
        else:
            raise UnknownProtocol(version)

        self.headers = self.msg = parse_headers(self.fp)

        if self.debuglevel > 0:
            # Show the value next to each name; the name alone is of
            # little use when debugging.
            for hdr, val in self.headers.items():
                print("header:", hdr + ":", val)

        # are we using the chunked-style of transfer encoding?
        tr_enc = self.headers.get("transfer-encoding")
        if tr_enc and tr_enc.lower() == "chunked":
            self.chunked = True
            self.chunk_left = None
        else:
            self.chunked = False

        # will the connection close at the end of the response?
        self.will_close = self._check_close()

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        self.length = None
        length = self.headers.get("content-length")
        if length and not self.chunked:
            try:
                self.length = int(length)
            except ValueError:
                self.length = None
            else:
                if self.length < 0:  # ignore nonsensical negative lengths
                    self.length = None

        # does the body have a fixed length? (of zero)
        if (status == NO_CONTENT or status == NOT_MODIFIED or
            100 <= status < 200 or      # 1xx codes
            self._method == "HEAD"):
            self.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if (not self.will_close and
            not self.chunked and
            self.length is None):
            self.will_close = True

    def _check_close(self):
        """Return True if the headers say the connection will be closed."""
        conn = self.headers.get("connection")
        if self.version == 11:
            # An HTTP/1.1 proxy is assumed to stay open unless
            # explicitly closed.
            if conn and "close" in conn.lower():
                return True
            return False

        # Some HTTP/1.0 implementations have support for persistent
        # connections, using rules different than HTTP/1.1.

        # For older HTTP, Keep-Alive indicates persistent connection.
        if self.headers.get("keep-alive"):
            return False

        # At least Akamai returns a "Connection: Keep-Alive" header,
        # which was supposed to be sent by the client.
        if conn and "keep-alive" in conn.lower():
            return False

        # Proxy-Connection is a netscape hack.
        pconn = self.headers.get("proxy-connection")
        if pconn and "keep-alive" in pconn.lower():
            return False

        # otherwise, assume it will close
        return True

    def _close_conn(self):
        """Close the underlying file object and forget it."""
        # Clear the attribute first so the response counts as closed even
        # if fp.close() raises.
        fp = self.fp
        self.fp = None
        fp.close()

    def close(self):
        """Mark the stream closed and release the underlying file object."""
        super().close() # set "closed" flag
        if self.fp:
            self._close_conn()

    # These implementations are for the benefit of io.BufferedReader.

    # XXX This class should probably be revised to act more like
    # the "raw stream" that BufferedReader expects.

    def flush(self):
        """Flush this stream and, while it is still open, the file object."""
        super().flush()
        if self.fp:
            self.fp.flush()

    def readable(self):
        """Always True: a response can be read from."""
        return True

    # End of "raw stream" methods

    def isclosed(self):
        """True if the connection is closed."""
        # NOTE: it is possible that we will not ever call self.close(). This
        #       case occurs when will_close is TRUE, length is None, and we
        #       read up to the last byte, but NOT past it.
        #
        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
        #          called, meaning self.isclosed() is meaningful.
        return self.fp is None

    def read(self, amt=None):
        """Read and return up to *amt* bytes of the body, or all of it.

        Returns empty bytes once the response is closed, and for every
        response to a HEAD request.  With *amt* omitted the whole
        remaining body is read and the response is closed afterwards;
        IncompleteRead is raised if the body ends before the announced
        length has been received.
        """
        if self.fp is None:
            return bytes(b"")

        if self._method == "HEAD":
            self._close_conn()
            return bytes(b"")

        if amt is not None:
            # Amount is given, so call base class version
            # (which is implemented in terms of self.readinto)
            return bytes(super(HTTPResponse, self).read(amt))
        else:
            # Amount is not given (unbounded read) so we must check self.length
            # and self.chunked

            if self.chunked:
                return self._readall_chunked()

            if self.length is None:
                s = self.fp.read()
            else:
                try:
                    s = self._safe_read(self.length)
                except IncompleteRead:
                    self._close_conn()
                    raise
                self.length = 0
            self._close_conn()        # we read everything
            return bytes(s)

    def readinto(self, b):
        """Read body bytes into the buffer *b*; return how many were read.

        Returns 0 once the response is closed and for every response to a
        HEAD request.  The response is closed when the announced length
        has been consumed or when the server stops sending data.
        """
        if self.fp is None:
            return 0

        if self._method == "HEAD":
            self._close_conn()
            return 0

        if self.chunked:
            return self._readinto_chunked(b)

        if self.length is not None:
            if len(b) > self.length:
                # clip the read to the "end of response"
                b = memoryview(b)[0:self.length]

        # we do not use _safe_read() here because this may be a .will_close
        # connection, and the user is reading more bytes than will be provided
        # (for example, reading in 1k chunks)

        if PY2:
            # On Python 2 the data is read first and then copied into the
            # caller's buffer.
            data = self.fp.read(len(b))
            n = len(data)
            b[:n] = data
        else:
            n = self.fp.readinto(b)

        if not n and b:
            # Ideally, we would raise IncompleteRead if the content-length
            # wasn't satisfied, but it might break compatibility.
            self._close_conn()
        elif self.length is not None:
            self.length -= n
            if not self.length:
                self._close_conn()
        return n

    def _read_next_chunk_size(self):
        """Read a chunk-size line and return the size as an integer.

        Chunk extensions (everything from the first ";") are ignored.
        Raises LineTooLong for an over-long line and ValueError, after
        closing the connection, if the size is not a hexadecimal number.
        """
        # Read the next chunk size from the file
        line = self.fp.readline(_MAXLINE + 1)
        if len(line) > _MAXLINE:
            raise LineTooLong("chunk size")
        i = line.find(b";")
        if i >= 0:
            line = line[:i] # strip chunk-extensions
        try:
            return int(line, 16)
        except ValueError:
            # close the connection as protocol synchronisation is
            # probably lost
            self._close_conn()
            raise

    def _read_and_discard_trailer(self):
        """Consume the trailer that follows the last chunk."""
        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline(_MAXLINE + 1)
            if len(line) > _MAXLINE:
                raise LineTooLong("trailer line")
            # An empty read means EOF: a vanishingly small number of sites
            # EOF without sending the trailer.
            if not line or line in (b'\r\n', b'\n'):
                break

    def _readall_chunked(self):
        """Read the rest of a chunked body and return it as one bytes object.

        Raises IncompleteRead, carrying the data read so far, if a chunk
        size cannot be parsed.
        """
        assert self.chunked != _UNKNOWN
        chunk_left = self.chunk_left
        value = []
        while True:
            if chunk_left is None:
                try:
                    chunk_left = self._read_next_chunk_size()
                    if chunk_left == 0:
                        break
                except ValueError:
                    raise IncompleteRead(bytes(b'').join(value))
            value.append(self._safe_read(chunk_left))

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        self._read_and_discard_trailer()

        # we read everything; close the "file"
        self._close_conn()

        return bytes(b'').join(value)

    def _readinto_chunked(self, b):
        """Fill *b* from a chunked body; return the number of bytes stored.

        Reading stops when *b* is full or when the terminating zero-size
        chunk has been seen; in the latter case the trailer is consumed
        and the connection is closed.  When *b* fills up in the middle of
        a chunk, the unread remainder is remembered in self.chunk_left.
        """
        assert self.chunked != _UNKNOWN
        chunk_left = self.chunk_left

        total_bytes = 0
        mvb = memoryview(b)
        while True:
            if chunk_left is None:
                try:
                    chunk_left = self._read_next_chunk_size()
                    if chunk_left == 0:
                        break
                except ValueError:
                    raise IncompleteRead(bytes(b[0:total_bytes]))

            if len(mvb) < chunk_left:
                # the buffer ends inside this chunk
                n = self._safe_readinto(mvb)
                self.chunk_left = chunk_left - n
                return total_bytes + n
            elif len(mvb) == chunk_left:
                # the buffer ends exactly at the end of this chunk
                n = self._safe_readinto(mvb)
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return total_bytes + n
            else:
                # the whole chunk fits; keep going with the next one
                temp_mvb = mvb[0:chunk_left]
                n = self._safe_readinto(temp_mvb)
                mvb = mvb[n:]
                total_bytes += n

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        self._read_and_discard_trailer()

        # we read everything; close the "file"
        self._close_conn()

        return total_bytes

    def _safe_read(self, amt):
        """Read the number of bytes requested, compensating for partial reads.

        Normally, we have a blocking socket, but a read() can be interrupted
        by a signal (resulting in a partial read).

        Note that we cannot distinguish between EOF and an interrupt when zero
        bytes have been read. IncompleteRead() will be raised in this
        situation.

        This function should be used when <amt> bytes "should" be present for
        reading. If the bytes are truly not available (due to EOF), then the
        IncompleteRead exception can be used to detect the problem.
        """
        s = []
        while amt > 0:
            chunk = self.fp.read(min(amt, MAXAMOUNT))
            if not chunk:
                raise IncompleteRead(bytes(b'').join(s), amt)
            s.append(chunk)
            amt -= len(chunk)
        return bytes(b"").join(s)

    def _safe_readinto(self, b):
        """Same as _safe_read, but for reading into a buffer.

        Fills *b* completely and returns the number of bytes stored.
        Raises IncompleteRead if the stream ends early; the exception
        carries the bytes received so far and the number still missing,
        like the one raised by _safe_read().
        """
        total_bytes = 0
        whole = memoryview(b)   # stays on the start of b for error reporting
        mvb = whole             # window on the part of b not yet filled
        while total_bytes < len(b):
            # Never ask for more than MAXAMOUNT bytes in one read.
            target = mvb[0:MAXAMOUNT] if MAXAMOUNT < len(mvb) else mvb
            if PY2:
                data = self.fp.read(len(target))
                n = len(data)
                target[:n] = data
            else:
                n = self.fp.readinto(target)
            if not n:
                # Report what was actually received.  ``mvb`` has already
                # been advanced past that data, so slicing it here would
                # return bytes from the unfilled part of the buffer.
                raise IncompleteRead(bytes(whole[0:total_bytes]),
                                     len(b) - total_bytes)
            mvb = mvb[n:]
            total_bytes += n
        return total_bytes

    def fileno(self):
        """Return the file descriptor of the underlying file object."""
        return self.fp.fileno()

    def getheader(self, name, default=None):
        """Return the value of header *name*, or *default* if it is absent.

        If the header occurs several times the values are joined with
        ", ".  A *default* that is a non-string iterable is joined the
        same way.  Raises ResponseNotReady if the headers have not been
        read yet.
        """
        if self.headers is None:
            raise ResponseNotReady()
        headers = self.headers.get_all(name) or default
        if isinstance(headers, str) or not hasattr(headers, '__iter__'):
            return headers
        else:
            return ', '.join(headers)

    def getheaders(self):
        """Return list of (header, value) tuples."""
        if self.headers is None:
            raise ResponseNotReady()
        return list(self.headers.items())

    # We override IOBase.__iter__ so that it doesn't check for closed-ness

    def __iter__(self):
        return self

    # For compatibility with old-style urllib responses.

    def info(self):
        """Return the parsed headers (None until begin() has run)."""
        return self.headers

    def geturl(self):
        """Return the URL this response was created for."""
        return self.url

    def getcode(self):
        """Return the status code of the response."""
        return self.status
755
class HTTPConnection(object):

    # HTTP version of this client, as a number and as protocol text.
    # NOTE(review): neither attribute is referenced in the part of the
    # class shown here; presumably used when the request line is built.
    _http_vsn = 11
    _http_vsn_str = 'HTTP/1.1'

    # Class instantiated to read a reply from the socket (see _tunnel()).
    response_class = HTTPResponse
    # Port used by _set_hostport() when the caller does not supply one.
    default_port = HTTP_PORT
    # When true, send() opens the connection itself if no socket exists;
    # otherwise it raises NotConnected.
    auto_open = 1
    # Values above 0 make send() print what it transmits.
    debuglevel = 0
765
766    def __init__(self, host, port=None, strict=_strict_sentinel,
767                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
768        if strict is not _strict_sentinel:
769            warnings.warn("the 'strict' argument isn't supported anymore; "
770                "http.client now always assumes HTTP/1.x compliant servers.",
771                DeprecationWarning, 2)
772        self.timeout = timeout
773        self.source_address = source_address
774        self.sock = None
775        self._buffer = []
776        self.__response = None
777        self.__state = _CS_IDLE
778        self._method = None
779        self._tunnel_host = None
780        self._tunnel_port = None
781        self._tunnel_headers = {}
782
783        self._set_hostport(host, port)
784
785    def set_tunnel(self, host, port=None, headers=None):
786        """ Sets up the host and the port for the HTTP CONNECT Tunnelling.
787
788        The headers argument should be a mapping of extra HTTP headers
789        to send with the CONNECT request.
790        """
791        self._tunnel_host = host
792        self._tunnel_port = port
793        if headers:
794            self._tunnel_headers = headers
795        else:
796            self._tunnel_headers.clear()
797
798    def _set_hostport(self, host, port):
799        if port is None:
800            i = host.rfind(':')
801            j = host.rfind(']')         # ipv6 addresses have [...]
802            if i > j:
803                try:
804                    port = int(host[i+1:])
805                except ValueError:
806                    if host[i+1:] == "": # http://foo.com:/ == http://foo.com/
807                        port = self.default_port
808                    else:
809                        raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
810                host = host[:i]
811            else:
812                port = self.default_port
813            if host and host[0] == '[' and host[-1] == ']':
814                host = host[1:-1]
815        self.host = host
816        self.port = port
817
818    def set_debuglevel(self, level):
819        self.debuglevel = level
820
821    def _tunnel(self):
822        self._set_hostport(self._tunnel_host, self._tunnel_port)
823        connect_str = "CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port)
824        connect_bytes = connect_str.encode("ascii")
825        self.send(connect_bytes)
826        for header, value in self._tunnel_headers.items():
827            header_str = "%s: %s\r\n" % (header, value)
828            header_bytes = header_str.encode("latin-1")
829            self.send(header_bytes)
830        self.send(bytes(b'\r\n'))
831
832        response = self.response_class(self.sock, method=self._method)
833        (version, code, message) = response._read_status()
834
835        if code != 200:
836            self.close()
837            raise socket.error("Tunnel connection failed: %d %s" % (code,
838                                                                    message.strip()))
839        while True:
840            line = response.fp.readline(_MAXLINE + 1)
841            if len(line) > _MAXLINE:
842                raise LineTooLong("header line")
843            if not line:
844                # for sites which EOF without sending a trailer
845                break
846            if line in (b'\r\n', b'\n', b''):
847                break
848
849    def connect(self):
850        """Connect to the host and port specified in __init__."""
851        self.sock = socket_create_connection((self.host,self.port),
852                                             self.timeout, self.source_address)
853        if self._tunnel_host:
854            self._tunnel()
855
856    def close(self):
857        """Close the connection to the HTTP server."""
858        if self.sock:
859            self.sock.close()   # close it manually... there may be other refs
860            self.sock = None
861        if self.__response:
862            self.__response.close()
863            self.__response = None
864        self.__state = _CS_IDLE
865
866    def send(self, data):
867        """Send `data' to the server.
868        ``data`` can be a string object, a bytes object, an array object, a
869        file-like object that supports a .read() method, or an iterable object.
870        """
871
872        if self.sock is None:
873            if self.auto_open:
874                self.connect()
875            else:
876                raise NotConnected()
877
878        if self.debuglevel > 0:
879            print("send:", repr(data))
880        blocksize = 8192
881        # Python 2.7 array objects have a read method which is incompatible
882        # with the 2-arg calling syntax below.
883        if hasattr(data, "read") and not isinstance(data, array):
884            if self.debuglevel > 0:
885                print("sendIng a read()able")
886            encode = False
887            try:
888                mode = data.mode
889            except AttributeError:
890                # io.BytesIO and other file-like objects don't have a `mode`
891                # attribute.
892                pass
893            else:
894                if "b" not in mode:
895                    encode = True
896                    if self.debuglevel > 0:
897                        print("encoding file using iso-8859-1")
898            while 1:
899                datablock = data.read(blocksize)
900                if not datablock:
901                    break
902                if encode:
903                    datablock = datablock.encode("iso-8859-1")
904                self.sock.sendall(datablock)
905            return
906        try:
907            self.sock.sendall(data)
908        except TypeError:
909            if isinstance(data, Iterable):
910                for d in data:
911                    self.sock.sendall(d)
912            else:
913                raise TypeError("data should be a bytes-like object "
914                                "or an iterable, got %r" % type(data))
915
916    def _output(self, s):
917        """Add a line of output to the current request buffer.
918
919        Assumes that the line does *not* end with \\r\\n.
920        """
921        self._buffer.append(s)
922
923    def _send_output(self, message_body=None):
924        """Send the currently buffered request and clear the buffer.
925
926        Appends an extra \\r\\n to the buffer.
927        A message_body may be specified, to be appended to the request.
928        """
929        self._buffer.extend((bytes(b""), bytes(b"")))
930        msg = bytes(b"\r\n").join(self._buffer)
931        del self._buffer[:]
932        # If msg and message_body are sent in a single send() call,
933        # it will avoid performance problems caused by the interaction
934        # between delayed ack and the Nagle algorithm.
935        if isinstance(message_body, bytes):
936            msg += message_body
937            message_body = None
938        self.send(msg)
939        if message_body is not None:
940            # message_body was not a string (i.e. it is a file), and
941            # we must run the risk of Nagle.
942            self.send(message_body)
943
944    def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
945        """Send a request to the server.
946
947        `method' specifies an HTTP request method, e.g. 'GET'.
948        `url' specifies the object being requested, e.g. '/index.html'.
949        `skip_host' if True does not add automatically a 'Host:' header
950        `skip_accept_encoding' if True does not add automatically an
951           'Accept-Encoding:' header
952        """
953
954        # if a prior response has been completed, then forget about it.
955        if self.__response and self.__response.isclosed():
956            self.__response = None
957
958
959        # in certain cases, we cannot issue another request on this connection.
960        # this occurs when:
961        #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
962        #   2) a response to a previous request has signalled that it is going
963        #      to close the connection upon completion.
964        #   3) the headers for the previous response have not been read, thus
965        #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
966        #
967        # if there is no prior response, then we can request at will.
968        #
969        # if point (2) is true, then we will have passed the socket to the
970        # response (effectively meaning, "there is no prior response"), and
971        # will open a new one when a new request is made.
972        #
973        # Note: if a prior response exists, then we *can* start a new request.
974        #       We are not allowed to begin fetching the response to this new
975        #       request, however, until that prior response is complete.
976        #
977        if self.__state == _CS_IDLE:
978            self.__state = _CS_REQ_STARTED
979        else:
980            raise CannotSendRequest(self.__state)
981
982        # Save the method we use, we need it later in the response phase
983        self._method = method
984        if not url:
985            url = '/'
986        request = '%s %s %s' % (method, url, self._http_vsn_str)
987
988        # Non-ASCII characters should have been eliminated earlier
989        self._output(request.encode('ascii'))
990
991        if self._http_vsn == 11:
992            # Issue some standard headers for better HTTP/1.1 compliance
993
994            if not skip_host:
995                # this header is issued *only* for HTTP/1.1
996                # connections. more specifically, this means it is
997                # only issued when the client uses the new
998                # HTTPConnection() class. backwards-compat clients
999                # will be using HTTP/1.0 and those clients may be
1000                # issuing this header themselves. we should NOT issue
1001                # it twice; some web servers (such as Apache) barf
1002                # when they see two Host: headers
1003
1004                # If we need a non-standard port,include it in the
1005                # header.  If the request is going through a proxy,
1006                # but the host of the actual URL, not the host of the
1007                # proxy.
1008
1009                netloc = ''
1010                if url.startswith('http'):
1011                    nil, netloc, nil, nil, nil = urlsplit(url)
1012
1013                if netloc:
1014                    try:
1015                        netloc_enc = netloc.encode("ascii")
1016                    except UnicodeEncodeError:
1017                        netloc_enc = netloc.encode("idna")
1018                    self.putheader('Host', netloc_enc)
1019                else:
1020                    try:
1021                        host_enc = self.host.encode("ascii")
1022                    except UnicodeEncodeError:
1023                        host_enc = self.host.encode("idna")
1024
1025                    # As per RFC 273, IPv6 address should be wrapped with []
1026                    # when used as Host header
1027
1028                    if self.host.find(':') >= 0:
1029                        host_enc = bytes(b'[' + host_enc + b']')
1030
1031                    if self.port == self.default_port:
1032                        self.putheader('Host', host_enc)
1033                    else:
1034                        host_enc = host_enc.decode("ascii")
1035                        self.putheader('Host', "%s:%s" % (host_enc, self.port))
1036
1037            # note: we are assuming that clients will not attempt to set these
1038            #       headers since *this* library must deal with the
1039            #       consequences. this also means that when the supporting
1040            #       libraries are updated to recognize other forms, then this
1041            #       code should be changed (removed or updated).
1042
1043            # we only want a Content-Encoding of "identity" since we don't
1044            # support encodings such as x-gzip or x-deflate.
1045            if not skip_accept_encoding:
1046                self.putheader('Accept-Encoding', 'identity')
1047
1048            # we can accept "chunked" Transfer-Encodings, but no others
1049            # NOTE: no TE header implies *only* "chunked"
1050            #self.putheader('TE', 'chunked')
1051
1052            # if TE is supplied in the header, then it must appear in a
1053            # Connection header.
1054            #self.putheader('Connection', 'TE')
1055
1056        else:
1057            # For HTTP/1.0, the server will assume "not chunked"
1058            pass
1059
1060    def putheader(self, header, *values):
1061        """Send a request header line to the server.
1062
1063        For example: h.putheader('Accept', 'text/html')
1064        """
1065        if self.__state != _CS_REQ_STARTED:
1066            raise CannotSendHeader()
1067
1068        if hasattr(header, 'encode'):
1069            header = header.encode('ascii')
1070        values = list(values)
1071        for i, one_value in enumerate(values):
1072            if hasattr(one_value, 'encode'):
1073                values[i] = one_value.encode('latin-1')
1074            elif isinstance(one_value, int):
1075                values[i] = str(one_value).encode('ascii')
1076        value = bytes(b'\r\n\t').join(values)
1077        header = header + bytes(b': ') + value
1078        self._output(header)
1079
1080    def endheaders(self, message_body=None):
1081        """Indicate that the last header line has been sent to the server.
1082
1083        This method sends the request to the server.  The optional message_body
1084        argument can be used to pass a message body associated with the
1085        request.  The message body will be sent in the same packet as the
1086        message headers if it is a string, otherwise it is sent as a separate
1087        packet.
1088        """
1089        if self.__state == _CS_REQ_STARTED:
1090            self.__state = _CS_REQ_SENT
1091        else:
1092            raise CannotSendHeader()
1093        self._send_output(message_body)
1094
1095    def request(self, method, url, body=None, headers={}):
1096        """Send a complete request to the server."""
1097        self._send_request(method, url, body, headers)
1098
1099    def _set_content_length(self, body):
1100        # Set the content-length based on the body.
1101        thelen = None
1102        try:
1103            thelen = str(len(body))
1104        except TypeError as te:
1105            # If this is a file-like object, try to
1106            # fstat its file descriptor
1107            try:
1108                thelen = str(os.fstat(body.fileno()).st_size)
1109            except (AttributeError, OSError):
1110                # Don't send a length if this failed
1111                if self.debuglevel > 0: print("Cannot stat!!")
1112
1113        if thelen is not None:
1114            self.putheader('Content-Length', thelen)
1115
1116    def _send_request(self, method, url, body, headers):
1117        # Honor explicitly requested Host: and Accept-Encoding: headers.
1118        header_names = dict.fromkeys([k.lower() for k in headers])
1119        skips = {}
1120        if 'host' in header_names:
1121            skips['skip_host'] = 1
1122        if 'accept-encoding' in header_names:
1123            skips['skip_accept_encoding'] = 1
1124
1125        self.putrequest(method, url, **skips)
1126
1127        if body is not None and ('content-length' not in header_names):
1128            self._set_content_length(body)
1129        for hdr, value in headers.items():
1130            self.putheader(hdr, value)
1131        if isinstance(body, str):
1132            # RFC 2616 Section 3.7.1 says that text default has a
1133            # default charset of iso-8859-1.
1134            body = body.encode('iso-8859-1')
1135        self.endheaders(body)
1136
1137    def getresponse(self):
1138        """Get the response from the server.
1139
1140        If the HTTPConnection is in the correct state, returns an
1141        instance of HTTPResponse or of whatever object is returned by
1142        class the response_class variable.
1143
1144        If a request has not been sent or if a previous response has
1145        not be handled, ResponseNotReady is raised.  If the HTTP
1146        response indicates that the connection should be closed, then
1147        it will be closed before the response is returned.  When the
1148        connection is closed, the underlying socket is closed.
1149        """
1150
1151        # if a prior response has been completed, then forget about it.
1152        if self.__response and self.__response.isclosed():
1153            self.__response = None
1154
1155        # if a prior response exists, then it must be completed (otherwise, we
1156        # cannot read this response's header to determine the connection-close
1157        # behavior)
1158        #
1159        # note: if a prior response existed, but was connection-close, then the
1160        # socket and response were made independent of this HTTPConnection
1161        # object since a new request requires that we open a whole new
1162        # connection
1163        #
1164        # this means the prior response had one of two states:
1165        #   1) will_close: this connection was reset and the prior socket and
1166        #                  response operate independently
1167        #   2) persistent: the response was retained and we await its
1168        #                  isclosed() status to become true.
1169        #
1170        if self.__state != _CS_REQ_SENT or self.__response:
1171            raise ResponseNotReady(self.__state)
1172
1173        if self.debuglevel > 0:
1174            response = self.response_class(self.sock, self.debuglevel,
1175                                           method=self._method)
1176        else:
1177            response = self.response_class(self.sock, method=self._method)
1178
1179        response.begin()
1180        assert response.will_close != _UNKNOWN
1181        self.__state = _CS_IDLE
1182
1183        if response.will_close:
1184            # this effectively passes the connection to the response
1185            self.close()
1186        else:
1187            # remember this, so we can tell when it is complete
1188            self.__response = response
1189
1190        return response
1191
1192try:
1193    import ssl
1194    from ssl import SSLContext
1195except ImportError:
1196    pass
1197else:
1198    class HTTPSConnection(HTTPConnection):
1199        "This class allows communication via SSL."
1200
1201        default_port = HTTPS_PORT
1202
1203        # XXX Should key_file and cert_file be deprecated in favour of context?
1204
1205        def __init__(self, host, port=None, key_file=None, cert_file=None,
1206                     strict=_strict_sentinel, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
1207                     source_address=None, **_3to2kwargs):
1208            if 'check_hostname' in _3to2kwargs: check_hostname = _3to2kwargs['check_hostname']; del _3to2kwargs['check_hostname']
1209            else: check_hostname = None
1210            if 'context' in _3to2kwargs: context = _3to2kwargs['context']; del _3to2kwargs['context']
1211            else: context = None
1212            super(HTTPSConnection, self).__init__(host, port, strict, timeout,
1213                                                  source_address)
1214            self.key_file = key_file
1215            self.cert_file = cert_file
1216            if context is None:
1217                # Some reasonable defaults
1218                context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
1219                context.options |= ssl.OP_NO_SSLv2
1220            will_verify = context.verify_mode != ssl.CERT_NONE
1221            if check_hostname is None:
1222                check_hostname = will_verify
1223            elif check_hostname and not will_verify:
1224                raise ValueError("check_hostname needs a SSL context with "
1225                                 "either CERT_OPTIONAL or CERT_REQUIRED")
1226            if key_file or cert_file:
1227                context.load_cert_chain(cert_file, key_file)
1228            self._context = context
1229            self._check_hostname = check_hostname
1230
1231        def connect(self):
1232            "Connect to a host on a given (SSL) port."
1233
1234            sock = socket_create_connection((self.host, self.port),
1235                                            self.timeout, self.source_address)
1236
1237            if self._tunnel_host:
1238                self.sock = sock
1239                self._tunnel()
1240
1241            server_hostname = self.host if ssl.HAS_SNI else None
1242            self.sock = self._context.wrap_socket(sock,
1243                                                  server_hostname=server_hostname)
1244            try:
1245                if self._check_hostname:
1246                    ssl.match_hostname(self.sock.getpeercert(), self.host)
1247            except Exception:
1248                self.sock.shutdown(socket.SHUT_RDWR)
1249                self.sock.close()
1250                raise
1251
1252    __all__.append("HTTPSConnection")
1253
1254
1255    # ######################################
1256    # # We use the old HTTPSConnection class from Py2.7, because ssl.SSLContext
1257    # # doesn't exist in the Py2.7 stdlib
1258    # class HTTPSConnection(HTTPConnection):
1259    #     "This class allows communication via SSL."
1260
1261    #     default_port = HTTPS_PORT
1262
1263    #     def __init__(self, host, port=None, key_file=None, cert_file=None,
1264    #                  strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
1265    #                  source_address=None):
1266    #         HTTPConnection.__init__(self, host, port, strict, timeout,
1267    #                                 source_address)
1268    #         self.key_file = key_file
1269    #         self.cert_file = cert_file
1270
1271    #     def connect(self):
1272    #         "Connect to a host on a given (SSL) port."
1273
1274    #         sock = socket_create_connection((self.host, self.port),
1275    #                                         self.timeout, self.source_address)
1276    #         if self._tunnel_host:
1277    #             self.sock = sock
1278    #             self._tunnel()
1279    #         self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)
1280
1281    # __all__.append("HTTPSConnection")
1282    # ######################################
1283
1284
class HTTPException(Exception):
    """Base class of the exceptions defined by this module.

    Subclasses that define an __init__ must call Exception.__init__
    or define self.args.  Otherwise, str() will fail.
    """
1289
class NotConnected(HTTPException):
    """Raised by send() when there is no socket and auto_open is off."""
1292
class InvalidURL(HTTPException):
    """Exception for an invalid URL."""
1295
class UnknownProtocol(HTTPException):
    """Exception that records the protocol version it was raised for."""

    def __init__(self, version):
        # self.args is set by hand because Exception.__init__ is not called
        self.version = version
        self.args = (version,)
1300
class UnknownTransferEncoding(HTTPException):
    """Exception for a transfer encoding that is not recognized."""
1303
class UnimplementedFileMode(HTTPException):
    """Exception for a file mode that is not implemented."""
1306
class IncompleteRead(HTTPException):
    """Exception holding the data read so far from an incomplete read.

    ``partial`` is the data that was read; ``expected``, when given, is
    the number of units still missing and is included in the message.
    """

    def __init__(self, partial, expected=None):
        # self.args is set by hand because Exception.__init__ is not called
        self.args = (partial,)
        self.partial = partial
        self.expected = expected

    def __repr__(self):
        suffix = ''
        if self.expected is not None:
            suffix = ', %i more expected' % self.expected
        return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), suffix)

    def __str__(self):
        return repr(self)
1320
class ImproperConnectionState(HTTPException):
    """Base class for errors about the connection being in the wrong state."""
1323
class CannotSendRequest(ImproperConnectionState):
    """Raised by putrequest() when the connection is not idle."""
1326
class CannotSendHeader(ImproperConnectionState):
    """Raised by putheader() and endheaders() when no request is started."""
1329
class ResponseNotReady(ImproperConnectionState):
    """Raised by getresponse() when no response can be fetched yet.

    That is the case when no request has been sent, or when a previous
    response is still pending.
    """
1332
class BadStatusLine(HTTPException):
    """Exception that records the status line it was raised for.

    An empty line is stored as its repr(), so that the message is not
    blank.
    """

    def __init__(self, line):
        shown = line if line else repr(line)
        # self.args is set by hand because Exception.__init__ is not called
        self.args = (shown,)
        self.line = shown
1339
class LineTooLong(HTTPException):
    """Raised when a line being read is longer than _MAXLINE bytes.

    ``line_type`` names the kind of line for the message, e.g.
    "header line".
    """

    def __init__(self, line_type):
        message = ("got more than %d bytes when reading %s"
                   % (_MAXLINE, line_type))
        HTTPException.__init__(self, message)
1344
# Alias kept for backwards compatibility: ``error`` names the same class
# as HTTPException.
error = HTTPException
1347