1r"""HTTP/1.1 client library
2
3<intro stuff goes here>
4<other stuff, too>
5
6HTTPConnection goes through a number of "states", which define when a client
7may legally make another request or fetch the response for a particular
8request. This diagram details these state transitions:
9
10    (null)
11      |
12      | HTTPConnection()
13      v
14    Idle
15      |
16      | putrequest()
17      v
18    Request-started
19      |
20      | ( putheader() )*  endheaders()
21      v
22    Request-sent
23      |\_____________________________
24      |                              | getresponse() raises
25      | response = getresponse()     | ConnectionError
26      v                              v
27    Unread-response                Idle
28    [Response-headers-read]
29      |\____________________
30      |                     |
31      | response.read()     | putrequest()
32      v                     v
33    Idle                  Req-started-unread-response
34                     ______/|
35                   /        |
36   response.read() |        | ( putheader() )*  endheaders()
37                   v        v
38       Request-started    Req-sent-unread-response
39                            |
40                            | response.read()
41                            v
42                          Request-sent
43
44This diagram presents the following rules:
45  -- a second request may not be started until {response-headers-read}
46  -- a response [object] cannot be retrieved until {request-sent}
47  -- there is no differentiation between an unread response body and a
48     partially read response body
49
50Note: this enforcement is applied by the HTTPConnection class. The
51      HTTPResponse class does not enforce this state machine, which
52      implies sophisticated clients may accelerate the request/response
53      pipeline. Caution should be taken, though: accelerating the states
54      beyond the above pattern may imply knowledge of the server's
55      connection-close behavior for certain requests. For example, it
56      is impossible to tell whether the server will close the connection
57      UNTIL the response headers have been read; this means that further
58      requests cannot be placed into the pipeline until it is known that
59      the server will NOT be closing the connection.
60
61Logical State                  __state            __response
62-------------                  -------            ----------
63Idle                           _CS_IDLE           None
64Request-started                _CS_REQ_STARTED    None
65Request-sent                   _CS_REQ_SENT       None
66Unread-response                _CS_IDLE           <response_class>
67Req-started-unread-response    _CS_REQ_STARTED    <response_class>
68Req-sent-unread-response       _CS_REQ_SENT       <response_class>
69"""
70
71import email.parser
72import email.message
73import http
74import io
75import re
76import socket
77import collections.abc
78from urllib.parse import urlsplit
79
80# HTTPMessage, parse_headers(), and the HTTP status code constants are
81# intentionally omitted for simplicity
# Public names exported by "from <module> import *".  Several of them
# (the exception classes and "error") are not defined in this part of
# the file.
__all__ = ["HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "LineTooLong", "RemoteDisconnected", "error",
           "responses"]

# default TCP ports for the two URL schemes
HTTP_PORT = 80
HTTPS_PORT = 443

# placeholder stored in HTTPResponse attributes until begin() has parsed
# the status line and headers
_UNKNOWN = 'UNKNOWN'

# connection states (see the state table in the module docstring)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'


# hack to maintain backwards compatibility
# (injects every http.HTTPStatus member, e.g. CONTINUE and NO_CONTENT,
# into this module's namespace; HTTPResponse.begin() relies on them)
globals().update(http.HTTPStatus.__members__)

# another hack to maintain backwards compatibility
# Mapping status codes to official W3C names
# (keys are the HTTPStatus members, values are their reason phrases)
responses = {v: v.phrase for v in http.HTTPStatus.__members__.values()}

# maximal line length when calling readline().
_MAXLINE = 65536
# maximal number of lines _read_headers() accepts for one header block
# (the terminating blank line is counted too)
_MAXHEADERS = 100
111
112# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
113#
114# VCHAR          = %x21-7E
115# obs-text       = %x80-FF
116# header-field   = field-name ":" OWS field-value OWS
117# field-name     = token
118# field-value    = *( field-content / obs-fold )
119# field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
120# field-vchar    = VCHAR / obs-text
121#
122# obs-fold       = CRLF 1*( SP / HTAB )
123#                ; obsolete line folding
124#                ; see Section 3.2.4
125
126# token          = 1*tchar
127#
128# tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
129#                / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
130#                / DIGIT / ALPHA
131#                ; any VCHAR, except delimiters
132#
133# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1
134
# the patterns for both name and value are more lenient than RFC
# definitions to allow for backwards compatibility
#
# Both operate on bytes.  A header name is legal when the whole value
# matches: first byte is neither ":" nor whitespace, and no later byte
# is ":", CR or LF.
_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
# A header value is illegal when it contains an LF that is not followed
# by a space or tab, or a CR that is not followed by a space, tab or LF
# (i.e. a line break that is not a folded continuation).
_is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search

# These characters are not allowed within HTTP URL paths.
#  See https://tools.ietf.org/html/rfc3986#section-3.3 and the
#  https://tools.ietf.org/html/rfc3986#appendix-A pchar definition.
# Prevents CVE-2019-9740.  Includes control characters such as \r\n.
# We don't restrict chars above \x7f as putrequest() limits us to ASCII.
_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f]')
# Arguably only these _should_ be allowed:
#  _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$")
# We are more lenient for assumed real world compatibility purposes.

# These characters are not allowed within HTTP method names
# to prevent http header injection.
# (the class covers the control characters \x00 through \x1f)
_contains_disallowed_method_pchar_re = re.compile('[\x00-\x1f]')

# We always set the Content-Length header for these methods because some
# servers will otherwise respond with a 411
_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
157
158
159def _encode(data, name='data'):
160    """Call data.encode("latin-1") but show a better error message."""
161    try:
162        return data.encode("latin-1")
163    except UnicodeEncodeError as err:
164        raise UnicodeEncodeError(
165            err.encoding,
166            err.object,
167            err.start,
168            err.end,
169            "%s (%.20r) is not valid Latin-1. Use %s.encode('utf-8') "
170            "if you want to send it encoded in UTF-8." %
171            (name.title(), data[err.start:err.end], name)) from None
172
173
class HTTPMessage(email.message.Message):
    """email.message.Message subclass used to hold HTTP header fields."""

    # XXX The only usage of this method is in
    # http.server.CGIHTTPRequestHandler.  Maybe move the code there so
    # that it doesn't need to be part of the public API.  The API has
    # never been defined so this could cause backwards compatibility
    # issues.

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Every occurrence of the header is rendered as "Name: value" using
        the name exactly as it is stored in the message.  When a value
        spans several physical lines (a folded header), each continuation
        line is returned as its own list entry, without its line ending.

        Returns an empty list if the header does not occur.  Case is not
        important in the header name.
        """
        # The previous implementation compared the message *keys* against
        # name + ':'.  Keys never contain the colon, so nothing could ever
        # match and the method always returned [].  Compare the bare names
        # and rebuild the header lines from the (name, value) pairs.
        wanted = name.lower()
        lines = []
        for key, value in self.items():
            if key.lower() == wanted:
                # str() guards against values that are not plain strings.
                lines.extend((key + ': ' + str(value)).splitlines())
        return lines
203
def _read_headers(fp):
    """Collect raw header lines from *fp* up to and including the terminator.

    Lines are read with fp.readline() and returned unmodified in a list.
    Reading stops after a blank line (b'\\r\\n' or b'\\n') or at end of
    file (b''); that final line is part of the returned list.

    Raises LineTooLong when a line exceeds _MAXLINE bytes and
    HTTPException when more than _MAXHEADERS lines have been collected.
    """
    terminators = (b'\r\n', b'\n', b'')
    lines = []
    while True:
        # Ask for one byte more than allowed so an oversized line is
        # detectable without reading it completely.
        raw = fp.readline(_MAXLINE + 1)
        if len(raw) > _MAXLINE:
            raise LineTooLong("header line")
        lines.append(raw)
        if len(lines) > _MAXHEADERS:
            raise HTTPException("got more than %d headers" % _MAXHEADERS)
        if raw in terminators:
            return lines
221
def parse_headers(fp, _class=HTTPMessage):
    """Read a header block from *fp* and parse it into a *_class* message.

    The email parser works on text, but wrapping the stream in a
    TextIOWrapper would read ahead and swallow bytes that must later be
    consumed as bytes.  So exactly the header lines are read as bytes
    first, decoded as ISO-8859-1, and only then handed to the parser.
    """
    raw_lines = _read_headers(fp)
    text = b''.join(raw_lines).decode('iso-8859-1')
    parser = email.parser.Parser(_class=_class)
    return parser.parsestr(text)
235
236
237class HTTPResponse(io.BufferedIOBase):
238
239    # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
240
241    # The bytes from the socket object are iso-8859-1 strings.
242    # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded
243    # text following RFC 2047.  The basic status line parsing only
244    # accepts iso-8859-1.
245
246    def __init__(self, sock, debuglevel=0, method=None, url=None):
247        # If the response includes a content-length header, we need to
248        # make sure that the client doesn't read more than the
249        # specified number of bytes.  If it does, it will block until
250        # the server times out and closes the connection.  This will
251        # happen if a self.fp.read() is done (without a size) whether
252        # self.fp is buffered or not.  So, no self.fp.read() by
253        # clients unless they know what they are doing.
254        self.fp = sock.makefile("rb")
255        self.debuglevel = debuglevel
256        self._method = method
257
258        # The HTTPResponse object is returned via urllib.  The clients
259        # of http and urllib expect different attributes for the
260        # headers.  headers is used here and supports urllib.  msg is
261        # provided as a backwards compatibility layer for http
262        # clients.
263
264        self.headers = self.msg = None
265
266        # from the Status-Line of the response
267        self.version = _UNKNOWN # HTTP-Version
268        self.status = _UNKNOWN  # Status-Code
269        self.reason = _UNKNOWN  # Reason-Phrase
270
271        self.chunked = _UNKNOWN         # is "chunked" being used?
272        self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
273        self.length = _UNKNOWN          # number of bytes left in response
274        self.will_close = _UNKNOWN      # conn will close at end of response
275
276    def _read_status(self):
277        line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
278        if len(line) > _MAXLINE:
279            raise LineTooLong("status line")
280        if self.debuglevel > 0:
281            print("reply:", repr(line))
282        if not line:
283            # Presumably, the server closed the connection before
284            # sending a valid response.
285            raise RemoteDisconnected("Remote end closed connection without"
286                                     " response")
287        try:
288            version, status, reason = line.split(None, 2)
289        except ValueError:
290            try:
291                version, status = line.split(None, 1)
292                reason = ""
293            except ValueError:
294                # empty version will cause next test to fail.
295                version = ""
296        if not version.startswith("HTTP/"):
297            self._close_conn()
298            raise BadStatusLine(line)
299
300        # The status code is a three-digit number
301        try:
302            status = int(status)
303            if status < 100 or status > 999:
304                raise BadStatusLine(line)
305        except ValueError:
306            raise BadStatusLine(line)
307        return version, status, reason
308
309    def begin(self):
310        if self.headers is not None:
311            # we've already started reading the response
312            return
313
314        # read until we get a non-100 response
315        while True:
316            version, status, reason = self._read_status()
317            if status != CONTINUE:
318                break
319            # skip the header from the 100 response
320            skipped_headers = _read_headers(self.fp)
321            if self.debuglevel > 0:
322                print("headers:", skipped_headers)
323            del skipped_headers
324
325        self.code = self.status = status
326        self.reason = reason.strip()
327        if version in ("HTTP/1.0", "HTTP/0.9"):
328            # Some servers might still return "0.9", treat it as 1.0 anyway
329            self.version = 10
330        elif version.startswith("HTTP/1."):
331            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
332        else:
333            raise UnknownProtocol(version)
334
335        self.headers = self.msg = parse_headers(self.fp)
336
337        if self.debuglevel > 0:
338            for hdr, val in self.headers.items():
339                print("header:", hdr + ":", val)
340
341        # are we using the chunked-style of transfer encoding?
342        tr_enc = self.headers.get("transfer-encoding")
343        if tr_enc and tr_enc.lower() == "chunked":
344            self.chunked = True
345            self.chunk_left = None
346        else:
347            self.chunked = False
348
349        # will the connection close at the end of the response?
350        self.will_close = self._check_close()
351
352        # do we have a Content-Length?
353        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
354        self.length = None
355        length = self.headers.get("content-length")
356        if length and not self.chunked:
357            try:
358                self.length = int(length)
359            except ValueError:
360                self.length = None
361            else:
362                if self.length < 0:  # ignore nonsensical negative lengths
363                    self.length = None
364        else:
365            self.length = None
366
367        # does the body have a fixed length? (of zero)
368        if (status == NO_CONTENT or status == NOT_MODIFIED or
369            100 <= status < 200 or      # 1xx codes
370            self._method == "HEAD"):
371            self.length = 0
372
373        # if the connection remains open, and we aren't using chunked, and
374        # a content-length was not provided, then assume that the connection
375        # WILL close.
376        if (not self.will_close and
377            not self.chunked and
378            self.length is None):
379            self.will_close = True
380
381    def _check_close(self):
382        conn = self.headers.get("connection")
383        if self.version == 11:
384            # An HTTP/1.1 proxy is assumed to stay open unless
385            # explicitly closed.
386            if conn and "close" in conn.lower():
387                return True
388            return False
389
390        # Some HTTP/1.0 implementations have support for persistent
391        # connections, using rules different than HTTP/1.1.
392
393        # For older HTTP, Keep-Alive indicates persistent connection.
394        if self.headers.get("keep-alive"):
395            return False
396
397        # At least Akamai returns a "Connection: Keep-Alive" header,
398        # which was supposed to be sent by the client.
399        if conn and "keep-alive" in conn.lower():
400            return False
401
402        # Proxy-Connection is a netscape hack.
403        pconn = self.headers.get("proxy-connection")
404        if pconn and "keep-alive" in pconn.lower():
405            return False
406
407        # otherwise, assume it will close
408        return True
409
410    def _close_conn(self):
411        fp = self.fp
412        self.fp = None
413        fp.close()
414
415    def close(self):
416        try:
417            super().close() # set "closed" flag
418        finally:
419            if self.fp:
420                self._close_conn()
421
422    # These implementations are for the benefit of io.BufferedReader.
423
424    # XXX This class should probably be revised to act more like
425    # the "raw stream" that BufferedReader expects.
426
427    def flush(self):
428        super().flush()
429        if self.fp:
430            self.fp.flush()
431
432    def readable(self):
433        """Always returns True"""
434        return True
435
436    # End of "raw stream" methods
437
438    def isclosed(self):
439        """True if the connection is closed."""
440        # NOTE: it is possible that we will not ever call self.close(). This
441        #       case occurs when will_close is TRUE, length is None, and we
442        #       read up to the last byte, but NOT past it.
443        #
444        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
445        #          called, meaning self.isclosed() is meaningful.
446        return self.fp is None
447
448    def read(self, amt=None):
449        if self.fp is None:
450            return b""
451
452        if self._method == "HEAD":
453            self._close_conn()
454            return b""
455
456        if amt is not None:
457            # Amount is given, implement using readinto
458            b = bytearray(amt)
459            n = self.readinto(b)
460            return memoryview(b)[:n].tobytes()
461        else:
462            # Amount is not given (unbounded read) so we must check self.length
463            # and self.chunked
464
465            if self.chunked:
466                return self._readall_chunked()
467
468            if self.length is None:
469                s = self.fp.read()
470            else:
471                try:
472                    s = self._safe_read(self.length)
473                except IncompleteRead:
474                    self._close_conn()
475                    raise
476                self.length = 0
477            self._close_conn()        # we read everything
478            return s
479
480    def readinto(self, b):
481        """Read up to len(b) bytes into bytearray b and return the number
482        of bytes read.
483        """
484
485        if self.fp is None:
486            return 0
487
488        if self._method == "HEAD":
489            self._close_conn()
490            return 0
491
492        if self.chunked:
493            return self._readinto_chunked(b)
494
495        if self.length is not None:
496            if len(b) > self.length:
497                # clip the read to the "end of response"
498                b = memoryview(b)[0:self.length]
499
500        # we do not use _safe_read() here because this may be a .will_close
501        # connection, and the user is reading more bytes than will be provided
502        # (for example, reading in 1k chunks)
503        n = self.fp.readinto(b)
504        if not n and b:
505            # Ideally, we would raise IncompleteRead if the content-length
506            # wasn't satisfied, but it might break compatibility.
507            self._close_conn()
508        elif self.length is not None:
509            self.length -= n
510            if not self.length:
511                self._close_conn()
512        return n
513
514    def _read_next_chunk_size(self):
515        # Read the next chunk size from the file
516        line = self.fp.readline(_MAXLINE + 1)
517        if len(line) > _MAXLINE:
518            raise LineTooLong("chunk size")
519        i = line.find(b";")
520        if i >= 0:
521            line = line[:i] # strip chunk-extensions
522        try:
523            return int(line, 16)
524        except ValueError:
525            # close the connection as protocol synchronisation is
526            # probably lost
527            self._close_conn()
528            raise
529
530    def _read_and_discard_trailer(self):
531        # read and discard trailer up to the CRLF terminator
532        ### note: we shouldn't have any trailers!
533        while True:
534            line = self.fp.readline(_MAXLINE + 1)
535            if len(line) > _MAXLINE:
536                raise LineTooLong("trailer line")
537            if not line:
538                # a vanishingly small number of sites EOF without
539                # sending the trailer
540                break
541            if line in (b'\r\n', b'\n', b''):
542                break
543
544    def _get_chunk_left(self):
545        # return self.chunk_left, reading a new chunk if necessary.
546        # chunk_left == 0: at the end of the current chunk, need to close it
547        # chunk_left == None: No current chunk, should read next.
548        # This function returns non-zero or None if the last chunk has
549        # been read.
550        chunk_left = self.chunk_left
551        if not chunk_left: # Can be 0 or None
552            if chunk_left is not None:
553                # We are at the end of chunk, discard chunk end
554                self._safe_read(2)  # toss the CRLF at the end of the chunk
555            try:
556                chunk_left = self._read_next_chunk_size()
557            except ValueError:
558                raise IncompleteRead(b'')
559            if chunk_left == 0:
560                # last chunk: 1*("0") [ chunk-extension ] CRLF
561                self._read_and_discard_trailer()
562                # we read everything; close the "file"
563                self._close_conn()
564                chunk_left = None
565            self.chunk_left = chunk_left
566        return chunk_left
567
568    def _readall_chunked(self):
569        assert self.chunked != _UNKNOWN
570        value = []
571        try:
572            while True:
573                chunk_left = self._get_chunk_left()
574                if chunk_left is None:
575                    break
576                value.append(self._safe_read(chunk_left))
577                self.chunk_left = 0
578            return b''.join(value)
579        except IncompleteRead:
580            raise IncompleteRead(b''.join(value))
581
582    def _readinto_chunked(self, b):
583        assert self.chunked != _UNKNOWN
584        total_bytes = 0
585        mvb = memoryview(b)
586        try:
587            while True:
588                chunk_left = self._get_chunk_left()
589                if chunk_left is None:
590                    return total_bytes
591
592                if len(mvb) <= chunk_left:
593                    n = self._safe_readinto(mvb)
594                    self.chunk_left = chunk_left - n
595                    return total_bytes + n
596
597                temp_mvb = mvb[:chunk_left]
598                n = self._safe_readinto(temp_mvb)
599                mvb = mvb[n:]
600                total_bytes += n
601                self.chunk_left = 0
602
603        except IncompleteRead:
604            raise IncompleteRead(bytes(b[0:total_bytes]))
605
606    def _safe_read(self, amt):
607        """Read the number of bytes requested.
608
609        This function should be used when <amt> bytes "should" be present for
610        reading. If the bytes are truly not available (due to EOF), then the
611        IncompleteRead exception can be used to detect the problem.
612        """
613        data = self.fp.read(amt)
614        if len(data) < amt:
615            raise IncompleteRead(data, amt-len(data))
616        return data
617
618    def _safe_readinto(self, b):
619        """Same as _safe_read, but for reading into a buffer."""
620        amt = len(b)
621        n = self.fp.readinto(b)
622        if n < amt:
623            raise IncompleteRead(bytes(b[:n]), amt-n)
624        return n
625
626    def read1(self, n=-1):
627        """Read with at most one underlying system call.  If at least one
628        byte is buffered, return that instead.
629        """
630        if self.fp is None or self._method == "HEAD":
631            return b""
632        if self.chunked:
633            return self._read1_chunked(n)
634        if self.length is not None and (n < 0 or n > self.length):
635            n = self.length
636        result = self.fp.read1(n)
637        if not result and n:
638            self._close_conn()
639        elif self.length is not None:
640            self.length -= len(result)
641        return result
642
643    def peek(self, n=-1):
644        # Having this enables IOBase.readline() to read more than one
645        # byte at a time
646        if self.fp is None or self._method == "HEAD":
647            return b""
648        if self.chunked:
649            return self._peek_chunked(n)
650        return self.fp.peek(n)
651
652    def readline(self, limit=-1):
653        if self.fp is None or self._method == "HEAD":
654            return b""
655        if self.chunked:
656            # Fallback to IOBase readline which uses peek() and read()
657            return super().readline(limit)
658        if self.length is not None and (limit < 0 or limit > self.length):
659            limit = self.length
660        result = self.fp.readline(limit)
661        if not result and limit:
662            self._close_conn()
663        elif self.length is not None:
664            self.length -= len(result)
665        return result
666
667    def _read1_chunked(self, n):
668        # Strictly speaking, _get_chunk_left() may cause more than one read,
669        # but that is ok, since that is to satisfy the chunked protocol.
670        chunk_left = self._get_chunk_left()
671        if chunk_left is None or n == 0:
672            return b''
673        if not (0 <= n <= chunk_left):
674            n = chunk_left # if n is negative or larger than chunk_left
675        read = self.fp.read1(n)
676        self.chunk_left -= len(read)
677        if not read:
678            raise IncompleteRead(b"")
679        return read
680
681    def _peek_chunked(self, n):
682        # Strictly speaking, _get_chunk_left() may cause more than one read,
683        # but that is ok, since that is to satisfy the chunked protocol.
684        try:
685            chunk_left = self._get_chunk_left()
686        except IncompleteRead:
687            return b'' # peek doesn't worry about protocol
688        if chunk_left is None:
689            return b'' # eof
690        # peek is allowed to return more than requested.  Just request the
691        # entire chunk, and truncate what we get.
692        return self.fp.peek(chunk_left)[:chunk_left]
693
694    def fileno(self):
695        return self.fp.fileno()
696
697    def getheader(self, name, default=None):
698        '''Returns the value of the header matching *name*.
699
700        If there are multiple matching headers, the values are
701        combined into a single string separated by commas and spaces.
702
703        If no matching header is found, returns *default* or None if
704        the *default* is not specified.
705
706        If the headers are unknown, raises http.client.ResponseNotReady.
707
708        '''
709        if self.headers is None:
710            raise ResponseNotReady()
711        headers = self.headers.get_all(name) or default
712        if isinstance(headers, str) or not hasattr(headers, '__iter__'):
713            return headers
714        else:
715            return ', '.join(headers)
716
717    def getheaders(self):
718        """Return list of (header, value) tuples."""
719        if self.headers is None:
720            raise ResponseNotReady()
721        return list(self.headers.items())
722
723    # We override IOBase.__iter__ so that it doesn't check for closed-ness
724
725    def __iter__(self):
726        return self
727
728    # For compatibility with old-style urllib responses.
729
730    def info(self):
731        '''Returns an instance of the class mimetools.Message containing
732        meta-information associated with the URL.
733
734        When the method is HTTP, these headers are those returned by
735        the server at the head of the retrieved HTML page (including
736        Content-Length and Content-Type).
737
738        When the method is FTP, a Content-Length header will be
739        present if (as is now usual) the server passed back a file
740        length in response to the FTP retrieval request. A
741        Content-Type header will be present if the MIME type can be
742        guessed.
743
744        When the method is local-file, returned headers will include
745        a Date representing the file's last-modified time, a
746        Content-Length giving file size, and a Content-Type
747        containing a guess at the file's type. See also the
748        description of the mimetools module.
749
750        '''
751        return self.headers
752
753    def geturl(self):
754        '''Return the real URL of the page.
755
756        In some cases, the HTTP server redirects a client to another
757        URL. The urlopen() function handles this transparently, but in
758        some cases the caller needs to know which URL the client was
759        redirected to. The geturl() method can be used to get at this
760        redirected URL.
761
762        '''
763        return self.url
764
765    def getcode(self):
766        '''Return the HTTP status code that was sent with the response,
767        or None if the URL is not an HTTP URL.
768
769        '''
770        return self.status
771
772class HTTPConnection:
773
774    _http_vsn = 11
775    _http_vsn_str = 'HTTP/1.1'
776
777    response_class = HTTPResponse
778    default_port = HTTP_PORT
779    auto_open = 1
780    debuglevel = 0
781
782    @staticmethod
783    def _is_textIO(stream):
784        """Test whether a file-like object is a text or a binary stream.
785        """
786        return isinstance(stream, io.TextIOBase)
787
788    @staticmethod
789    def _get_content_length(body, method):
790        """Get the content-length based on the body.
791
792        If the body is None, we set Content-Length: 0 for methods that expect
793        a body (RFC 7230, Section 3.3.2). We also set the Content-Length for
794        any method if the body is a str or bytes-like object and not a file.
795        """
796        if body is None:
797            # do an explicit check for not None here to distinguish
798            # between unset and set but empty
799            if method.upper() in _METHODS_EXPECTING_BODY:
800                return 0
801            else:
802                return None
803
804        if hasattr(body, 'read'):
805            # file-like object.
806            return None
807
808        try:
809            # does it implement the buffer protocol (bytes, bytearray, array)?
810            mv = memoryview(body)
811            return mv.nbytes
812        except TypeError:
813            pass
814
815        if isinstance(body, str):
816            return len(body)
817
818        return None
819
820    def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
821                 source_address=None, blocksize=8192):
822        self.timeout = timeout
823        self.source_address = source_address
824        self.blocksize = blocksize
825        self.sock = None
826        self._buffer = []
827        self.__response = None
828        self.__state = _CS_IDLE
829        self._method = None
830        self._tunnel_host = None
831        self._tunnel_port = None
832        self._tunnel_headers = {}
833
834        (self.host, self.port) = self._get_hostport(host, port)
835
836        self._validate_host(self.host)
837
838        # This is stored as an instance variable to allow unit
839        # tests to replace it with a suitable mockup
840        self._create_connection = socket.create_connection
841
842    def set_tunnel(self, host, port=None, headers=None):
843        """Set up host and port for HTTP CONNECT tunnelling.
844
845        In a connection that uses HTTP CONNECT tunneling, the host passed to the
846        constructor is used as a proxy server that relays all communication to
847        the endpoint passed to `set_tunnel`. This done by sending an HTTP
848        CONNECT request to the proxy server when the connection is established.
849
850        This method must be called before the HTTP connection has been
851        established.
852
853        The headers argument should be a mapping of extra HTTP headers to send
854        with the CONNECT request.
855        """
856
857        if self.sock:
858            raise RuntimeError("Can't set up tunnel for established connection")
859
860        self._tunnel_host, self._tunnel_port = self._get_hostport(host, port)
861        if headers:
862            self._tunnel_headers = headers
863        else:
864            self._tunnel_headers.clear()
865
866    def _get_hostport(self, host, port):
867        if port is None:
868            i = host.rfind(':')
869            j = host.rfind(']')         # ipv6 addresses have [...]
870            if i > j:
871                try:
872                    port = int(host[i+1:])
873                except ValueError:
874                    if host[i+1:] == "": # http://foo.com:/ == http://foo.com/
875                        port = self.default_port
876                    else:
877                        raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
878                host = host[:i]
879            else:
880                port = self.default_port
881            if host and host[0] == '[' and host[-1] == ']':
882                host = host[1:-1]
883
884        return (host, port)
885
886    def set_debuglevel(self, level):
887        self.debuglevel = level
888
889    def _tunnel(self):
890        connect_str = "CONNECT %s:%d HTTP/1.0\r\n" % (self._tunnel_host,
891            self._tunnel_port)
892        connect_bytes = connect_str.encode("ascii")
893        self.send(connect_bytes)
894        for header, value in self._tunnel_headers.items():
895            header_str = "%s: %s\r\n" % (header, value)
896            header_bytes = header_str.encode("latin-1")
897            self.send(header_bytes)
898        self.send(b'\r\n')
899
900        response = self.response_class(self.sock, method=self._method)
901        (version, code, message) = response._read_status()
902
903        if code != http.HTTPStatus.OK:
904            self.close()
905            raise OSError("Tunnel connection failed: %d %s" % (code,
906                                                               message.strip()))
907        while True:
908            line = response.fp.readline(_MAXLINE + 1)
909            if len(line) > _MAXLINE:
910                raise LineTooLong("header line")
911            if not line:
912                # for sites which EOF without sending a trailer
913                break
914            if line in (b'\r\n', b'\n', b''):
915                break
916
917            if self.debuglevel > 0:
918                print('header:', line.decode())
919
920    def connect(self):
921        """Connect to the host and port specified in __init__."""
922        self.sock = self._create_connection(
923            (self.host,self.port), self.timeout, self.source_address)
924        self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
925
926        if self._tunnel_host:
927            self._tunnel()
928
    def close(self):
        """Close the connection to the HTTP server.

        Resets the connection state to idle, closes the socket if one is
        set, and closes any response still tracked by this connection.
        """
        self.__state = _CS_IDLE
        try:
            sock = self.sock
            if sock:
                # Drop our reference before closing the socket.
                self.sock = None
                sock.close()   # close it manually... there may be other refs
        finally:
            # Runs even when sock.close() raised, so a tracked response is
            # always released.
            response = self.__response
            if response:
                self.__response = None
                response.close()
942
943    def send(self, data):
944        """Send `data' to the server.
945        ``data`` can be a string object, a bytes object, an array object, a
946        file-like object that supports a .read() method, or an iterable object.
947        """
948
949        if self.sock is None:
950            if self.auto_open:
951                self.connect()
952            else:
953                raise NotConnected()
954
955        if self.debuglevel > 0:
956            print("send:", repr(data))
957        if hasattr(data, "read") :
958            if self.debuglevel > 0:
959                print("sendIng a read()able")
960            encode = self._is_textIO(data)
961            if encode and self.debuglevel > 0:
962                print("encoding file using iso-8859-1")
963            while 1:
964                datablock = data.read(self.blocksize)
965                if not datablock:
966                    break
967                if encode:
968                    datablock = datablock.encode("iso-8859-1")
969                self.sock.sendall(datablock)
970            return
971        try:
972            self.sock.sendall(data)
973        except TypeError:
974            if isinstance(data, collections.abc.Iterable):
975                for d in data:
976                    self.sock.sendall(d)
977            else:
978                raise TypeError("data should be a bytes-like object "
979                                "or an iterable, got %r" % type(data))
980
981    def _output(self, s):
982        """Add a line of output to the current request buffer.
983
984        Assumes that the line does *not* end with \\r\\n.
985        """
986        self._buffer.append(s)
987
988    def _read_readable(self, readable):
989        if self.debuglevel > 0:
990            print("sendIng a read()able")
991        encode = self._is_textIO(readable)
992        if encode and self.debuglevel > 0:
993            print("encoding file using iso-8859-1")
994        while True:
995            datablock = readable.read(self.blocksize)
996            if not datablock:
997                break
998            if encode:
999                datablock = datablock.encode("iso-8859-1")
1000            yield datablock
1001
1002    def _send_output(self, message_body=None, encode_chunked=False):
1003        """Send the currently buffered request and clear the buffer.
1004
1005        Appends an extra \\r\\n to the buffer.
1006        A message_body may be specified, to be appended to the request.
1007        """
1008        self._buffer.extend((b"", b""))
1009        msg = b"\r\n".join(self._buffer)
1010        del self._buffer[:]
1011        self.send(msg)
1012
1013        if message_body is not None:
1014
1015            # create a consistent interface to message_body
1016            if hasattr(message_body, 'read'):
1017                # Let file-like take precedence over byte-like.  This
1018                # is needed to allow the current position of mmap'ed
1019                # files to be taken into account.
1020                chunks = self._read_readable(message_body)
1021            else:
1022                try:
1023                    # this is solely to check to see if message_body
1024                    # implements the buffer API.  it /would/ be easier
1025                    # to capture if PyObject_CheckBuffer was exposed
1026                    # to Python.
1027                    memoryview(message_body)
1028                except TypeError:
1029                    try:
1030                        chunks = iter(message_body)
1031                    except TypeError:
1032                        raise TypeError("message_body should be a bytes-like "
1033                                        "object or an iterable, got %r"
1034                                        % type(message_body))
1035                else:
1036                    # the object implements the buffer interface and
1037                    # can be passed directly into socket methods
1038                    chunks = (message_body,)
1039
1040            for chunk in chunks:
1041                if not chunk:
1042                    if self.debuglevel > 0:
1043                        print('Zero length chunk ignored')
1044                    continue
1045
1046                if encode_chunked and self._http_vsn == 11:
1047                    # chunked encoding
1048                    chunk = f'{len(chunk):X}\r\n'.encode('ascii') + chunk \
1049                        + b'\r\n'
1050                self.send(chunk)
1051
1052            if encode_chunked and self._http_vsn == 11:
1053                # end chunked transfer
1054                self.send(b'0\r\n\r\n')
1055
1056    def putrequest(self, method, url, skip_host=False,
1057                   skip_accept_encoding=False):
1058        """Send a request to the server.
1059
1060        `method' specifies an HTTP request method, e.g. 'GET'.
1061        `url' specifies the object being requested, e.g. '/index.html'.
1062        `skip_host' if True does not add automatically a 'Host:' header
1063        `skip_accept_encoding' if True does not add automatically an
1064           'Accept-Encoding:' header
1065        """
1066
1067        # if a prior response has been completed, then forget about it.
1068        if self.__response and self.__response.isclosed():
1069            self.__response = None
1070
1071
1072        # in certain cases, we cannot issue another request on this connection.
1073        # this occurs when:
1074        #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
1075        #   2) a response to a previous request has signalled that it is going
1076        #      to close the connection upon completion.
1077        #   3) the headers for the previous response have not been read, thus
1078        #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
1079        #
1080        # if there is no prior response, then we can request at will.
1081        #
1082        # if point (2) is true, then we will have passed the socket to the
1083        # response (effectively meaning, "there is no prior response"), and
1084        # will open a new one when a new request is made.
1085        #
1086        # Note: if a prior response exists, then we *can* start a new request.
1087        #       We are not allowed to begin fetching the response to this new
1088        #       request, however, until that prior response is complete.
1089        #
1090        if self.__state == _CS_IDLE:
1091            self.__state = _CS_REQ_STARTED
1092        else:
1093            raise CannotSendRequest(self.__state)
1094
1095        self._validate_method(method)
1096
1097        # Save the method for use later in the response phase
1098        self._method = method
1099
1100        url = url or '/'
1101        self._validate_path(url)
1102
1103        request = '%s %s %s' % (method, url, self._http_vsn_str)
1104
1105        self._output(self._encode_request(request))
1106
1107        if self._http_vsn == 11:
1108            # Issue some standard headers for better HTTP/1.1 compliance
1109
1110            if not skip_host:
1111                # this header is issued *only* for HTTP/1.1
1112                # connections. more specifically, this means it is
1113                # only issued when the client uses the new
1114                # HTTPConnection() class. backwards-compat clients
1115                # will be using HTTP/1.0 and those clients may be
1116                # issuing this header themselves. we should NOT issue
1117                # it twice; some web servers (such as Apache) barf
1118                # when they see two Host: headers
1119
1120                # If we need a non-standard port,include it in the
1121                # header.  If the request is going through a proxy,
1122                # but the host of the actual URL, not the host of the
1123                # proxy.
1124
1125                netloc = ''
1126                if url.startswith('http'):
1127                    nil, netloc, nil, nil, nil = urlsplit(url)
1128
1129                if netloc:
1130                    try:
1131                        netloc_enc = netloc.encode("ascii")
1132                    except UnicodeEncodeError:
1133                        netloc_enc = netloc.encode("idna")
1134                    self.putheader('Host', netloc_enc)
1135                else:
1136                    if self._tunnel_host:
1137                        host = self._tunnel_host
1138                        port = self._tunnel_port
1139                    else:
1140                        host = self.host
1141                        port = self.port
1142
1143                    try:
1144                        host_enc = host.encode("ascii")
1145                    except UnicodeEncodeError:
1146                        host_enc = host.encode("idna")
1147
1148                    # As per RFC 273, IPv6 address should be wrapped with []
1149                    # when used as Host header
1150
1151                    if host.find(':') >= 0:
1152                        host_enc = b'[' + host_enc + b']'
1153
1154                    if port == self.default_port:
1155                        self.putheader('Host', host_enc)
1156                    else:
1157                        host_enc = host_enc.decode("ascii")
1158                        self.putheader('Host', "%s:%s" % (host_enc, port))
1159
1160            # note: we are assuming that clients will not attempt to set these
1161            #       headers since *this* library must deal with the
1162            #       consequences. this also means that when the supporting
1163            #       libraries are updated to recognize other forms, then this
1164            #       code should be changed (removed or updated).
1165
1166            # we only want a Content-Encoding of "identity" since we don't
1167            # support encodings such as x-gzip or x-deflate.
1168            if not skip_accept_encoding:
1169                self.putheader('Accept-Encoding', 'identity')
1170
1171            # we can accept "chunked" Transfer-Encodings, but no others
1172            # NOTE: no TE header implies *only* "chunked"
1173            #self.putheader('TE', 'chunked')
1174
1175            # if TE is supplied in the header, then it must appear in a
1176            # Connection header.
1177            #self.putheader('Connection', 'TE')
1178
1179        else:
1180            # For HTTP/1.0, the server will assume "not chunked"
1181            pass
1182
1183    def _encode_request(self, request):
1184        # ASCII also helps prevent CVE-2019-9740.
1185        return request.encode('ascii')
1186
1187    def _validate_method(self, method):
1188        """Validate a method name for putrequest."""
1189        # prevent http header injection
1190        match = _contains_disallowed_method_pchar_re.search(method)
1191        if match:
1192            raise ValueError(
1193                    f"method can't contain control characters. {method!r} "
1194                    f"(found at least {match.group()!r})")
1195
1196    def _validate_path(self, url):
1197        """Validate a url for putrequest."""
1198        # Prevent CVE-2019-9740.
1199        match = _contains_disallowed_url_pchar_re.search(url)
1200        if match:
1201            raise InvalidURL(f"URL can't contain control characters. {url!r} "
1202                             f"(found at least {match.group()!r})")
1203
1204    def _validate_host(self, host):
1205        """Validate a host so it doesn't contain control characters."""
1206        # Prevent CVE-2019-18348.
1207        match = _contains_disallowed_url_pchar_re.search(host)
1208        if match:
1209            raise InvalidURL(f"URL can't contain control characters. {host!r} "
1210                             f"(found at least {match.group()!r})")
1211
1212    def putheader(self, header, *values):
1213        """Send a request header line to the server.
1214
1215        For example: h.putheader('Accept', 'text/html')
1216        """
1217        if self.__state != _CS_REQ_STARTED:
1218            raise CannotSendHeader()
1219
1220        if hasattr(header, 'encode'):
1221            header = header.encode('ascii')
1222
1223        if not _is_legal_header_name(header):
1224            raise ValueError('Invalid header name %r' % (header,))
1225
1226        values = list(values)
1227        for i, one_value in enumerate(values):
1228            if hasattr(one_value, 'encode'):
1229                values[i] = one_value.encode('latin-1')
1230            elif isinstance(one_value, int):
1231                values[i] = str(one_value).encode('ascii')
1232
1233            if _is_illegal_header_value(values[i]):
1234                raise ValueError('Invalid header value %r' % (values[i],))
1235
1236        value = b'\r\n\t'.join(values)
1237        header = header + b': ' + value
1238        self._output(header)
1239
1240    def endheaders(self, message_body=None, *, encode_chunked=False):
1241        """Indicate that the last header line has been sent to the server.
1242
1243        This method sends the request to the server.  The optional message_body
1244        argument can be used to pass a message body associated with the
1245        request.
1246        """
1247        if self.__state == _CS_REQ_STARTED:
1248            self.__state = _CS_REQ_SENT
1249        else:
1250            raise CannotSendHeader()
1251        self._send_output(message_body, encode_chunked=encode_chunked)
1252
1253    def request(self, method, url, body=None, headers={}, *,
1254                encode_chunked=False):
1255        """Send a complete request to the server."""
1256        self._send_request(method, url, body, headers, encode_chunked)
1257
1258    def _send_request(self, method, url, body, headers, encode_chunked):
1259        # Honor explicitly requested Host: and Accept-Encoding: headers.
1260        header_names = frozenset(k.lower() for k in headers)
1261        skips = {}
1262        if 'host' in header_names:
1263            skips['skip_host'] = 1
1264        if 'accept-encoding' in header_names:
1265            skips['skip_accept_encoding'] = 1
1266
1267        self.putrequest(method, url, **skips)
1268
1269        # chunked encoding will happen if HTTP/1.1 is used and either
1270        # the caller passes encode_chunked=True or the following
1271        # conditions hold:
1272        # 1. content-length has not been explicitly set
1273        # 2. the body is a file or iterable, but not a str or bytes-like
1274        # 3. Transfer-Encoding has NOT been explicitly set by the caller
1275
1276        if 'content-length' not in header_names:
1277            # only chunk body if not explicitly set for backwards
1278            # compatibility, assuming the client code is already handling the
1279            # chunking
1280            if 'transfer-encoding' not in header_names:
1281                # if content-length cannot be automatically determined, fall
1282                # back to chunked encoding
1283                encode_chunked = False
1284                content_length = self._get_content_length(body, method)
1285                if content_length is None:
1286                    if body is not None:
1287                        if self.debuglevel > 0:
1288                            print('Unable to determine size of %r' % body)
1289                        encode_chunked = True
1290                        self.putheader('Transfer-Encoding', 'chunked')
1291                else:
1292                    self.putheader('Content-Length', str(content_length))
1293        else:
1294            encode_chunked = False
1295
1296        for hdr, value in headers.items():
1297            self.putheader(hdr, value)
1298        if isinstance(body, str):
1299            # RFC 2616 Section 3.7.1 says that text default has a
1300            # default charset of iso-8859-1.
1301            body = _encode(body, 'body')
1302        self.endheaders(body, encode_chunked=encode_chunked)
1303
    def getresponse(self):
        """Get the response from the server.

        If the HTTPConnection is in the correct state, returns an
        instance of HTTPResponse or of whatever object is returned by
        the response_class variable.

        If a request has not been sent or if a previous response has
        not been handled, ResponseNotReady is raised.  If the HTTP
        response indicates that the connection should be closed, then
        it will be closed before the response is returned.  When the
        connection is closed, the underlying socket is closed.

        If reading the start of the response raises ConnectionError, the
        connection is closed and the error is re-raised.
        """

        # if a prior response has been completed, then forget about it.
        if self.__response and self.__response.isclosed():
            self.__response = None

        # if a prior response exists, then it must be completed (otherwise, we
        # cannot read this response's header to determine the connection-close
        # behavior)
        #
        # note: if a prior response existed, but was connection-close, then the
        # socket and response were made independent of this HTTPConnection
        # object since a new request requires that we open a whole new
        # connection
        #
        # this means the prior response had one of two states:
        #   1) will_close: this connection was reset and the prior socket and
        #                  response operate independently
        #   2) persistent: the response was retained and we await its
        #                  isclosed() status to become true.
        #
        if self.__state != _CS_REQ_SENT or self.__response:
            raise ResponseNotReady(self.__state)

        # The debug level is only passed on when debugging is enabled.
        if self.debuglevel > 0:
            response = self.response_class(self.sock, self.debuglevel,
                                           method=self._method)
        else:
            response = self.response_class(self.sock, method=self._method)

        try:
            try:
                response.begin()
            except ConnectionError:
                # The connection is unusable; reset it before re-raising.
                self.close()
                raise
            assert response.will_close != _UNKNOWN
            self.__state = _CS_IDLE

            if response.will_close:
                # this effectively passes the connection to the response
                self.close()
            else:
                # remember this, so we can tell when it is complete
                self.__response = response

            return response
        except:
            # Whatever went wrong above, do not leave the new response open.
            response.close()
            raise
1366
# HTTPSConnection is only defined (and exported) when the ssl module can be
# imported.
try:
    import ssl
except ImportError:
    pass
else:
    class HTTPSConnection(HTTPConnection):
        """This class allows communication via SSL.

        It behaves like HTTPConnection, except that the socket is wrapped
        by an SSL context after the plain connection (and tunnel, if any)
        has been established.
        """

        default_port = HTTPS_PORT

        # XXX Should key_file and cert_file be deprecated in favour of context?

        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                     source_address=None, *, context=None,
                     check_hostname=None, blocksize=8192):
            """Set up the connection parameters and the SSL context.

            key_file, cert_file and check_hostname are deprecated (a
            DeprecationWarning is issued when any of them is given); pass a
            configured context instead.  Without a context a default HTTPS
            context is created.  Raises ValueError when hostname checking is
            requested but the context does not verify certificates.
            """
            super(HTTPSConnection, self).__init__(host, port, timeout,
                                                  source_address,
                                                  blocksize=blocksize)
            if (key_file is not None or cert_file is not None or
                        check_hostname is not None):
                import warnings
                warnings.warn("key_file, cert_file and check_hostname are "
                              "deprecated, use a custom context instead.",
                              DeprecationWarning, 2)
            self.key_file = key_file
            self.cert_file = cert_file
            if context is None:
                context = ssl._create_default_https_context()
                # enable PHA for TLS 1.3 connections if available
                if context.post_handshake_auth is not None:
                    context.post_handshake_auth = True
            will_verify = context.verify_mode != ssl.CERT_NONE
            # An unspecified check_hostname follows the context's setting.
            if check_hostname is None:
                check_hostname = context.check_hostname
            if check_hostname and not will_verify:
                raise ValueError("check_hostname needs a SSL context with "
                                 "either CERT_OPTIONAL or CERT_REQUIRED")
            if key_file or cert_file:
                context.load_cert_chain(cert_file, key_file)
                # cert and key file means the user wants to authenticate.
                # enable TLS 1.3 PHA implicitly even for custom contexts.
                if context.post_handshake_auth is not None:
                    context.post_handshake_auth = True
            self._context = context
            if check_hostname is not None:
                self._context.check_hostname = check_hostname

        def connect(self):
            """Connect to a host on a given (SSL) port."""

            super().connect()

            # When tunnelling, the TLS peer is the tunnel endpoint rather
            # than the host this connection was created with.
            if self._tunnel_host:
                server_hostname = self._tunnel_host
            else:
                server_hostname = self.host

            self.sock = self._context.wrap_socket(self.sock,
                                                  server_hostname=server_hostname)

    __all__.append("HTTPSConnection")
1429
class HTTPException(Exception):
    """Base class of the HTTP client exceptions defined below."""
    # Subclasses that define an __init__ must call Exception.__init__
    # or define self.args.  Otherwise, str() will fail.
1434
class NotConnected(HTTPException):
    """Raised by send() when no socket is open and auto_open is false."""
1437
class InvalidURL(HTTPException):
    """Raised for a non-numeric port or a host/URL with illegal characters."""
1440
class UnknownProtocol(HTTPException):
    """Exception carrying a protocol version in ``args`` and ``version``."""

    def __init__(self, version):
        self.version = version
        self.args = (version,)
1445
class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass for an unrecognised transfer encoding."""
    # NOTE(review): meaning taken from the name; no raise site is visible
    # in this part of the file.
1448
class UnimplementedFileMode(HTTPException):
    """HTTPException subclass for an unsupported file mode."""
    # NOTE(review): meaning taken from the name; no raise site is visible
    # in this part of the file.
1451
class IncompleteRead(HTTPException):
    """Exception carrying the data read so far.

    ``partial`` is the data that was read and ``expected``, when not None,
    the amount that was still expected.  str() and repr() give the same
    text.
    """

    def __init__(self, partial, expected=None):
        self.args = (partial,)
        self.partial = partial
        self.expected = expected

    def __repr__(self):
        if self.expected is None:
            remainder = ''
        else:
            remainder = ', %i more expected' % self.expected
        class_name = self.__class__.__name__
        return '%s(%i bytes read%s)' % (class_name, len(self.partial),
                                        remainder)

    # object.__str__ falls back to __repr__, bypassing Exception.__str__.
    __str__ = object.__str__
1465
class ImproperConnectionState(HTTPException):
    """Base class for errors about the connection being in the wrong state."""
1468
class CannotSendRequest(ImproperConnectionState):
    """Raised by putrequest() when the connection is not idle."""
1471
class CannotSendHeader(ImproperConnectionState):
    """Raised by putheader()/endheaders() when no request has been started."""
1474
class ResponseNotReady(ImproperConnectionState):
    """Raised by getresponse() when no response can be fetched yet."""
1477
class BadStatusLine(HTTPException):
    """Exception carrying a status line in ``args`` and ``line``.

    A falsy line is stored as its repr() so that the message is never
    empty.
    """

    def __init__(self, line):
        shown = line if line else repr(line)
        self.args = (shown,)
        self.line = shown
1484
class LineTooLong(HTTPException):
    """Raised when a line longer than _MAXLINE bytes is read.

    line_type names the kind of line being read and is included in the
    message.
    """

    def __init__(self, line_type):
        message = ("got more than %d bytes when reading %s"
                   % (_MAXLINE, line_type))
        HTTPException.__init__(self, message)
1489
class RemoteDisconnected(ConnectionResetError, BadStatusLine):
    """Exception that is both a ConnectionResetError and a BadStatusLine.

    Callers catching either base class will catch this exception.
    """
    def __init__(self, *pos, **kw):
        # Initialise the BadStatusLine part with an empty status line first
        # (this sets ``line``), then let ConnectionResetError process the
        # caller's arguments.
        BadStatusLine.__init__(self, "")
        ConnectionResetError.__init__(self, *pos, **kw)
1494
# for backwards compatibility: ``error`` is an alias of HTTPException
error = HTTPException
1497