1"""HTTP server classes.
2
Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
SimpleHTTPRequestHandler for simple implementations of GET and HEAD,
and CGIHTTPRequestHandler for GET and POST requests to CGI scripts.
6
7It does, however, optionally implement HTTP/1.1 persistent connections,
8as of version 0.3.
9
10Notes on CGIHTTPRequestHandler
11------------------------------
12
13This class implements GET and POST requests to cgi-bin scripts.
14
15If the os.fork() function is not present (e.g. on Windows),
16subprocess.Popen() is used as a fallback, with slightly altered semantics.
17
18In all cases, the implementation is intentionally naive -- all
19requests are executed synchronously.
20
21SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
22-- it may execute arbitrary Python code or external programs.
23
24Note that status code 200 is sent prior to execution of a CGI script, so
25scripts cannot send other status codes such as 302 (redirect).
26
27XXX To do:
28
29- log requests even later (to capture byte count)
30- log user-agent header and other interesting goodies
31- send error log to separate file
32"""
33
34
35# See also:
36#
37# HTTP Working Group                                        T. Berners-Lee
38# INTERNET-DRAFT                                            R. T. Fielding
39# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
40# Expires September 8, 1995                                  March 8, 1995
41#
42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
43#
44# and
45#
46# Network Working Group                                      R. Fielding
47# Request for Comments: 2616                                       et al
48# Obsoletes: 2068                                              June 1999
49# Category: Standards Track
50#
51# URL: http://www.faqs.org/rfcs/rfc2616.html
52
53# Log files
54# ---------
55#
56# Here's a quote from the NCSA httpd docs about log file format.
57#
58# | The logfile format is as follows. Each line consists of:
59# |
60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
61# |
62# |        host: Either the DNS name or the IP number of the remote client
63# |        rfc931: Any information returned by identd for this person,
64# |                - otherwise.
65# |        authuser: If user sent a userid for authentication, the user name,
66# |                  - otherwise.
67# |        DD: Day
68# |        Mon: Month (calendar name)
69# |        YYYY: Year
70# |        hh: hour (24-hour format, the machine's timezone)
71# |        mm: minutes
72# |        ss: seconds
73# |        request: The first line of the HTTP request as sent by the client.
74# |        ddd: the status code returned by the server, - if not available.
75# |        bbbb: the total number of bytes sent,
76# |              *not including the HTTP/1.0 header*, - if not available
77# |
78# | You can determine the name of the file accessed through request.
79#
80# (Actually, the latter is only true if you know the server configuration
81# at the time the request was made!)
82
# Version of this module.  The request handler classes append it to their
# server_version attribute, which ends up in the Server response header.
__version__ = "0.6"

# Public names of this module (what "from <module> import *" exports).
__all__ = [
    "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler",
    "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",
]
89
90import copy
91import datetime
92import email.utils
93import html
94import http.client
95import io
96import mimetypes
97import os
98import posixpath
99import select
100import shutil
101import socket # For gethostbyaddr()
102import socketserver
103import sys
104import time
105import urllib.parse
106import contextlib
107from functools import partial
108
109from http import HTTPStatus
110
111
# Default error message template.
# BaseHTTPRequestHandler.send_error() fills it in with %-formatting using
# the keys "code", "message" and "explain"; message and explain are
# HTML-escaped before they are substituted.
DEFAULT_ERROR_MESSAGE = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
        "http://www.w3.org/TR/html4/strict.dtd">
<html>
    <head>
        <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
        <title>Error response</title>
    </head>
    <body>
        <h1>Error response</h1>
        <p>Error code: %(code)d</p>
        <p>Message: %(message)s.</p>
        <p>Error code explanation: %(code)s - %(explain)s.</p>
    </body>
</html>
"""

# Content-Type header value sent together with the error page above.
DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
131
class HTTPServer(socketserver.TCPServer):
    """TCP server that records the host name and port it is bound to."""

    # Seems to make sense in testing environment
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the socket, then store server_name and server_port."""
        socketserver.TCPServer.server_bind(self)
        # Only the first two items are used; an IPv6 address tuple
        # carries additional fields after host and port.
        bound_host, bound_port = self.server_address[:2]
        self.server_name = socket.getfqdn(bound_host)
        self.server_port = bound_port
142
143
class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer):
    # HTTPServer combined with socketserver.ThreadingMixIn.
    # daemon_threads is a ThreadingMixIn setting; True marks the
    # request-handling threads as daemon threads.
    daemon_threads = True
146
147
148class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
149
150    """HTTP request handler base class.
151
152    The following explanation of HTTP serves to guide you through the
153    code as well as to expose any misunderstandings I may have about
154    HTTP (so you don't need to read the code to figure out I'm wrong
155    :-).
156
157    HTTP (HyperText Transfer Protocol) is an extensible protocol on
158    top of a reliable stream transport (e.g. TCP/IP).  The protocol
159    recognizes three parts to a request:
160
161    1. One line identifying the request type and path
162    2. An optional set of RFC-822-style headers
163    3. An optional data part
164
165    The headers and data are separated by a blank line.
166
167    The first line of the request has the form
168
169    <command> <path> <version>
170
171    where <command> is a (case-sensitive) keyword such as GET or POST,
172    <path> is a string containing path information for the request,
173    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
174    <path> is encoded using the URL encoding scheme (using %xx to signify
175    the ASCII character with hex code xx).
176
177    The specification specifies that lines are separated by CRLF but
178    for compatibility with the widest range of clients recommends
179    servers also handle LF.  Similarly, whitespace in the request line
180    is treated sensibly (allowing multiple spaces between components
181    and allowing trailing whitespace).
182
183    Similarly, for output, lines ought to be separated by CRLF pairs
184    but most clients grok LF characters just fine.
185
186    If the first line of the request has the form
187
188    <command> <path>
189
190    (i.e. <version> is left out) then this is assumed to be an HTTP
191    0.9 request; this form has no optional headers and data part and
192    the reply consists of just the data.
193
194    The reply form of the HTTP 1.x protocol again has three parts:
195
196    1. One line giving the response code
197    2. An optional set of RFC-822-style headers
198    3. The data
199
200    Again, the headers and data are separated by a blank line.
201
202    The response code line has the form
203
204    <version> <responsecode> <responsestring>
205
206    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
207    <responsecode> is a 3-digit response code indicating success or
208    failure of the request, and <responsestring> is an optional
209    human-readable string explaining what the response code means.
210
211    This server parses the request and the headers, and then calls a
212    function specific to the request type (<command>).  Specifically,
213    a request SPAM will be handled by a method do_SPAM().  If no
214    such method exists the server sends an error response to the
215    client.  If it exists, it is called with no arguments:
216
217    do_SPAM()
218
219    Note that the request name is case sensitive (i.e. SPAM and spam
220    are different requests).
221
222    The various request details are stored in instance variables:
223
224    - client_address is the client IP address in the form (host,
225    port);
226
    - command, path and request_version are the broken-down request line;
228
229    - headers is an instance of email.message.Message (or a derived
230    class) containing the header information;
231
232    - rfile is a file object open for reading positioned at the
233    start of the optional input data part;
234
235    - wfile is a file object open for writing.
236
237    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
238
239    The first thing to be written must be the response line.  Then
240    follow 0 or more header lines, then a blank line, and then the
241    actual data (if any).  The meaning of the header lines depends on
242    the command executed by the server; in most cases, when data is
243    returned, there should be at least one header line of the form
244
245    Content-type: <type>/<subtype>
246
247    where <type> and <subtype> should be registered MIME types,
248    e.g. "text/html" or "text/plain".
249
250    """
251
    # The Python system version, truncated to its first component.
    # version_string() appends it to server_version.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    # Template and Content-Type used by send_error() for the error page.
    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"
268
269    def parse_request(self):
270        """Parse a request (internal).
271
272        The request should be stored in self.raw_requestline; the results
273        are in self.command, self.path, self.request_version and
274        self.headers.
275
276        Return True for success, False for failure; on failure, any relevant
277        error response has already been sent back.
278
279        """
280        self.command = None  # set in case of error on the first line
281        self.request_version = version = self.default_request_version
282        self.close_connection = True
283        requestline = str(self.raw_requestline, 'iso-8859-1')
284        requestline = requestline.rstrip('\r\n')
285        self.requestline = requestline
286        words = requestline.split()
287        if len(words) == 0:
288            return False
289
290        if len(words) >= 3:  # Enough to determine protocol version
291            version = words[-1]
292            try:
293                if not version.startswith('HTTP/'):
294                    raise ValueError
295                base_version_number = version.split('/', 1)[1]
296                version_number = base_version_number.split(".")
297                # RFC 2145 section 3.1 says there can be only one "." and
298                #   - major and minor numbers MUST be treated as
299                #      separate integers;
300                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
301                #      turn is lower than HTTP/12.3;
302                #   - Leading zeros MUST be ignored by recipients.
303                if len(version_number) != 2:
304                    raise ValueError
305                version_number = int(version_number[0]), int(version_number[1])
306            except (ValueError, IndexError):
307                self.send_error(
308                    HTTPStatus.BAD_REQUEST,
309                    "Bad request version (%r)" % version)
310                return False
311            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
312                self.close_connection = False
313            if version_number >= (2, 0):
314                self.send_error(
315                    HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
316                    "Invalid HTTP version (%s)" % base_version_number)
317                return False
318            self.request_version = version
319
320        if not 2 <= len(words) <= 3:
321            self.send_error(
322                HTTPStatus.BAD_REQUEST,
323                "Bad request syntax (%r)" % requestline)
324            return False
325        command, path = words[:2]
326        if len(words) == 2:
327            self.close_connection = True
328            if command != 'GET':
329                self.send_error(
330                    HTTPStatus.BAD_REQUEST,
331                    "Bad HTTP/0.9 request type (%r)" % command)
332                return False
333        self.command, self.path = command, path
334
335        # Examine the headers and look for a Connection directive.
336        try:
337            self.headers = http.client.parse_headers(self.rfile,
338                                                     _class=self.MessageClass)
339        except http.client.LineTooLong as err:
340            self.send_error(
341                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
342                "Line too long",
343                str(err))
344            return False
345        except http.client.HTTPException as err:
346            self.send_error(
347                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
348                "Too many headers",
349                str(err)
350            )
351            return False
352
353        conntype = self.headers.get('Connection', "")
354        if conntype.lower() == 'close':
355            self.close_connection = True
356        elif (conntype.lower() == 'keep-alive' and
357              self.protocol_version >= "HTTP/1.1"):
358            self.close_connection = False
359        # Examine the headers and look for an Expect directive
360        expect = self.headers.get('Expect', "")
361        if (expect.lower() == "100-continue" and
362                self.protocol_version >= "HTTP/1.1" and
363                self.request_version >= "HTTP/1.1"):
364            if not self.handle_expect_100():
365                return False
366        return True
367
368    def handle_expect_100(self):
369        """Decide what to do with an "Expect: 100-continue" header.
370
371        If the client is expecting a 100 Continue response, we must
372        respond with either a 100 Continue or a final response before
373        waiting for the request body. The default is to always respond
374        with a 100 Continue. You can behave differently (for example,
375        reject unauthorized requests) by overriding this method.
376
377        This method should either return True (possibly after sending
378        a 100 Continue response) or send an error response and return
379        False.
380
381        """
382        self.send_response_only(HTTPStatus.CONTINUE)
383        self.end_headers()
384        return True
385
386    def handle_one_request(self):
387        """Handle a single HTTP request.
388
389        You normally don't need to override this method; see the class
390        __doc__ string for information on how to handle specific HTTP
391        commands such as GET and POST.
392
393        """
394        try:
395            self.raw_requestline = self.rfile.readline(65537)
396            if len(self.raw_requestline) > 65536:
397                self.requestline = ''
398                self.request_version = ''
399                self.command = ''
400                self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
401                return
402            if not self.raw_requestline:
403                self.close_connection = True
404                return
405            if not self.parse_request():
406                # An error code has been sent, just exit
407                return
408            mname = 'do_' + self.command
409            if not hasattr(self, mname):
410                self.send_error(
411                    HTTPStatus.NOT_IMPLEMENTED,
412                    "Unsupported method (%r)" % self.command)
413                return
414            method = getattr(self, mname)
415            method()
416            self.wfile.flush() #actually send the response if not already done.
417        except socket.timeout as e:
418            #a read or a write timed out.  Discard this connection
419            self.log_error("Request timed out: %r", e)
420            self.close_connection = True
421            return
422
423    def handle(self):
424        """Handle multiple requests if necessary."""
425        self.close_connection = True
426
427        self.handle_one_request()
428        while not self.close_connection:
429            self.handle_one_request()
430
431    def send_error(self, code, message=None, explain=None):
432        """Send and log an error reply.
433
434        Arguments are
435        * code:    an HTTP error code
436                   3 digits
437        * message: a simple optional 1 line reason phrase.
438                   *( HTAB / SP / VCHAR / %x80-FF )
439                   defaults to short entry matching the response code
440        * explain: a detailed message defaults to the long entry
441                   matching the response code.
442
443        This sends an error response (so it must be called before any
444        output has been generated), logs the error, and finally sends
445        a piece of HTML explaining the error to the user.
446
447        """
448
449        try:
450            shortmsg, longmsg = self.responses[code]
451        except KeyError:
452            shortmsg, longmsg = '???', '???'
453        if message is None:
454            message = shortmsg
455        if explain is None:
456            explain = longmsg
457        self.log_error("code %d, message %s", code, message)
458        self.send_response(code, message)
459        self.send_header('Connection', 'close')
460
461        # Message body is omitted for cases described in:
462        #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
463        #  - RFC7231: 6.3.6. 205(Reset Content)
464        body = None
465        if (code >= 200 and
466            code not in (HTTPStatus.NO_CONTENT,
467                         HTTPStatus.RESET_CONTENT,
468                         HTTPStatus.NOT_MODIFIED)):
469            # HTML encode to prevent Cross Site Scripting attacks
470            # (see bug #1100201)
471            content = (self.error_message_format % {
472                'code': code,
473                'message': html.escape(message, quote=False),
474                'explain': html.escape(explain, quote=False)
475            })
476            body = content.encode('UTF-8', 'replace')
477            self.send_header("Content-Type", self.error_content_type)
478            self.send_header('Content-Length', str(len(body)))
479        self.end_headers()
480
481        if self.command != 'HEAD' and body:
482            self.wfile.write(body)
483
484    def send_response(self, code, message=None):
485        """Add the response header to the headers buffer and log the
486        response code.
487
488        Also send two standard headers with the server software
489        version and the current date.
490
491        """
492        self.log_request(code)
493        self.send_response_only(code, message)
494        self.send_header('Server', self.version_string())
495        self.send_header('Date', self.date_time_string())
496
497    def send_response_only(self, code, message=None):
498        """Send the response header only."""
499        if self.request_version != 'HTTP/0.9':
500            if message is None:
501                if code in self.responses:
502                    message = self.responses[code][0]
503                else:
504                    message = ''
505            if not hasattr(self, '_headers_buffer'):
506                self._headers_buffer = []
507            self._headers_buffer.append(("%s %d %s\r\n" %
508                    (self.protocol_version, code, message)).encode(
509                        'latin-1', 'strict'))
510
511    def send_header(self, keyword, value):
512        """Send a MIME header to the headers buffer."""
513        if self.request_version != 'HTTP/0.9':
514            if not hasattr(self, '_headers_buffer'):
515                self._headers_buffer = []
516            self._headers_buffer.append(
517                ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))
518
519        if keyword.lower() == 'connection':
520            if value.lower() == 'close':
521                self.close_connection = True
522            elif value.lower() == 'keep-alive':
523                self.close_connection = False
524
525    def end_headers(self):
526        """Send the blank line ending the MIME headers."""
527        if self.request_version != 'HTTP/0.9':
528            self._headers_buffer.append(b"\r\n")
529            self.flush_headers()
530
531    def flush_headers(self):
532        if hasattr(self, '_headers_buffer'):
533            self.wfile.write(b"".join(self._headers_buffer))
534            self._headers_buffer = []
535
536    def log_request(self, code='-', size='-'):
537        """Log an accepted request.
538
539        This is called by send_response().
540
541        """
542        if isinstance(code, HTTPStatus):
543            code = code.value
544        self.log_message('"%s" %s %s',
545                         self.requestline, str(code), str(size))
546
547    def log_error(self, format, *args):
548        """Log an error.
549
550        This is called when a request cannot be fulfilled.  By
551        default it passes the message on to log_message().
552
553        Arguments are the same as for log_message().
554
555        XXX This should go to the separate error log.
556
557        """
558
559        self.log_message(format, *args)
560
561    def log_message(self, format, *args):
562        """Log an arbitrary message.
563
564        This is used by all other logging functions.  Override
565        it if you have specific logging wishes.
566
567        The first argument, FORMAT, is a format string for the
568        message to be logged.  If the format string contains
569        any % escapes requiring parameters, they should be
570        specified as subsequent arguments (it's just like
571        printf!).
572
573        The client ip and current date/time are prefixed to
574        every message.
575
576        """
577
578        sys.stderr.write("%s - - [%s] %s\n" %
579                         (self.address_string(),
580                          self.log_date_time_string(),
581                          format%args))
582
583    def version_string(self):
584        """Return the server software version string."""
585        return self.server_version + ' ' + self.sys_version
586
587    def date_time_string(self, timestamp=None):
588        """Return the current date and time formatted for a message header."""
589        if timestamp is None:
590            timestamp = time.time()
591        return email.utils.formatdate(timestamp, usegmt=True)
592
593    def log_date_time_string(self):
594        """Return the current time formatted for logging."""
595        now = time.time()
596        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
597        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
598                day, self.monthname[month], year, hh, mm, ss)
599        return s
600
    # Abbreviated weekday names, Monday first.
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Abbreviated month names indexed by month number (1-12); index 0 is a
    # None placeholder.  Used by log_date_time_string().
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
606
607    def address_string(self):
608        """Return the client address."""
609
610        return self.client_address[0]
611
    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    # It is written into every status line and compared, as a string,
    # against "HTTP/1.1" by parse_request().
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    MessageClass = http.client.HTTPMessage

    # hack to maintain backwards compatibility
    # Maps every HTTPStatus member to a (short phrase, long description)
    # tuple; send_error() and send_response_only() take their default
    # messages from here.
    responses = {
        v: (v.phrase, v.description)
        for v in HTTPStatus.__members__.values()
    }
626
627
628class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
629
630    """Simple HTTP request handler with GET and HEAD commands.
631
    This serves files from the directory given by the directory
    argument (the current directory by default) and any of its
    subdirectories.  The MIME type for files is determined by
    calling the .guess_type() method.
635
636    The GET and HEAD requests are identical except that the HEAD
637    request omits the actual contents of the file.
638
639    """
640
    # Software name/version for this handler; overrides the base class value.
    server_version = "SimpleHTTP/" + __version__
642
643    def __init__(self, *args, directory=None, **kwargs):
644        if directory is None:
645            directory = os.getcwd()
646        self.directory = directory
647        super().__init__(*args, **kwargs)
648
649    def do_GET(self):
650        """Serve a GET request."""
651        f = self.send_head()
652        if f:
653            try:
654                self.copyfile(f, self.wfile)
655            finally:
656                f.close()
657
658    def do_HEAD(self):
659        """Serve a HEAD request."""
660        f = self.send_head()
661        if f:
662            f.close()
663
664    def send_head(self):
665        """Common code for GET and HEAD commands.
666
667        This sends the response code and MIME headers.
668
669        Return value is either a file object (which has to be copied
670        to the outputfile by the caller unless the command was HEAD,
671        and must be closed by the caller under all circumstances), or
672        None, in which case the caller has nothing further to do.
673
674        """
675        path = self.translate_path(self.path)
676        f = None
677        if os.path.isdir(path):
678            parts = urllib.parse.urlsplit(self.path)
679            if not parts.path.endswith('/'):
680                # redirect browser - doing basically what apache does
681                self.send_response(HTTPStatus.MOVED_PERMANENTLY)
682                new_parts = (parts[0], parts[1], parts[2] + '/',
683                             parts[3], parts[4])
684                new_url = urllib.parse.urlunsplit(new_parts)
685                self.send_header("Location", new_url)
686                self.end_headers()
687                return None
688            for index in "index.html", "index.htm":
689                index = os.path.join(path, index)
690                if os.path.exists(index):
691                    path = index
692                    break
693            else:
694                return self.list_directory(path)
695        ctype = self.guess_type(path)
696        # check for trailing "/" which should return 404. See Issue17324
697        # The test for this was added in test_httpserver.py
698        # However, some OS platforms accept a trailingSlash as a filename
699        # See discussion on python-dev and Issue34711 regarding
700        # parseing and rejection of filenames with a trailing slash
701        if path.endswith("/"):
702            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
703            return None
704        try:
705            f = open(path, 'rb')
706        except OSError:
707            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
708            return None
709
710        try:
711            fs = os.fstat(f.fileno())
712            # Use browser cache if possible
713            if ("If-Modified-Since" in self.headers
714                    and "If-None-Match" not in self.headers):
715                # compare If-Modified-Since and time of last file modification
716                try:
717                    ims = email.utils.parsedate_to_datetime(
718                        self.headers["If-Modified-Since"])
719                except (TypeError, IndexError, OverflowError, ValueError):
720                    # ignore ill-formed values
721                    pass
722                else:
723                    if ims.tzinfo is None:
724                        # obsolete format with no timezone, cf.
725                        # https://tools.ietf.org/html/rfc7231#section-7.1.1.1
726                        ims = ims.replace(tzinfo=datetime.timezone.utc)
727                    if ims.tzinfo is datetime.timezone.utc:
728                        # compare to UTC datetime of last modification
729                        last_modif = datetime.datetime.fromtimestamp(
730                            fs.st_mtime, datetime.timezone.utc)
731                        # remove microseconds, like in If-Modified-Since
732                        last_modif = last_modif.replace(microsecond=0)
733
734                        if last_modif <= ims:
735                            self.send_response(HTTPStatus.NOT_MODIFIED)
736                            self.end_headers()
737                            f.close()
738                            return None
739
740            self.send_response(HTTPStatus.OK)
741            self.send_header("Content-type", ctype)
742            self.send_header("Content-Length", str(fs[6]))
743            self.send_header("Last-Modified",
744                self.date_time_string(fs.st_mtime))
745            self.end_headers()
746            return f
747        except:
748            f.close()
749            raise
750
751    def list_directory(self, path):
752        """Helper to produce a directory listing (absent index.html).
753
754        Return value is either a file object, or None (indicating an
755        error).  In either case, the headers are sent, making the
756        interface the same as for send_head().
757
758        """
759        try:
760            list = os.listdir(path)
761        except OSError:
762            self.send_error(
763                HTTPStatus.NOT_FOUND,
764                "No permission to list directory")
765            return None
766        list.sort(key=lambda a: a.lower())
767        r = []
768        try:
769            displaypath = urllib.parse.unquote(self.path,
770                                               errors='surrogatepass')
771        except UnicodeDecodeError:
772            displaypath = urllib.parse.unquote(path)
773        displaypath = html.escape(displaypath, quote=False)
774        enc = sys.getfilesystemencoding()
775        title = 'Directory listing for %s' % displaypath
776        r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
777                 '"http://www.w3.org/TR/html4/strict.dtd">')
778        r.append('<html>\n<head>')
779        r.append('<meta http-equiv="Content-Type" '
780                 'content="text/html; charset=%s">' % enc)
781        r.append('<title>%s</title>\n</head>' % title)
782        r.append('<body>\n<h1>%s</h1>' % title)
783        r.append('<hr>\n<ul>')
784        for name in list:
785            fullname = os.path.join(path, name)
786            displayname = linkname = name
787            # Append / for directories or @ for symbolic links
788            if os.path.isdir(fullname):
789                displayname = name + "/"
790                linkname = name + "/"
791            if os.path.islink(fullname):
792                displayname = name + "@"
793                # Note: a link to a directory displays with @ and links with /
794            r.append('<li><a href="%s">%s</a></li>'
795                    % (urllib.parse.quote(linkname,
796                                          errors='surrogatepass'),
797                       html.escape(displayname, quote=False)))
798        r.append('</ul>\n<hr>\n</body>\n</html>\n')
799        encoded = '\n'.join(r).encode(enc, 'surrogateescape')
800        f = io.BytesIO()
801        f.write(encoded)
802        f.seek(0)
803        self.send_response(HTTPStatus.OK)
804        self.send_header("Content-type", "text/html; charset=%s" % enc)
805        self.send_header("Content-Length", str(len(encoded)))
806        self.end_headers()
807        return f
808
809    def translate_path(self, path):
810        """Translate a /-separated PATH to the local filename syntax.
811
812        Components that mean special things to the local file system
813        (e.g. drive or directory names) are ignored.  (XXX They should
814        probably be diagnosed.)
815
816        """
817        # abandon query parameters
818        path = path.split('?',1)[0]
819        path = path.split('#',1)[0]
820        # Don't forget explicit trailing slash when normalizing. Issue17324
821        trailing_slash = path.rstrip().endswith('/')
822        try:
823            path = urllib.parse.unquote(path, errors='surrogatepass')
824        except UnicodeDecodeError:
825            path = urllib.parse.unquote(path)
826        path = posixpath.normpath(path)
827        words = path.split('/')
828        words = filter(None, words)
829        path = self.directory
830        for word in words:
831            if os.path.dirname(word) or word in (os.curdir, os.pardir):
832                # Ignore components that are not a simple file/directory name
833                continue
834            path = os.path.join(path, word)
835        if trailing_slash:
836            path += '/'
837        return path
838
839    def copyfile(self, source, outputfile):
840        """Copy all data between two file objects.
841
842        The SOURCE argument is a file object open for reading
843        (or anything with a read() method) and the DESTINATION
844        argument is a file object open for writing (or
845        anything with a write() method).
846
847        The only reason for overriding this would be to change
848        the block size or perhaps to replace newlines by CRLF
849        -- note however that this the default server uses this
850        to copy binary data as well.
851
852        """
853        shutil.copyfileobj(source, outputfile)
854
855    def guess_type(self, path):
856        """Guess the type of a file.
857
858        Argument is a PATH (a filename).
859
860        Return value is a string of the form type/subtype,
861        usable for a MIME Content-type header.
862
863        The default implementation looks the file's extension
864        up in the table self.extensions_map, using application/octet-stream
865        as a default; however it would be permissible (if
866        slow) to look inside the data to make a better guess.
867
868        """
869
870        base, ext = posixpath.splitext(path)
871        if ext in self.extensions_map:
872            return self.extensions_map[ext]
873        ext = ext.lower()
874        if ext in self.extensions_map:
875            return self.extensions_map[ext]
876        else:
877            return self.extensions_map['']
878
879    if not mimetypes.inited:
880        mimetypes.init() # try to read system mime.types
881    extensions_map = mimetypes.types_map.copy()
882    extensions_map.update({
883        '': 'application/octet-stream', # Default
884        '.py': 'text/plain',
885        '.c': 'text/plain',
886        '.h': 'text/plain',
887        })
888
889
890# Utilities for CGIHTTPRequestHandler
891
def _url_collapse_path(path):
    """
    Collapse a URL path: drop empty and '.' elements and resolve '..'
    references, leaving any query string untouched.

    Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
    The utility of this function is limited to is_cgi method and helps
    preventing some security attacks.

    Returns: The reconstituted URL, which will always start with a '/'.
    The directory part and the final element are joined with '/', so a
    path with no directory part comes back with a doubled leading slash.

    Raises: IndexError if too many '..' occur within the path.

    """
    # Only the path is decoded and collapsed; the query is re-attached
    # verbatim at the end.
    path, _, query = path.partition('?')
    path = urllib.parse.unquote(path)

    # Similar to os.path.split(os.path.normpath(path)) but specific to URL
    # path semantics rather than local operating system semantics.
    # str.split() always yields at least one element, so there is always
    # a final element to peel off.
    *leading, last = path.split('/')
    stack = []
    for segment in leading:
        if segment == '..':
            stack.pop()  # IndexError if more '..' than prior parts
        elif segment and segment != '.':
            stack.append(segment)

    # A final '..' or '.' names a directory, so the tail becomes empty.
    if last == '..':
        stack.pop()
        last = ''
    elif last == '.':
        last = ''

    if query:
        last = '?'.join((last, query))

    return '/'.join(('/' + '/'.join(stack), last))
937
938
939
# Cached result of nobody_uid(); stays None until a uid has been found.
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    Returns the cached value when one is already stored in the
    module-level ``nobody``.  Returns -1 (without caching) when the pwd
    module cannot be imported.  When there is no 'nobody' entry, the
    result is one more than the largest uid reported by pwd.getpwall().
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            entry = pwd.getpwnam('nobody')
        except KeyError:
            # No such account: pick a uid above every known one.
            nobody = max(item[2] for item in pwd.getpwall()) + 1
        else:
            nobody = entry[2]
    return nobody
956
957
def executable(path):
    """Return whether os.access() grants execute (X_OK) access to PATH."""
    may_execute = os.access(path, os.X_OK)
    return may_execute
961
962
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target is
        answered with a NOT_IMPLEMENTED error.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(
                HTTPStatus.NOT_IMPLEMENTED,
                "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        CGI requests are handed to run_cgi(); everything else is
        delegated to SimpleHTTPRequestHandler.send_head().
        """
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the first component
        of the normalized url path is one of the strings in
        self.cgi_directories.

        """
        collapsed_path = _url_collapse_path(self.path)
        # Split at the first '/' after the leading one: everything before
        # it is the candidate CGI directory, everything after is the rest.
        dir_sep = collapsed_path.find('/', 1)
        head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
        if head in self.cgi_directories:
            self.cgi_info = head, tail
            return True
        return False


    # URL prefixes (as produced by _url_collapse_path) treated as
    # containing CGI scripts.
    cgi_directories = ['/cgi-bin', '/htbin']

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script.

        Only the extension is examined (".py" or ".pyw", any case).
        """
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        Uses the (dir, rest) pair stored in self.cgi_info by is_cgi().
        The script is located below the CGI directory, validated
        (exists, is a plain file, is executable where that is checked),
        a CGI environment is built from the request, a 200 response
        line is sent, and the script is then run -- via fork/execve
        when os.fork exists, via subprocess otherwise.  On validation
        failure an error response is sent instead.

        Always returns None.
        """
        dir, rest = self.cgi_info
        path = dir + '/' + rest
        # Extend `dir` for as long as the next path component is an
        # existing directory, so scripts in sub-directories are found.
        i = path.find('/', len(dir)+1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                dir, rest = nextdir, nextrest
                i = path.find('/', len(dir)+1)
            else:
                break

        # find an explicit query string, if present.
        rest, _, query = rest.partition('?')

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(
                HTTPStatus.FORBIDDEN,
                "CGI script is not a plain file (%r)" % scriptname)
            return
        ispy = self.is_python(scriptname)
        # Without fork, Python scripts are run through the interpreter
        # (see the subprocess branch below), so the executable check is
        # skipped for them.
        if self.have_fork or not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(
                    HTTPStatus.FORBIDDEN,
                    "CGI script is not executable (%r)" % scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.get("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                import base64
                import binascii
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    try:
                        authorization = authorization[1].encode('ascii')
                        authorization = base64.decodebytes(authorization).\
                                        decode('ascii')
                    except (binascii.Error, UnicodeError):
                        pass
                    else:
                        # Split on the first ':' only: in "user:password"
                        # the password may itself contain colons, and
                        # such credentials must still yield REMOTE_USER.
                        user, colon, _ = authorization.partition(':')
                        if colon:
                            env['REMOTE_USER'] = user
        # XXX REMOTE_IDENT
        if self.headers.get('content-type') is None:
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        # Collect the values of every Accept header with get_all(), the
        # same accessor used for cookies below; it hands back the header
        # values directly, so no manual prefix stripping is needed.
        accept = self.headers.get_all('accept', ())
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        # The status line goes out before the script runs, so the script
        # itself cannot choose a different status code.
        self.send_response(HTTPStatus.OK, "Script output follows")
        self.flush_headers()

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            # A query without '=' is passed to the script as a
            # command-line argument.
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush() # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                if sts:
                    self.log_error("CGI script exit status %#x", sts)
                return
            # Child
            try:
                # Dropping privileges is best-effort: failure to change
                # uid is deliberately ignored.
                try:
                    os.setuid(nobody)
                except OSError:
                    pass
                # Wire the connection to the script's stdin/stdout.
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Catch everything: the child must never fall back into
                # the server's request loop.  Report and exit.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            # NOTE(review): unlike the fork branch above, the raw query
            # (with '+' not replaced by spaces) is used here -- confirm
            # whether decoded_query was intended.
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                # Missing or malformed Content-Length: send no body.
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env = env
                                 )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
1226
1227
def _get_best_family(*address):
    """Return (family, sockaddr) from the first getaddrinfo() result.

    ADDRESS is passed through as the positional arguments of
    socket.getaddrinfo(); the lookup asks for stream sockets with the
    AI_PASSIVE flag set.
    """
    candidates = socket.getaddrinfo(
        *address,
        type=socket.SOCK_STREAM,
        flags=socket.AI_PASSIVE,
    )
    first = next(iter(candidates))
    # Each entry is (family, type, proto, canonname, sockaddr).
    family, _type, _proto, _canonname, sockaddr = first
    return family, sockaddr
1236
1237
def test(HandlerClass=BaseHTTPRequestHandler,
         ServerClass=ThreadingHTTPServer,
         protocol="HTTP/1.0", port=8000, bind=None):
    """Run an HTTP server with the given handler class until interrupted.

    The server listens on port 8000 (or the port argument), bound to
    BIND.  Note that this sets ``address_family`` on ServerClass and
    ``protocol_version`` on HandlerClass.  On KeyboardInterrupt a
    message is printed and the process exits with status 0.

    """
    family, addr = _get_best_family(bind, port)
    ServerClass.address_family = family

    HandlerClass.protocol_version = protocol
    with ServerClass(addr, HandlerClass) as httpd:
        # Report the address actually bound, not the one requested.
        host, port = httpd.socket.getsockname()[:2]
        if ':' in host:
            # IPv6 literals need brackets inside a URL.
            url_host = f'[{host}]'
        else:
            url_host = host
        print(
            f"Serving HTTP on {host} port {port} "
            f"(http://{url_host}:{port}/) ..."
        )
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nKeyboard interrupt received, exiting.")
            sys.exit(0)
1261
# Command-line entry point: serve files (or CGI scripts with --cgi) from
# --directory on the given port and bind address.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--cgi', action='store_true',
                        help='Run as CGI Server')
    parser.add_argument('--bind', '-b', metavar='ADDRESS',
                        help='Specify alternate bind address '
                             '[default: all interfaces]')
    parser.add_argument('--directory', '-d', default=os.getcwd(),
                        help='Specify alternative directory '
                        '[default:current directory]')
    parser.add_argument('port', action='store',
                        default=8000, type=int,
                        nargs='?',
                        help='Specify alternate port [default: 8000]')
    args = parser.parse_args()
    # Bind the chosen directory into whichever handler is selected, so
    # that --directory is honoured with --cgi too instead of being
    # silently ignored.  CGIHTTPRequestHandler inherits its constructor
    # from SimpleHTTPRequestHandler and so accepts the same keyword.
    if args.cgi:
        base_handler = CGIHTTPRequestHandler
    else:
        base_handler = SimpleHTTPRequestHandler
    handler_class = partial(base_handler, directory=args.directory)

    # ensure dual-stack is not disabled; ref #38907
    class DualStackServer(ThreadingHTTPServer):
        def server_bind(self):
            # suppress exception when protocol is IPv4
            with contextlib.suppress(Exception):
                self.socket.setsockopt(
                    socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
            return super().server_bind()

    test(
        HandlerClass=handler_class,
        ServerClass=DualStackServer,
        port=args.port,
        bind=args.bind,
    )
1300