1"""HTTP server classes.
2
3From Python 3.3
4
Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
SimpleHTTPRequestHandler for simple implementations of GET and HEAD,
and CGIHTTPRequestHandler for CGI scripts (GET and POST).
8
9It does, however, optionally implement HTTP/1.1 persistent connections,
10as of version 0.3.
11
12Notes on CGIHTTPRequestHandler
13------------------------------
14
15This class implements GET and POST requests to cgi-bin scripts.
16
17If the os.fork() function is not present (e.g. on Windows),
18subprocess.Popen() is used as a fallback, with slightly altered semantics.
19
20In all cases, the implementation is intentionally naive -- all
21requests are executed synchronously.
22
23SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
24-- it may execute arbitrary Python code or external programs.
25
26Note that status code 200 is sent prior to execution of a CGI script, so
27scripts cannot send other status codes such as 302 (redirect).
28
29XXX To do:
30
31- log requests even later (to capture byte count)
32- log user-agent header and other interesting goodies
33- send error log to separate file
34"""
35
36from __future__ import (absolute_import, division,
37                        print_function, unicode_literals)
38from future import utils
39from future.builtins import *
40
41
42# See also:
43#
44# HTTP Working Group                                        T. Berners-Lee
45# INTERNET-DRAFT                                            R. T. Fielding
46# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
47# Expires September 8, 1995                                  March 8, 1995
48#
49# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
50#
51# and
52#
53# Network Working Group                                      R. Fielding
54# Request for Comments: 2616                                       et al
55# Obsoletes: 2068                                              June 1999
56# Category: Standards Track
57#
58# URL: http://www.faqs.org/rfcs/rfc2616.html
59
60# Log files
61# ---------
62#
63# Here's a quote from the NCSA httpd docs about log file format.
64#
65# | The logfile format is as follows. Each line consists of:
66# |
67# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
68# |
69# |        host: Either the DNS name or the IP number of the remote client
70# |        rfc931: Any information returned by identd for this person,
71# |                - otherwise.
72# |        authuser: If user sent a userid for authentication, the user name,
73# |                  - otherwise.
74# |        DD: Day
75# |        Mon: Month (calendar name)
76# |        YYYY: Year
77# |        hh: hour (24-hour format, the machine's timezone)
78# |        mm: minutes
79# |        ss: seconds
80# |        request: The first line of the HTTP request as sent by the client.
81# |        ddd: the status code returned by the server, - if not available.
82# |        bbbb: the total number of bytes sent,
83# |              *not including the HTTP/1.0 header*, - if not available
84# |
85# | You can determine the name of the file accessed through request.
86#
87# (Actually, the latter is only true if you know the server configuration
88# at the time the request was made!)
89
# Version of this module.  It is appended to "BaseHTTP/" and "SimpleHTTP/"
# to build the server_version strings of the request handlers below.
__version__ = "0.6"

# Names exported by a star-import of this module.
__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
93
94from future.backports import html
95from future.backports.http import client as http_client
96from future.backports.urllib import parse as urllib_parse
97from future.backports import socketserver
98
99import io
100import mimetypes
101import os
102import posixpath
103import select
104import shutil
105import socket # For gethostbyaddr()
106import sys
107import time
108import copy
109import argparse
110
111
# Default error message template.
# It is used as BaseHTTPRequestHandler.error_message_format and is filled in
# by send_error() with the %-operator from a mapping that provides the keys
# "code", "message" and "explain".
DEFAULT_ERROR_MESSAGE = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
        "http://www.w3.org/TR/html4/strict.dtd">
<html>
    <head>
        <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
        <title>Error response</title>
    </head>
    <body>
        <h1>Error response</h1>
        <p>Error code: %(code)d</p>
        <p>Message: %(message)s.</p>
        <p>Error code explanation: %(code)s - %(explain)s.</p>
    </body>
</html>
"""

# Default value of the Content-Type header sent along with the error page
# (used as BaseHTTPRequestHandler.error_content_type).
DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
131
def _quote_html(html):
    """Return *html* with "&", "<" and ">" replaced by HTML entities.

    The ampersand is handled first so that the entities produced for
    the angle brackets are not escaped a second time.
    """
    escaped = html
    for char, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
        escaped = escaped.replace(char, entity)
    return escaped
134
class HTTPServer(socketserver.TCPServer):

    """TCP server that remembers the name and port it is bound to."""

    # Let the listening address be reused right after a restart; this
    # seems to make sense in a testing environment.
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the socket, then record server_name and server_port.

        The host and port are taken from the bound socket; the host is
        expanded to a fully qualified domain name.
        """
        socketserver.TCPServer.server_bind(self)
        bound_host, bound_port = self.socket.getsockname()[:2]
        self.server_name = socket.getfqdn(bound_host)
        self.server_port = bound_port
145
146
class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).

    The specification specifies that lines are separated by CRLF but
    for compatibility with the widest range of clients recommends
    servers also handle LF.  Similarly, whitespace in the request line
    is treated sensibly (allowing multiple spaces between components
    and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.x protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and version are the broken-down request line;

    - headers is an instance of email.message.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    # Template and Content-Type used by send_error() for the error page.
    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.  self.requestline and self.close_connection are
        set as well.

        Return True for success, False for failure; on failure, an
        error is sent back (except for a blank request line, which is
        silently ignored).

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = self.default_request_version
        self.close_connection = 1
        requestline = str(self.raw_requestline, 'iso-8859-1')
        requestline = requestline.rstrip('\r\n')
        self.requestline = requestline
        words = requestline.split()
        if len(words) == 3:
            command, path, version = words
            if version[:5] != 'HTTP/':
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            try:
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                version_number = int(version_number[0]), int(version_number[1])
            except (ValueError, IndexError):
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
                self.close_connection = 0
            if version_number >= (2, 0):
                self.send_error(505,
                          "Invalid HTTP Version (%s)" % base_version_number)
                return False
        elif len(words) == 2:
            # No version given: an HTTP/0.9 request, which only knows GET.
            command, path = words
            self.close_connection = 1
            if command != 'GET':
                self.send_error(400,
                                "Bad HTTP/0.9 request type (%r)" % command)
                return False
        elif not words:
            return False
        else:
            self.send_error(400, "Bad request syntax (%r)" % requestline)
            return False
        self.command, self.path, self.request_version = command, path, version

        # A path that starts with "//" would be read as a scheme-relative
        # URL (i.e. another host) when it is echoed back in a Location
        # header, giving an open redirect.  Collapse the leading slashes.
        if self.path.startswith('//'):
            self.path = '/' + self.path.lstrip('/')

        # Examine the headers and look for a Connection directive.
        try:
            self.headers = http_client.parse_headers(self.rfile,
                                                     _class=self.MessageClass)
        except http_client.LineTooLong:
            self.send_error(400, "Line too long")
            return False
        except http_client.HTTPException:
            # Raised by the header parser for an oversized header block;
            # answer the client instead of letting the handler crash.
            self.send_error(431, "Too many headers")
            return False

        conntype = self.headers.get('Connection', "")
        if conntype.lower() == 'close':
            self.close_connection = 1
        elif (conntype.lower() == 'keep-alive' and
              self.protocol_version >= "HTTP/1.1"):
            self.close_connection = 0
        # Examine the headers and look for an Expect directive
        expect = self.headers.get('Expect', "")
        if (expect.lower() == "100-continue" and
                self.protocol_version >= "HTTP/1.1" and
                self.request_version >= "HTTP/1.1"):
            if not self.handle_expect_100():
                return False
        return True

    def handle_expect_100(self):
        """Decide what to do with an "Expect: 100-continue" header.

        If the client is expecting a 100 Continue response, we must
        respond with either a 100 Continue or a final response before
        waiting for the request body. The default is to always respond
        with a 100 Continue. You can behave differently (for example,
        reject unauthorized requests) by overriding this method.

        This method should either return True (possibly after sending
        a 100 Continue response) or send an error response and return
        False.

        """
        self.send_response_only(100)
        self.flush_headers()
        return True

    def handle_one_request(self):
        """Handle a single HTTP request.

        Reads the request line, parses the request and dispatches to
        the do_<command>() method; a 501 error is sent when no such
        method exists and a 414 error when the request line is longer
        than 65536 bytes.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        """
        try:
            # Read one byte more than the limit so an over-long line can
            # be told apart from one that is exactly at the limit.
            self.raw_requestline = self.rfile.readline(65537)
            if len(self.raw_requestline) > 65536:
                # send_error() logs these attributes, so give them values.
                self.requestline = ''
                self.request_version = ''
                self.command = ''
                self.send_error(414)
                return
            if not self.raw_requestline:
                # The peer closed the connection.
                self.close_connection = 1
                return
            if not self.parse_request():
                # An error code has been sent, just exit
                return
            mname = 'do_' + self.command
            if not hasattr(self, mname):
                self.send_error(501, "Unsupported method (%r)" % self.command)
                return
            method = getattr(self, mname)
            method()
            self.wfile.flush() #actually send the response if not already done.
        except socket.timeout as e:
            #a read or a write timed out.  Discard this connection
            self.log_error("Request timed out: %r", e)
            self.close_connection = 1
            return

    def handle(self):
        """Handle multiple requests if necessary."""
        self.close_connection = 1

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

    def send_error(self, code, message=None):
        """Send and log an error reply.

        Arguments are the error code, and a detailed message.
        The detailed message defaults to the short entry matching the
        response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.  A
        "Connection: close" header is always included.

        """

        try:
            shortmsg, longmsg = self.responses[code]
        except KeyError:
            shortmsg, longmsg = '???', '???'
        if message is None:
            message = shortmsg
        explain = longmsg
        self.log_error("code %d, message %s", code, message)
        # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201)
        content = (self.error_message_format %
                   {'code': code, 'message': _quote_html(message), 'explain': explain})
        self.send_response(code, message)
        self.send_header("Content-Type", self.error_content_type)
        self.send_header('Connection', 'close')
        self.end_headers()
        # No body for HEAD requests nor for status codes that forbid one.
        if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
            self.wfile.write(content.encode('UTF-8', 'replace'))

    def send_response(self, code, message=None):
        """Add the response header to the headers buffer and log the
        response code.

        Also send two standard headers with the server software
        version and the current date.

        """
        self.log_request(code)
        self.send_response_only(code, message)
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_response_only(self, code, message=None):
        """Buffer the status line only (nothing for HTTP/0.9 requests).

        The message defaults to the short entry for the code in
        self.responses, or to an empty string for unknown codes.

        """
        if message is None:
            if code in self.responses:
                message = self.responses[code][0]
            else:
                message = ''
        if self.request_version != 'HTTP/0.9':
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(("%s %d %s\r\n" %
                    (self.protocol_version, code, message)).encode(
                        'latin-1', 'strict'))

    def send_header(self, keyword, value):
        """Send a MIME header to the headers buffer.

        A Connection header additionally updates self.close_connection
        ("close" sets it, "keep-alive" clears it).

        """
        if self.request_version != 'HTTP/0.9':
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(
                ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))

        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = 1
            elif value.lower() == 'keep-alive':
                self.close_connection = 0

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            # The buffer is created lazily by the send_* methods; create it
            # here too so that ending an empty header block does not fail.
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(b"\r\n")
            self.flush_headers()

    def flush_headers(self):
        """Write the buffered header lines to wfile and empty the buffer."""
        if hasattr(self, '_headers_buffer'):
            self.wfile.write(b"".join(self._headers_buffer))
            self._headers_buffer = []

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """

        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, format, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        self.log_message(format, *args)

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client ip and current date/time are prefixed to
        every message, which is written to sys.stderr.

        """

        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          format%args))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self, timestamp=None):
        """Return the date and time formatted for a message header.

        TIMESTAMP is in seconds since the epoch and defaults to the
        current time; the result is always expressed in GMT.

        """
        if timestamp is None:
            timestamp = time.time()
        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
        s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
                self.weekdayname[wd],
                day, self.monthname[month], year,
                hh, mm, ss)
        return s

    def log_date_time_string(self):
        """Return the current (local) time formatted for logging."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
                day, self.monthname[month], year, hh, mm, ss)
        return s

    # Indexed by the weekday field of a time tuple (Monday is 0).
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Indexed by the month field of a time tuple (1-12), hence the
    # placeholder in slot 0.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address."""

        return self.client_address[0]

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    MessageClass = http_client.HTTPMessage

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # See RFC 2616 and 6585.
    responses = {
        100: ('Continue', 'Request received, please continue'),
        101: ('Switching Protocols',
              'Switching to new protocol; obey Upgrade header'),

        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
        204: ('No Content', 'Request fulfilled, nothing follows'),
        205: ('Reset Content', 'Clear input form for further input.'),
        206: ('Partial Content', 'Partial content follows.'),

        300: ('Multiple Choices',
              'Object has several resources -- see URI list'),
        301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('See Other', 'Object moved -- see Method and URL list'),
        304: ('Not Modified',
              'Document has not changed since given time'),
        305: ('Use Proxy',
              'You must use proxy specified in Location to access this '
              'resource.'),
        307: ('Temporary Redirect',
              'Object moved temporarily -- see URI list'),

        400: ('Bad Request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment Required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not Found', 'Nothing matches the given URI'),
        405: ('Method Not Allowed',
              'Specified method is invalid for this resource.'),
        406: ('Not Acceptable', 'URI not available in preferred format.'),
        407: ('Proxy Authentication Required', 'You must authenticate with '
              'this proxy before proceeding.'),
        408: ('Request Timeout', 'Request timed out; try again later.'),
        409: ('Conflict', 'Request conflict.'),
        410: ('Gone',
              'URI no longer exists and has been permanently removed.'),
        411: ('Length Required', 'Client must specify Content-Length.'),
        412: ('Precondition Failed', 'Precondition in headers is false.'),
        413: ('Request Entity Too Large', 'Entity is too large.'),
        414: ('Request-URI Too Long', 'URI is too long.'),
        415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
        416: ('Requested Range Not Satisfiable',
              'Cannot satisfy request range.'),
        417: ('Expectation Failed',
              'Expect condition could not be satisfied.'),
        428: ('Precondition Required',
              'The origin server requires the request to be conditional.'),
        429: ('Too Many Requests', 'The user has sent too many requests '
              'in a given amount of time ("rate limiting").'),
        431: ('Request Header Fields Too Large', 'The server is unwilling to '
              'process the request because its header fields are too large.'),

        500: ('Internal Server Error', 'Server got itself in trouble'),
        501: ('Not Implemented',
              'Server does not support this operation'),
        502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
        503: ('Service Unavailable',
              'The server cannot process the request due to a high load'),
        504: ('Gateway Timeout',
              'The gateway server did not receive a timely response'),
        505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
        511: ('Network Authentication Required',
              'The client needs to authenticate to gain network access.'),
        }
658
659
660class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
661
662    """Simple HTTP request handler with GET and HEAD commands.
663
664    This serves files from the current directory and any of its
665    subdirectories.  The MIME type for files is determined by
666    calling the .guess_type() method.
667
668    The GET and HEAD requests are identical except that the HEAD
669    request omits the actual contents of the file.
670
671    """
672
673    server_version = "SimpleHTTP/" + __version__
674
675    def do_GET(self):
676        """Serve a GET request."""
677        f = self.send_head()
678        if f:
679            self.copyfile(f, self.wfile)
680            f.close()
681
682    def do_HEAD(self):
683        """Serve a HEAD request."""
684        f = self.send_head()
685        if f:
686            f.close()
687
688    def send_head(self):
689        """Common code for GET and HEAD commands.
690
691        This sends the response code and MIME headers.
692
693        Return value is either a file object (which has to be copied
694        to the outputfile by the caller unless the command was HEAD,
695        and must be closed by the caller under all circumstances), or
696        None, in which case the caller has nothing further to do.
697
698        """
699        path = self.translate_path(self.path)
700        f = None
701        if os.path.isdir(path):
702            if not self.path.endswith('/'):
703                # redirect browser - doing basically what apache does
704                self.send_response(301)
705                self.send_header("Location", self.path + "/")
706                self.end_headers()
707                return None
708            for index in "index.html", "index.htm":
709                index = os.path.join(path, index)
710                if os.path.exists(index):
711                    path = index
712                    break
713            else:
714                return self.list_directory(path)
715        ctype = self.guess_type(path)
716        try:
717            f = open(path, 'rb')
718        except IOError:
719            self.send_error(404, "File not found")
720            return None
721        self.send_response(200)
722        self.send_header("Content-type", ctype)
723        fs = os.fstat(f.fileno())
724        self.send_header("Content-Length", str(fs[6]))
725        self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
726        self.end_headers()
727        return f
728
729    def list_directory(self, path):
730        """Helper to produce a directory listing (absent index.html).
731
732        Return value is either a file object, or None (indicating an
733        error).  In either case, the headers are sent, making the
734        interface the same as for send_head().
735
736        """
737        try:
738            list = os.listdir(path)
739        except os.error:
740            self.send_error(404, "No permission to list directory")
741            return None
742        list.sort(key=lambda a: a.lower())
743        r = []
744        displaypath = html.escape(urllib_parse.unquote(self.path))
745        enc = sys.getfilesystemencoding()
746        title = 'Directory listing for %s' % displaypath
747        r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
748                 '"http://www.w3.org/TR/html4/strict.dtd">')
749        r.append('<html>\n<head>')
750        r.append('<meta http-equiv="Content-Type" '
751                 'content="text/html; charset=%s">' % enc)
752        r.append('<title>%s</title>\n</head>' % title)
753        r.append('<body>\n<h1>%s</h1>' % title)
754        r.append('<hr>\n<ul>')
755        for name in list:
756            fullname = os.path.join(path, name)
757            displayname = linkname = name
758            # Append / for directories or @ for symbolic links
759            if os.path.isdir(fullname):
760                displayname = name + "/"
761                linkname = name + "/"
762            if os.path.islink(fullname):
763                displayname = name + "@"
764                # Note: a link to a directory displays with @ and links with /
765            r.append('<li><a href="%s">%s</a></li>'
766                    % (urllib_parse.quote(linkname), html.escape(displayname)))
767            # # Use this instead:
768            # r.append('<li><a href="%s">%s</a></li>'
769            #         % (urllib.quote(linkname), cgi.escape(displayname)))
770        r.append('</ul>\n<hr>\n</body>\n</html>\n')
771        encoded = '\n'.join(r).encode(enc)
772        f = io.BytesIO()
773        f.write(encoded)
774        f.seek(0)
775        self.send_response(200)
776        self.send_header("Content-type", "text/html; charset=%s" % enc)
777        self.send_header("Content-Length", str(len(encoded)))
778        self.end_headers()
779        return f
780
781    def translate_path(self, path):
782        """Translate a /-separated PATH to the local filename syntax.
783
784        Components that mean special things to the local file system
785        (e.g. drive or directory names) are ignored.  (XXX They should
786        probably be diagnosed.)
787
788        """
789        # abandon query parameters
790        path = path.split('?',1)[0]
791        path = path.split('#',1)[0]
792        path = posixpath.normpath(urllib_parse.unquote(path))
793        words = path.split('/')
794        words = filter(None, words)
795        path = os.getcwd()
796        for word in words:
797            drive, word = os.path.splitdrive(word)
798            head, word = os.path.split(word)
799            if word in (os.curdir, os.pardir): continue
800            path = os.path.join(path, word)
801        return path
802
803    def copyfile(self, source, outputfile):
804        """Copy all data between two file objects.
805
806        The SOURCE argument is a file object open for reading
807        (or anything with a read() method) and the DESTINATION
808        argument is a file object open for writing (or
809        anything with a write() method).
810
811        The only reason for overriding this would be to change
812        the block size or perhaps to replace newlines by CRLF
813        -- note however that this the default server uses this
814        to copy binary data as well.
815
816        """
817        shutil.copyfileobj(source, outputfile)
818
819    def guess_type(self, path):
820        """Guess the type of a file.
821
822        Argument is a PATH (a filename).
823
824        Return value is a string of the form type/subtype,
825        usable for a MIME Content-type header.
826
827        The default implementation looks the file's extension
828        up in the table self.extensions_map, using application/octet-stream
829        as a default; however it would be permissible (if
830        slow) to look inside the data to make a better guess.
831
832        """
833
834        base, ext = posixpath.splitext(path)
835        if ext in self.extensions_map:
836            return self.extensions_map[ext]
837        ext = ext.lower()
838        if ext in self.extensions_map:
839            return self.extensions_map[ext]
840        else:
841            return self.extensions_map['']
842
    # Runs once, while the class body is executed: make sure the mimetypes
    # tables are loaded before they are copied below.
    if not mimetypes.inited:
        mimetypes.init() # try to read system mime.types
    # Extension -> MIME type table consulted by guess_type().  It starts as a
    # private copy of the mimetypes table, so the update below does not
    # modify mimetypes.types_map itself.
    extensions_map = mimetypes.types_map.copy()
    # The '' key is what guess_type() falls back to for unknown extensions;
    # the remaining entries make a few source-file types text/plain.
    extensions_map.update({
        '': 'application/octet-stream', # Default
        '.py': 'text/plain',
        '.c': 'text/plain',
        '.h': 'text/plain',
        })
852
853
854# Utilities for CGIHTTPRequestHandler
855
856def _url_collapse_path(path):
857    """
858    Given a URL path, remove extra '/'s and '.' path elements and collapse
859    any '..' references and returns a colllapsed path.
860
861    Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
862    The utility of this function is limited to is_cgi method and helps
863    preventing some security attacks.
864
865    Returns: A tuple of (head, tail) where tail is everything after the final /
866    and head is everything before it.  Head will always start with a '/' and,
867    if it contains anything else, never have a trailing '/'.
868
869    Raises: IndexError if too many '..' occur within the path.
870
871    """
872    # Similar to os.path.split(os.path.normpath(path)) but specific to URL
873    # path semantics rather than local operating system semantics.
874    path_parts = path.split('/')
875    head_parts = []
876    for part in path_parts[:-1]:
877        if part == '..':
878            head_parts.pop() # IndexError if more '..' than prior parts
879        elif part and part != '.':
880            head_parts.append( part )
881    if path_parts:
882        tail_part = path_parts.pop()
883        if tail_part:
884            if tail_part == '..':
885                head_parts.pop()
886                tail_part = ''
887            elif tail_part == '.':
888                tail_part = ''
889    else:
890        tail_part = ''
891
892    splitpath = ('/' + '/'.join(head_parts), tail_part)
893    collapsed_path = "/".join(splitpath)
894
895    return collapsed_path
896
897
898
# Cached result of nobody_uid(); None until the first successful lookup.
nobody = None

def nobody_uid():
    """Internal routine returning the uid to run CGI scripts under.

    The uid of the 'nobody' account is used when the password database
    has one; otherwise a uid one above the highest uid listed there.
    The value is remembered in the module-level ``nobody``.  Returns -1
    when the pwd module cannot be imported.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody').pw_uid
        except KeyError:
            # No such account: pick a uid above every known one.
            nobody = 1 + max(entry.pw_uid for entry in pwd.getpwall())
    return nobody
915
916
def executable(path):
    """Return whether os.access() reports PATH as executable (os.X_OK)."""
    may_execute = os.access(path, os.X_OK)
    return may_execute
920
921
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target is
        answered with a 501 error.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(501, "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        Requests for CGI scripts are handed to run_cgi(); everything
        else is delegated to SimpleHTTPRequestHandler.send_head().
        """
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).

        """
        collapsed_path = _url_collapse_path(self.path)
        # Split at the first '/' after the leading one: head is the
        # top-level directory, tail is everything below it.
        dir_sep = collapsed_path.find('/', 1)
        head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
        if head in self.cgi_directories:
            self.cgi_info = head, tail
            return True
        return False


    # Top-level URL directories whose contents are treated as CGI scripts.
    cgi_directories = ['/cgi-bin', '/htbin']

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        Expects is_cgi() to have stored (dir, rest) in self.cgi_info.
        Locates the script below the CGI directory, builds the CGI
        environment from the request, sends the 200 status line and then
        runs the script: via fork/execve where os.fork exists, via
        subprocess.Popen otherwise.  Problems locating the script are
        reported to the client with send_error() and the method returns
        without running anything.
        """
        dir, rest = self.cgi_info
        # Walk the normalized path that is_cgi() validated, never the raw
        # self.path: the raw path may still contain extra '/' or '..'
        # segments, and scanning it could select a script that lives
        # outside the CGI directories.
        path = dir + '/' + rest

        # Descend into sub-directories of the CGI directory for as long as
        # the next path component names an existing directory.
        i = path.find('/', len(dir) + 1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                dir, rest = nextdir, nextrest
                i = path.find('/', len(dir) + 1)
            else:
                break

        # find an explicit query string, if present.
        i = rest.rfind('?')
        if i >= 0:
            rest, query = rest[:i], rest[i+1:]
        else:
            query = ''

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(404, "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(403, "CGI script is not a plain file (%r)" %
                            scriptname)
            return
        ispy = self.is_python(scriptname)
        # Without fork, Python scripts are started through the interpreter
        # (see the subprocess branch below), so only then is the
        # executable check skipped.
        if self.have_fork or not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(403, "CGI script is not executable (%r)" %
                                scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib_parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.get("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                import base64, binascii
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    # Basic credentials are base64("user:password"); only
                    # the user name is exported, and malformed input is
                    # silently ignored.
                    try:
                        authorization = authorization[1].encode('ascii')
                        if utils.PY3:
                            # In Py3.3, was:
                            authorization = base64.decodebytes(authorization).\
                                            decode('ascii')
                        else:
                            # Backport to Py2.7:
                            authorization = base64.decodestring(authorization).\
                                            decode('ascii')
                    except (binascii.Error, UnicodeError):
                        pass
                    else:
                        authorization = authorization.split(':')
                        if len(authorization) == 2:
                            env['REMOTE_USER'] = authorization[0]
        # XXX REMOTE_IDENT
        if self.headers.get('content-type') is None:
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        accept = []
        for line in self.headers.getallmatchingheaders('accept'):
            if line[:1] in "\t\n\r ":
                # Line starting with whitespace: taken whole, stripped.
                accept.append(line.strip())
            else:
                # Skip the 7 characters of "Accept:" and split the rest.
                accept = accept + line[7:].split(',')
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        # The status line is sent before the script runs, so the script
        # itself cannot choose a different status code.
        self.send_response(200, "Script output follows")
        self.flush_headers()

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            # A query without '=' is passed to the script as a
            # command-line argument.
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush() # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                if sts:
                    self.log_error("CGI script exit status %#x", sts)
                return
            # Child
            try:
                # Try to drop privileges; failure to do so is ignored.
                try:
                    os.setuid(nobody)
                except os.error:
                    pass
                # Wire the connection to the script's stdin and stdout.
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Deliberately catch everything: whatever went wrong, the
                # child reports it and exits instead of continuing to run
                # the server code in a second process.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            # A missing or malformed Content-Length counts as "no body".
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env = env
                                 )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
1190
1191
def test(HandlerClass = BaseHTTPRequestHandler,
         ServerClass = HTTPServer, protocol="HTTP/1.0", port=8000):
    """Run an HTTP server with the given handler class until interrupted.

    HandlerClass.protocol_version is set to PROTOCOL and a ServerClass
    instance is bound to PORT on all interfaces (port 8000 by default).
    A keyboard interrupt closes the server and exits the process with
    status 0.

    """
    HandlerClass.protocol_version = protocol
    httpd = ServerClass(('', port), HandlerClass)

    host, bound_port = httpd.socket.getsockname()[:2]
    print("Serving HTTP on", host, "port", bound_port, "...")
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        print("\nKeyboard interrupt received, exiting.")
        httpd.server_close()
        sys.exit(0)
1213
if __name__ == '__main__':
    # Command line: [--cgi] [port]
    parser = argparse.ArgumentParser()
    parser.add_argument('--cgi', action='store_true',
                        help='Run as CGI Server')
    parser.add_argument('port', action='store',
                        default=8000, type=int,
                        nargs='?',
                        help='Specify alternate port [default: 8000]')
    args = parser.parse_args()
    # --cgi selects the CGI-capable handler; otherwise plain files are served.
    test(HandlerClass=(CGIHTTPRequestHandler if args.cgi
                       else SimpleHTTPRequestHandler),
         port=args.port)
1227