1"""Base classes for server/gateway implementations"""
2
3from .util import FileWrapper, guess_scheme, is_hop_by_hop
4from .headers import Headers
5
6import sys, os, time
7
# Public names exported by a star-import of this module.  The date helper
# 'format_date_time' and the underscore-prefixed helpers defined below are
# not listed here.
__all__ = [
    'BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler',
    'IISCGIHandler', 'read_environ'
]
12
# HTTP dates are always written with English weekday and month names,
# whatever the process locale is, so the names are spelled out here.
_weekdayname = "Mon Tue Wed Thu Fri Sat Sun".split()
# Index 0 is a placeholder so that month numbers can be used 1-based.
_monthname = [None] + "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec".split()

def format_date_time(timestamp):
    """Return 'timestamp' (seconds since the epoch) as an HTTP date string

    The time is broken down with 'time.gmtime()', so the result is always
    expressed in GMT, e.g. 'Thu, 01 Jan 1970 00:00:00 GMT'.
    """
    moment = time.gmtime(timestamp)
    fields = (
        _weekdayname[moment.tm_wday],
        moment.tm_mday,
        _monthname[moment.tm_mon],
        moment.tm_year,
        moment.tm_hour,
        moment.tm_min,
        moment.tm_sec,
    )
    return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % fields
24
# Membership test for the CGI variables that are filled in directly from
# the HTTP request.
_is_request = {
    'AUTH_TYPE', 'CONTENT_LENGTH', 'CONTENT_TYPE', 'HTTPS', 'PATH_INFO',
    'QUERY_STRING', 'REMOTE_IDENT', 'REMOTE_USER', 'REQUEST_METHOD',
    'SCRIPT_NAME',
}.__contains__

def _needs_transcode(k):
    """True if environ key 'k' names a value taken from the HTTP request

    That is the case for the fixed request variables, for anything starting
    with 'HTTP_' or 'SSL_', and for any such name behind one or more
    'REDIRECT_' prefixes.
    """
    redirect = 'REDIRECT_'
    while True:
        if _is_request(k) or k.startswith(('HTTP_', 'SSL_')):
            return True
        if not k.startswith(redirect):
            return False
        # Peel off one 'REDIRECT_' prefix and look at what is left.
        k = k[len(redirect):]
33
def read_environ():
    """Return a copy of 'os.environ' with HTTP request variables fixed up

    'os.environ' is native unicode, so values that came from the HTTP
    request have already been through a bytes->unicode decoding step.  For
    every key accepted by '_needs_transcode()' the value is re-encoded to
    recover those bytes and then decoded as ISO-8859-1 (bytes-as-unicode).
    All other variables are copied unchanged.
    """
    latin1 = 'iso-8859-1'
    fs_encoding = sys.getfilesystemencoding()

    # Use surrogate escapes where the codec machinery knows them
    # (Python 3.1+); otherwise fall back to 'replace'.
    try:
        ''.encode('utf-8', 'surrogateescape')
    except LookupError:
        escape = 'replace'
    else:
        escape = 'surrogateescape'

    # Decide once how request variables have to be re-encoded.  A codec of
    # None means "leave the value alone".
    if sys.platform != 'win32':
        # Recover bytes from the unicode environ.
        codec, errors = fs_encoding, escape
    else:
        # On win32, the os.environ is natively Unicode. Different servers
        # decode the request bytes using different encodings.
        software = os.environ.get('SERVER_SOFTWARE', '').lower()

        if software.startswith('microsoft-iis/'):
            # On IIS, the HTTP request will be decoded as UTF-8 as long
            # as the input is a valid UTF-8 sequence. Otherwise it is
            # decoded using the system code page (mbcs), with no way to
            # detect this has happened. Because UTF-8 is the more likely
            # encoding, and mbcs is inherently unreliable (an mbcs string
            # that happens to be valid UTF-8 will not be decoded as mbcs)
            # always recreate the original bytes as UTF-8.
            codec, errors = 'utf-8', 'strict'
        elif software.startswith('apache/'):
            # Apache mod_cgi writes bytes-as-unicode (as if ISO-8859-1)
            # direct to the Unicode environ. No modification needed.
            codec, errors = None, None
        elif software.startswith('simplehttp/') and 'python/3' in software:
            # Python 3's http.server.CGIHTTPRequestHandler decodes
            # using the urllib.unquote default of UTF-8, amongst other
            # issues.
            codec, errors = 'utf-8', 'strict'
        else:
            # For other servers, guess that they have written bytes to
            # the environ using stdio byte-oriented interfaces, ending up
            # with the system code page.
            codec, errors = fs_encoding, 'replace'

    environ = {}
    for name, value in os.environ.items():
        if codec is not None and _needs_transcode(name):
            value = value.encode(codec, errors).decode(latin1)
        environ[name] = value
    return environ
92
93
class BaseHandler:
    """Manage the invocation of a WSGI application

    'run()' drives one request: it builds the environ, calls the
    application, sends whatever the application returns and falls back to
    'handle_error()' when something goes wrong.  The transport itself is
    left to subclasses, which must provide '_write()', '_flush()',
    'get_stdin()', 'get_stderr()' and 'add_cgi_vars()'.
    """

    # Configuration parameters; can override per-subclass or per-instance
    wsgi_version = (1,0)
    wsgi_multithread = True
    wsgi_multiprocess = True
    wsgi_run_once = False

    origin_server = True    # We are transmitting direct to client
    http_version  = "1.0"   # Version that should be used for response
    server_software = None  # String name of server software, if any

    # os_environ is used to supply configuration from the OS environment:
    # by default it's a copy of 'os.environ' as of import time, but you can
    # override this in e.g. your __init__ method.
    os_environ= read_environ()

    # Collaborator classes
    wsgi_file_wrapper = FileWrapper     # set to None to disable
    headers_class = Headers             # must be a Headers-like class

    # Error handling (also per-subclass or per-instance)
    traceback_limit = None  # Print entire traceback to self.get_stderr()
    error_status = "500 Internal Server Error"
    error_headers = [('Content-Type','text/plain')]
    error_body = b"A server error occurred.  Please contact the administrator."

    # State variables (don't mess with these)
    status = result = None
    headers_sent = False
    headers = None
    bytes_sent = 0

    def run(self, application):
        """Invoke the application

        Sets up the environ, calls 'application' and sends its result.
        Connection errors caused by the client going away are swallowed;
        any other exception is passed to 'handle_error()'.  If that fails
        as well, the handler is closed and the second error propagates to
        the caller.
        """
        # Note to self: don't move the close()!  Asynchronous servers shouldn't
        # call close() from finish_response(), so if you close() anywhere but
        # the double-error branch here, you'll break asynchronous servers by
        # prematurely closing.  Async servers must return from 'run()' without
        # closing if there might still be output to iterate over.
        try:
            self.setup_environ()
            self.result = application(self.environ, self.start_response)
            self.finish_response()
        except (ConnectionAbortedError, BrokenPipeError, ConnectionResetError):
            # We expect the client to close the connection abruptly from time
            # to time.
            return
        except:
            try:
                self.handle_error()
            except:
                # If we get an error handling an error, just give up already!
                self.close()
                raise   # ...and let the actual server figure it out.


    def setup_environ(self):
        """Set up the environment for one request

        Starts from a copy of 'os_environ', lets the subclass add its CGI
        variables and then fills in the 'wsgi.*' keys.
        """

        env = self.environ = self.os_environ.copy()
        self.add_cgi_vars()

        env['wsgi.input']        = self.get_stdin()
        env['wsgi.errors']       = self.get_stderr()
        env['wsgi.version']      = self.wsgi_version
        env['wsgi.run_once']     = self.wsgi_run_once
        env['wsgi.url_scheme']   = self.get_scheme()
        env['wsgi.multithread']  = self.wsgi_multithread
        env['wsgi.multiprocess'] = self.wsgi_multiprocess

        if self.wsgi_file_wrapper is not None:
            env['wsgi.file_wrapper'] = self.wsgi_file_wrapper

        # Only advertise our own name if nothing upstream already set one.
        if self.origin_server and self.server_software:
            env.setdefault('SERVER_SOFTWARE',self.server_software)


    def finish_response(self):
        """Send any iterable data, then close self and the iterable

        Subclasses intended for use in asynchronous servers will
        want to redefine this method, such that it sets up callbacks
        in the event loop to iterate over the data, and to call
        'self.close()' once the response is finished.
        """
        try:
            # Iterate normally unless the result is a file wrapper *and*
            # 'sendfile()' reports that it transmitted the file itself.
            if not self.result_is_file() or not self.sendfile():
                for data in self.result:
                    self.write(data)
                self.finish_content()
        except:
            # Call close() on the iterable returned by the WSGI application
            # in case of an exception.
            if hasattr(self.result, 'close'):
                self.result.close()
            raise
        else:
            # We only call close() when no exception is raised, because it
            # will set status, result, headers, and environ fields to None.
            # See bpo-29183 for more details.
            self.close()


    def get_scheme(self):
        """Return the URL scheme being used"""
        return guess_scheme(self.environ)


    def set_content_length(self):
        """Set Content-Length when the result is known to be a single block

        If 'self.result' supports 'len()' and holds exactly one block, the
        number of bytes recorded in 'self.bytes_sent' is used as the
        Content-Length.  In every other case the headers are left alone.
        """
        try:
            blocks = len(self.result)
        except (TypeError,AttributeError,NotImplementedError):
            pass
        else:
            if blocks==1:
                self.headers['Content-Length'] = str(self.bytes_sent)
                return
        # XXX Try for chunked encoding if origin server and client is 1.1


    def cleanup_headers(self):
        """Make any necessary header changes or defaults

        Subclasses can extend this to add other defaults.
        """
        if 'Content-Length' not in self.headers:
            self.set_content_length()

    def start_response(self, status, headers,exc_info=None):
        """'start_response()' callable as specified by PEP 3333

        Stores 'status' and 'headers' for later transmission and returns
        the 'write()' callable.

        Calling it again without 'exc_info' once headers have been set
        raises AssertionError.  With 'exc_info', the stored status and
        headers are replaced unless headers were already sent; in that case
        the exception described by 'exc_info' is re-raised.  A malformed
        status raises AssertionError.
        """

        if exc_info:
            exc = None
            try:
                if self.headers_sent:
                    # Too late to turn the error into a response.  Re-raise
                    # the original exception object; building a new one via
                    # 'exc_info[0](exc_info[1])' breaks for exception types
                    # whose constructor does not take exactly one argument.
                    exc = exc_info[1]
                    if not isinstance(exc, BaseException):
                        # The value slot holds something other than an
                        # exception instance: build one from the type.
                        exc = exc_info[0](exc)
                    raise exc.with_traceback(exc_info[2])
            finally:
                exc = exc_info = None   # avoid dangling circular ref
        elif self.headers is not None:
            raise AssertionError("Headers already set!")

        self.status = status
        self.headers = self.headers_class(headers)
        status = self._convert_string_type(status, "Status")
        self._validate_status(status)

        if __debug__:
            for name, val in headers:
                name = self._convert_string_type(name, "Header name")
                val = self._convert_string_type(val, "Header value")
                assert not is_hop_by_hop(name),"Hop-by-hop headers not allowed"

        return self.write

    def _validate_status(self, status):
        """Raise AssertionError unless 'status' looks like '200 OK'

        The check is done with explicit raises rather than 'assert'
        statements so that it is not skipped when Python runs with -O.
        """
        if len(status) < 4:
            raise AssertionError("Status must be at least 4 characters")
        if not status[:3].isdigit():
            raise AssertionError("Status message must begin w/3-digit code")
        if status[3] != " ":
            raise AssertionError("Status message must have a space after code")

    def _convert_string_type(self, value, title):
        """Convert/check value type.

        Returns 'value' if it is exactly of type 'str'; otherwise raises
        AssertionError with a message that starts with 'title'.
        """
        if type(value) is str:
            return value
        raise AssertionError(
            "{0} must be of type str (got {1})".format(title, repr(value))
        )

    def send_preamble(self):
        """Transmit version/status/date/server, via self._write()

        An origin server sends the HTTP status line plus 'Date' and
        'Server' headers (unless the application supplied them), and only
        to clients that can accept headers.  Otherwise a CGI-style
        'Status:' line is written.
        """
        if self.origin_server:
            if self.client_is_modern():
                self._write(('HTTP/%s %s\r\n' % (self.http_version,self.status)).encode('iso-8859-1'))
                if 'Date' not in self.headers:
                    self._write(
                        ('Date: %s\r\n' % format_date_time(time.time())).encode('iso-8859-1')
                    )
                if self.server_software and 'Server' not in self.headers:
                    self._write(('Server: %s\r\n' % self.server_software).encode('iso-8859-1'))
        else:
            self._write(('Status: %s\r\n' % self.status).encode('iso-8859-1'))

    def write(self, data):
        """'write()' callable as specified by PEP 3333

        Sends the stored headers before the first block of output, keeps
        'self.bytes_sent' up to date and flushes after every block.
        Raises AssertionError if called before 'start_response()'.
        """

        assert type(data) is bytes, \
            "write() argument must be a bytes instance"

        if not self.status:
            raise AssertionError("write() before start_response()")

        elif not self.headers_sent:
            # Before the first output, send the stored headers
            self.bytes_sent = len(data)    # make sure we know content-length
            self.send_headers()
        else:
            self.bytes_sent += len(data)

        # XXX check Content-Length and truncate if too many bytes written?
        self._write(data)
        self._flush()


    def sendfile(self):
        """Platform-specific file transmission

        Override this method in subclasses to support platform-specific
        file transmission.  It is only called if the application's
        return iterable ('self.result') is an instance of
        'self.wsgi_file_wrapper'.

        This method should return a true value if it was able to actually
        transmit the wrapped file-like object using a platform-specific
        approach.  It should return a false value if normal iteration
        should be used instead.  An exception can be raised to indicate
        that transmission was attempted, but failed.

        NOTE: this method should call 'self.send_headers()' if
        'self.headers_sent' is false and it is going to attempt direct
        transmission of the file.
        """
        return False   # No platform-specific transmission by default


    def finish_content(self):
        """Ensure headers and content have both been sent"""
        if not self.headers_sent:
            # Only zero Content-Length if not set by the application (so
            # that HEAD requests can be satisfied properly, see #3839)
            self.headers.setdefault('Content-Length', "0")
            self.send_headers()
        else:
            pass # XXX check if content-length was too short?

    def close(self):
        """Close the iterable (if needed) and reset all instance vars

        Subclasses may want to also drop the client connection.
        """
        try:
            if hasattr(self.result,'close'):
                self.result.close()
        finally:
            # Reset the per-request state even if closing the result failed.
            self.result = self.headers = self.status = self.environ = None
            self.bytes_sent = 0
            self.headers_sent = False


    def send_headers(self):
        """Transmit headers to the client, via self._write()

        Header defaults are applied first.  'headers_sent' is set even when
        nothing is written, which is the case for an origin server talking
        to a client that cannot accept headers.
        """
        self.cleanup_headers()
        self.headers_sent = True
        if not self.origin_server or self.client_is_modern():
            self.send_preamble()
            self._write(bytes(self.headers))


    def result_is_file(self):
        """True if 'self.result' is an instance of 'self.wsgi_file_wrapper'"""
        wrapper = self.wsgi_file_wrapper
        return wrapper is not None and isinstance(self.result,wrapper)


    def client_is_modern(self):
        """True if client can accept status and headers

        Raises KeyError if the environ has no 'SERVER_PROTOCOL' entry.
        """
        return self.environ['SERVER_PROTOCOL'].upper() != 'HTTP/0.9'


    def log_exception(self,exc_info):
        """Log the 'exc_info' tuple in the server log

        The traceback is printed to 'self.get_stderr()', limited to
        'self.traceback_limit' entries, and the stream is flushed.

        Subclasses may override to retarget the output or change its format.
        """
        try:
            from traceback import print_exception
            stderr = self.get_stderr()
            print_exception(
                exc_info[0], exc_info[1], exc_info[2],
                self.traceback_limit, stderr
            )
            stderr.flush()
        finally:
            exc_info = None     # drop the reference to the traceback

    def handle_error(self):
        """Log current error, and send error output to client if possible"""
        self.log_exception(sys.exc_info())
        if not self.headers_sent:
            # Nothing has reached the client yet, so the response can still
            # be replaced by the error page.
            self.result = self.error_output(self.environ, self.start_response)
            self.finish_response()
        # XXX else: attempt advanced recovery techniques for HTML or text?

    def error_output(self, environ, start_response):
        """WSGI mini-app to create error output

        By default, this just uses the 'error_status', 'error_headers',
        and 'error_body' attributes to generate an output page.  It can
        be overridden in a subclass to dynamically generate diagnostics,
        choose an appropriate message for the user's preferred language, etc.

        Note, however, that it's not recommended from a security perspective to
        spit out diagnostics to any old user; ideally, you should have to do
        something special to enable diagnostic output, which is why we don't
        include any here!
        """
        # Pass a copy of the header list so the class-level default is not
        # handed out for modification.
        start_response(self.error_status,self.error_headers[:],sys.exc_info())
        return [self.error_body]


    # Pure abstract methods; *must* be overridden in subclasses

    def _write(self,data):
        """Override in subclass to buffer data for send to client

        It's okay if this method actually transmits the data; BaseHandler
        just separates write and flush operations for greater efficiency
        when the underlying system actually has such a distinction.
        """
        raise NotImplementedError

    def _flush(self):
        """Override in subclass to force sending of recent '_write()' calls

        It's okay if this method is a no-op (i.e., if '_write()' actually
        sends the data).
        """
        raise NotImplementedError

    def get_stdin(self):
        """Override in subclass to return suitable 'wsgi.input'"""
        raise NotImplementedError

    def get_stderr(self):
        """Override in subclass to return suitable 'wsgi.errors'"""
        raise NotImplementedError

    def add_cgi_vars(self):
        """Override in subclass to insert CGI variables in 'self.environ'"""
        raise NotImplementedError
431
432
class SimpleHandler(BaseHandler):
    """Handler that is simply handed its streams and environment

    Meant for synchronous HTTP/1.0 origin servers: given an input stream,
    an output stream, an error stream and a base environ, it sends the
    complete response output.

    Usage::

        handler = SimpleHandler(
            inp,out,err,env, multithread=False, multiprocess=True
        )
        handler.run(app)"""

    def __init__(self,stdin,stdout,stderr,environ,
        multithread=True, multiprocess=False
    ):
        self.stdin, self.stdout, self.stderr = stdin, stdout, stderr
        self.base_env = environ
        self.wsgi_multithread, self.wsgi_multiprocess = multithread, multiprocess

    def get_stdin(self):
        return self.stdin

    def get_stderr(self):
        return self.stderr

    def add_cgi_vars(self):
        # The environ passed to the constructor supplies the CGI variables.
        self.environ.update(self.base_env)

    def _write(self,data):
        written = self.stdout.write(data)
        if written is not None and written != len(data):
            # The stream took only part of the data: complain once, then
            # keep writing until nothing is left.
            from warnings import warn
            warn("SimpleHandler.stdout.write() should not do partial writes",
                DeprecationWarning)
            pending = data[written:]
            while pending:
                written = self.stdout.write(pending)
                pending = pending[written:]

    def _flush(self):
        flush = self.stdout.flush
        flush()
        # From now on call the stream's flush() directly on this instance.
        self._flush = flush
481
482
class BaseCGIHandler(SimpleHandler):

    """Handler for CGI-like gateways with streams and an environ mapping

    Usage::

        handler = BaseCGIHandler(inp,out,err,env)
        handler.run(app)

    Use this class for gateway protocols such as ReadyExec and FastCGI,
    which offer usable input/output/error streams together with an
    environment mapping.  CGIHandler, which simply plugs in sys.stdin,
    os.environ, and so on, is built on top of it.

    Besides the streams and the environ, the constructor accepts the
    keyword arguments 'multithread' (default 'True') and 'multiprocess'
    (default 'False'), which control the configuration reported to the
    application.  'origin_server' is set to False so that CGI-like output
    is produced, and 'wsgi.run_once' is assumed to be False.
    """

    origin_server = False
505
506
class CGIHandler(BaseCGIHandler):

    """Plain CGI invocation via sys.stdin/stdout/stderr and os.environ

    Usage::

        CGIHandler().run(app)

    Unlike BaseCGIHandler, this class fixes 'wsgi.run_once' to 'True',
    'wsgi.multithread' to 'False' and 'wsgi.multiprocess' to 'True'.  Its
    constructor takes no parameters; it always picks up 'sys.stdin',
    'os.environ', and friends.

    Use BaseCGIHandler instead if any of these need to be different.
    """

    wsgi_run_once = True
    # Do not allow os.environ to leak between requests in Google App Engine
    # and other multi-run CGI use cases.  This is not easily testable.
    # See http://bugs.python.org/issue7250
    os_environ = {}

    def __init__(self):
        # The body streams are the binary buffers behind stdin/stdout; the
        # error stream is used as it is.
        body_in = sys.stdin.buffer
        body_out = sys.stdout.buffer
        errors = sys.stderr
        BaseCGIHandler.__init__(
            self, body_in, body_out, errors, read_environ(),
            multithread=False, multiprocess=True
        )
535
536
537class IISCGIHandler(BaseCGIHandler):
538    """CGI-based invocation with workaround for IIS path bug
539
540    This handler should be used in preference to CGIHandler when deploying on
541    Microsoft IIS without having set the config allowPathInfo option (IIS>=7)
542    or metabase allowPathInfoForScriptMappings (IIS<7).
543    """
544    wsgi_run_once = True
545    os_environ = {}
546
547    # By default, IIS gives a PATH_INFO that duplicates the SCRIPT_NAME at
548    # the front, causing problems for WSGI applications that wish to implement
549    # routing. This handler strips any such duplicated path.
550
551    # IIS can be configured to pass the correct PATH_INFO, but this causes
552    # another bug where PATH_TRANSLATED is wrong. Luckily this variable is
553    # rarely used and is not guaranteed by WSGI. On IIS<7, though, the
554    # setting can only be made on a vhost level, affecting all other script
555    # mappings, many of which break when exposed to the PATH_TRANSLATED bug.
556    # For this reason IIS<7 is almost never deployed with the fix. (Even IIS7
557    # rarely uses it because there is still no UI for it.)
558
559    # There is no way for CGI code to tell whether the option was set, so a
560    # separate handler class is provided.
561    def __init__(self):
562        environ= read_environ()
563        path = environ.get('PATH_INFO', '')
564        script = environ.get('SCRIPT_NAME', '')
565        if (path+'/').startswith(script+'/'):
566            environ['PATH_INFO'] = path[len(script):]
567        BaseCGIHandler.__init__(
568            self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr,
569            environ, multithread=False, multiprocess=True
570        )
571