1# Copyright 2012, Google Inc.
2# All rights reserved.
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met:
7#
8#     * Redistributions of source code must retain the above copyright
9# notice, this list of conditions and the following disclaimer.
10#     * Redistributions in binary form must reproduce the above
11# copyright notice, this list of conditions and the following disclaimer
12# in the documentation and/or other materials provided with the
13# distribution.
14#     * Neither the name of Google Inc. nor the names of its
15# contributors may be used to endorse or promote products derived from
16# this software without specific prior written permission.
17#
18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29"""Dispatch WebSocket request.
30"""
31
32from __future__ import absolute_import
33import logging
34import os
35import re
36import traceback
37
38from mod_pywebsocket import common
39from mod_pywebsocket import handshake
40from mod_pywebsocket import msgutil
41from mod_pywebsocket import stream
42from mod_pywebsocket import util
43
# Pattern that identifies a handler source file by the tail of its path.
# The match is case-insensitive ((?i)); it is used when walking a directory
# to enumerate handler files.
_SOURCE_PATH_PATTERN = re.compile(r'(?i)_wsh\.py$')
# Literal suffix removed from a handler file path to build its resource name.
# NOTE(review): the path-to-resource converter compares this suffix with
# str.endswith, i.e. case-sensitively, unlike the pattern above.
_SOURCE_SUFFIX = '_wsh.py'
# Names of the callables that are looked up in a sourced handler definition.
_DO_EXTRA_HANDSHAKE_HANDLER_NAME = 'web_socket_do_extra_handshake'
_TRANSFER_DATA_HANDLER_NAME = 'web_socket_transfer_data'
# This one is optional: a default handler is substituted when the handler
# definition does not provide a usable callable under this name.
_PASSIVE_CLOSING_HANDSHAKE_HANDLER_NAME = (
    'web_socket_passive_closing_handshake')
50
51
class DispatchException(Exception):
    """Raised when a WebSocket request cannot be dispatched.

    Attributes:
        status: status value carried with the error. When not given it is
            common.HTTP_STATUS_NOT_FOUND.
    """
    def __init__(self, name, status=common.HTTP_STATUS_NOT_FOUND):
        # Record the status first; the base class only stores the message.
        self.status = status
        super(DispatchException, self).__init__(name)
57
58
def _default_passive_closing_handshake_handler(request):
    """Fallback implementation of web_socket_passive_closing_handshake.

    Args:
        request: the request being closed. It is not inspected.

    Returns:
        A (code, reason) tuple: common.STATUS_NORMAL_CLOSURE and an empty
        reason string.
    """

    closing_code = common.STATUS_NORMAL_CLOSURE
    closing_reason = ''
    return closing_code, closing_reason
63
64
def _normalize_path(path):
    """Return the canonical, '/'-separated form of path.

    Args:
        path: the path to normalize. Either '\\' or '/' may be used as the
              separator.

    Returns:
        The real path (as computed by os.path.realpath) of the input, with
        every '\\' replaced by '/' regardless of the platform.
    """

    # Make backslash-separated input understandable to the local os.path.
    native_form = path.replace('\\', os.path.sep)
    canonical_form = os.path.realpath(native_form)
    # Always hand back forward slashes, whatever realpath produced.
    return canonical_form.replace('\\', '/')
80
81
def _create_path_to_resource_converter(base_dir):
    """Returns a function that converts the path of a WebSocket handler source
    file to a resource string by removing the path to the base directory from
    its head, removing _SOURCE_SUFFIX from its tail, and replacing path
    separators in it with '/'.

    The returned function yields None when the path does not end with
    _SOURCE_SUFFIX or is not located under the base directory.

    Args:
        base_dir: the path to the base directory.
    """

    base_dir = _normalize_path(base_dir)

    base_len = len(base_dir)
    suffix_len = len(_SOURCE_SUFFIX)
    # When base_dir itself ends with '/' (e.g. a filesystem root) the prefix
    # comparison below already stops on a separator.
    base_ends_with_separator = base_dir.endswith('/')

    def converter(path):
        if not path.endswith(_SOURCE_SUFFIX):
            return None
        # _normalize_path must not be used because resolving symlink breaks
        # following path check.
        path = path.replace('\\', '/')
        if not path.startswith(base_dir):
            return None
        # A plain prefix match is not enough: '/a/bc/h_wsh.py' and
        # '/a/b_wsh.py' both start with '/a/b' without being under it.
        # Require a separator right after the base directory.
        if (not base_ends_with_separator and
                path[base_len:base_len + 1] != '/'):
            return None
        return path[base_len:-suffix_len]

    return converter
108
109
def _enumerate_handler_file_paths(directory):
    """Generate the paths of WebSocket handler source files under directory.

    The directory tree is traversed with os.walk and every file whose joined
    path matches _SOURCE_PATH_PATTERN is yielded.

    Args:
        directory: the directory to walk.
    """

    for parent, _subdirs, names in os.walk(directory):
        candidates = (os.path.join(parent, name) for name in names)
        for candidate in candidates:
            if _SOURCE_PATH_PATTERN.search(candidate):
                yield candidate
120
121
class _HandlerSuite(object):
    """Bundle of the handlers registered for one resource.

    Attributes:
        do_extra_handshake: handler for extra handshake processing.
        transfer_data: handler for data transfer.
        passive_closing_handshake: handler for the passive closing handshake.
    """
    def __init__(self, do_extra_handshake, transfer_data,
                 passive_closing_handshake):
        (self.do_extra_handshake,
         self.transfer_data,
         self.passive_closing_handshake) = (do_extra_handshake,
                                            transfer_data,
                                            passive_closing_handshake)
129
130
def _source_handler_file(handler_definition):
    """Execute a handler definition and collect its handlers.

    Args:
        handler_definition: a string containing Python statements that define
                            handler functions.

    Returns:
        A _HandlerSuite holding the extra handshake handler, the transfer
        data handler and the passive closing handshake handler. The default
        passive closing handshake handler is used when the definition does
        not provide a usable one.

    Raises:
        DispatchException: when executing the definition raises, or when the
            extra handshake or transfer data handler cannot be extracted.
    """

    namespace = {}
    try:
        # This statement is grammatically different in python 2 and 3.
        # Hence, yapf will complain about this. To overcome this, we disable
        # yapf for this line.
        exec(handler_definition, namespace) # yapf: disable
    except Exception:
        raise DispatchException('Error in sourcing handler:' +
                                traceback.format_exc())

    # The passive closing handshake handler is optional; fall back to the
    # default whenever it cannot be extracted.
    try:
        closing_handler = _extract_handler(
            namespace, _PASSIVE_CLOSING_HANDSHAKE_HANDLER_NAME)
    except Exception:
        closing_handler = _default_passive_closing_handshake_handler

    # These two are mandatory: extraction failures propagate to the caller.
    extra_handshake_handler = _extract_handler(
        namespace, _DO_EXTRA_HANDSHAKE_HANDLER_NAME)
    transfer_handler = _extract_handler(namespace,
                                        _TRANSFER_DATA_HANDLER_NAME)
    return _HandlerSuite(extra_handshake_handler, transfer_handler,
                         closing_handler)
159
160
def _extract_handler(dic, name):
    """Look up a callable called name in the dictionary dic.

    Args:
        dic: dictionary produced by sourcing a handler definition.
        name: key of the handler to fetch.

    Returns:
        The callable stored under name.

    Raises:
        DispatchException: when name is missing from dic or the stored value
            is not callable.
    """

    # A private sentinel distinguishes "missing" from any stored value.
    absent = object()
    candidate = dic.get(name, absent)
    if candidate is absent:
        raise DispatchException('%s is not defined.' % name)
    if callable(candidate):
        return candidate
    raise DispatchException('%s is not callable.' % name)
172
173
class Dispatcher(object):
    """Dispatches WebSocket requests.

    This class maintains a map from resource name to handler suites. The map
    is filled at construction time by sourcing the handler definition files
    found under the scan directory.
    """
    def __init__(self,
                 root_dir,
                 scan_dir=None,
                 allow_handlers_outside_root_dir=True):
        """Construct an instance.

        Args:
            root_dir: The directory where handler definition files are
                      placed.
            scan_dir: The directory where handler definition files are
                      searched. scan_dir must be a directory under root_dir,
                      including root_dir itself.  If scan_dir is None,
                      root_dir is used as scan_dir. scan_dir can be useful
                      in saving scan time when root_dir contains many
                      subdirectories.
            allow_handlers_outside_root_dir: Scans handler files even if their
                      canonical path is not under root_dir.

        Raises:
            DispatchException: when scan_dir is not root_dir or a directory
                      under it.
        """

        self._logger = util.get_class_logger(self)

        self._handler_suite_map = {}
        self._source_warnings = []
        if scan_dir is None:
            scan_dir = root_dir
        if not self._is_under_directory(os.path.realpath(scan_dir),
                                        os.path.realpath(root_dir)):
            raise DispatchException('scan_dir:%s must be a directory under '
                                    'root_dir:%s.' % (scan_dir, root_dir))
        self._source_handler_files_in_dir(root_dir, scan_dir,
                                          allow_handlers_outside_root_dir)

    @staticmethod
    def _is_under_directory(real_path, real_directory):
        """Tell whether real_path is real_directory or located inside it.

        Both arguments are expected to be already resolved with
        os.path.realpath. A bare string prefix comparison is not used
        because it would treat a sibling such as '/root_other' as being
        under '/root'.

        Args:
            real_path: canonical path to test.
            real_directory: canonical path of the containing directory.

        Returns:
            True when real_path equals real_directory or starts with
            real_directory followed by a path separator.
        """

        if real_path == real_directory:
            return True
        # Strip any trailing separator (present for a filesystem root) so
        # that exactly one separator follows the directory name.
        prefix = real_directory.rstrip(os.path.sep) + os.path.sep
        return real_path.startswith(prefix)

    def add_resource_path_alias(self, alias_resource_path,
                                existing_resource_path):
        """Add resource path alias.

        Once added, request to alias_resource_path would be handled by
        handler registered for existing_resource_path.

        Args:
            alias_resource_path: alias resource path
            existing_resource_path: existing resource path

        Raises:
            DispatchException: when no handler is registered for
                existing_resource_path.
        """
        try:
            handler_suite = self._handler_suite_map[existing_resource_path]
        except KeyError:
            raise DispatchException('No handler for: %r' %
                                    existing_resource_path)
        self._handler_suite_map[alias_resource_path] = handler_suite

    def source_warnings(self):
        """Return warnings in sourcing handlers."""

        return self._source_warnings

    def do_extra_handshake(self, request):
        """Do extra checking in WebSocket handshake.

        Select a handler based on request.ws_resource and call its
        web_socket_do_extra_handshake function.

        Args:
            request: mod_python request.

        Raises:
            DispatchException: when handler was not found
            AbortedByUserException: when user handler abort connection
            HandshakeException: when opening handshake failed
        """

        handler_suite = self.get_handler_suite(request.ws_resource)
        if handler_suite is None:
            raise DispatchException('No handler for: %r' % request.ws_resource)
        do_extra_handshake_ = handler_suite.do_extra_handshake
        try:
            do_extra_handshake_(request)
        except handshake.AbortedByUserException:
            # Re-raise to tell the caller of this function to finish this
            # connection without sending any error.
            self._logger.debug('%s', traceback.format_exc())
            raise
        except Exception as e:
            # Any other failure in the user handler rejects the handshake.
            util.prepend_message_to_exception(
                '%s raised exception for %s: ' %
                (_DO_EXTRA_HANDSHAKE_HANDLER_NAME, request.ws_resource), e)
            raise handshake.HandshakeException(e, common.HTTP_STATUS_FORBIDDEN)

    def transfer_data(self, request):
        """Let a handler transfer_data with a WebSocket client.

        Select a handler based on request.ws_resource and call its
        web_socket_transfer_data function.

        Args:
            request: mod_python request.

        Raises:
            DispatchException: when handler was not found
            AbortedByUserException: when user handler abort connection
        """

        # TODO(tyoshino): Terminate underlying TCP connection if possible.
        try:
            handler_suite = self.get_handler_suite(request.ws_resource)
            if handler_suite is None:
                raise DispatchException('No handler for: %r' %
                                        request.ws_resource)
            transfer_data_ = handler_suite.transfer_data
            transfer_data_(request)

            if not request.server_terminated:
                request.ws_stream.close_connection()
        # Catch non-critical exceptions the handler didn't handle.
        except handshake.AbortedByUserException:
            self._logger.debug('%s', traceback.format_exc())
            raise
        except msgutil.BadOperationException as e:
            self._logger.debug('%s', e)
            request.ws_stream.close_connection(
                common.STATUS_INTERNAL_ENDPOINT_ERROR)
        except msgutil.InvalidFrameException as e:
            # InvalidFrameException must be caught before
            # ConnectionTerminatedException that catches InvalidFrameException.
            self._logger.debug('%s', e)
            request.ws_stream.close_connection(common.STATUS_PROTOCOL_ERROR)
        except msgutil.UnsupportedFrameException as e:
            self._logger.debug('%s', e)
            request.ws_stream.close_connection(common.STATUS_UNSUPPORTED_DATA)
        except stream.InvalidUTF8Exception as e:
            self._logger.debug('%s', e)
            request.ws_stream.close_connection(
                common.STATUS_INVALID_FRAME_PAYLOAD_DATA)
        except msgutil.ConnectionTerminatedException as e:
            self._logger.debug('%s', e)
        except Exception as e:
            # Any other exceptions are forwarded to the caller of this
            # function.
            util.prepend_message_to_exception(
                '%s raised exception for %s: ' %
                (_TRANSFER_DATA_HANDLER_NAME, request.ws_resource), e)
            raise

    def passive_closing_handshake(self, request):
        """Prepare code and reason for responding client initiated closing
        handshake.

        Args:
            request: mod_python request.

        Returns:
            The value returned by the passive closing handshake handler of
            the suite selected by request.ws_resource, or by the default
            handler when no suite is registered for that resource.
        """

        handler_suite = self.get_handler_suite(request.ws_resource)
        if handler_suite is None:
            return _default_passive_closing_handshake_handler(request)
        return handler_suite.passive_closing_handshake(request)

    def get_handler_suite(self, resource):
        """Retrieves the handler suite (extra handshake processing, data
        transfer and passive closing handshake handlers) registered for the
        given resource.

        The query part ('?...') and the fragment ('#...') of resource are
        ignored when looking up the suite.

        Args:
            resource: the requested resource string.

        Returns:
            The registered handler suite, or None when there is none.

        Raises:
            DispatchException: when a suite is found but resource carries a
                non-empty fragment identifier.
        """

        fragment = None
        if '#' in resource:
            resource, fragment = resource.split('#', 1)
        if '?' in resource:
            resource = resource.split('?', 1)[0]
        handler_suite = self._handler_suite_map.get(resource)
        if handler_suite and fragment:
            raise DispatchException(
                'Fragment identifiers MUST NOT be used on WebSocket URIs',
                common.HTTP_STATUS_BAD_REQUEST)
        return handler_suite

    def _source_handler_files_in_dir(self, root_dir, scan_dir,
                                     allow_handlers_outside_root_dir):
        """Source all the handler source files in the scan_dir directory.

        The resource path is determined relative to root_dir. Files whose
        sourcing raises DispatchException are recorded in the source
        warnings and skipped.

        Args:
            root_dir: directory that resource paths are relative to.
            scan_dir: directory that is walked for handler files.
            allow_handlers_outside_root_dir: when false, files whose
                canonical path is not under root_dir are skipped.
        """

        # We build a map from resource to handler code assuming that there's
        # only one path from root_dir to scan_dir and it can be obtained by
        # comparing realpath of them.

        # Here we cannot use abspath. See
        # https://bugs.webkit.org/show_bug.cgi?id=31603

        convert = _create_path_to_resource_converter(root_dir)
        scan_realpath = os.path.realpath(scan_dir)
        root_realpath = os.path.realpath(root_dir)
        for path in _enumerate_handler_file_paths(scan_realpath):
            if (not allow_handlers_outside_root_dir
                    and not self._is_under_directory(os.path.realpath(path),
                                                     root_realpath)):
                self._logger.debug(
                    'Canonical path of %s is not under root directory', path)
                continue
            try:
                with open(path) as handler_file:
                    handler_suite = _source_handler_file(handler_file.read())
            except DispatchException as e:
                self._source_warnings.append('%s: %s' % (path, e))
                continue
            resource = convert(path)
            if resource is None:
                self._logger.debug('Path to resource conversion on %s failed',
                                   path)
            else:
                # Reuse the already converted resource instead of converting
                # the path a second time.
                self._handler_suite_map[resource] = handler_suite
383
384
385# vi:sts=4 sw=4 et
386