1# Copyright 2012 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import BaseHTTPServer
6from collections import namedtuple
7import errno
8import gzip
9import logging
10import mimetypes
11import os
12import SimpleHTTPServer
13import socket
14import SocketServer
15import StringIO
16import sys
17import traceback
18import urlparse
19
20from telemetry.core import local_server
21
# Pair of byte offsets describing the slice of a resource that was requested.
# The request handler treats both ends as inclusive when slicing the response.
ByteRange = namedtuple('ByteRange', 'from_byte to_byte')

# A resource-map entry together with the (possibly None) range asked for.
ResourceAndRange = namedtuple('ResourceAndRange', 'resource byte_range')

# Absolute path of the 'mime.types' file that sits next to this module.
_MIME_TYPES_FILE = os.path.abspath(os.path.join(
    os.path.dirname(__file__), 'mime.types'))
27
28
class MemoryCacheHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
  """Serves GET and HEAD requests out of the server's in-memory resource map.

  The request path is translated to a real file-system path and looked up in
  self.server.resource_map. A single 'Range: bytes=...' request header is
  answered with a 206 partial response; otherwise the whole resource is sent.
  """

  protocol_version = 'HTTP/1.1'  # override BaseHTTPServer setting
  wbufsize = -1  # override StreamRequestHandler (a base class) setting

  def handle(self):
    """Handles the connection, swallowing connection-reset socket errors."""
    try:
      BaseHTTPServer.BaseHTTPRequestHandler.handle(self)
    except socket.error as e:
      # Connection reset errors happen all the time due to the browser closing
      # without terminating the connection properly.  They can be safely
      # ignored. Use the errno attribute rather than indexing the exception,
      # which only works on Python 2 and fails for argument-less errors.
      if e.errno != errno.ECONNRESET:
        raise

  def do_GET(self):
    """Serve a GET request."""
    resource_range = self.SendHead()

    # SendHead() has already replied with a 404 when nothing was found.
    if not resource_range or not resource_range.resource:
      return
    response = resource_range.resource['response']

    byte_range = resource_range.byte_range
    if byte_range:
      # Both ends of the range are inclusive, hence the + 1.
      response = response[byte_range.from_byte:byte_range.to_byte + 1]
    self.wfile.write(response)

  def do_HEAD(self):
    """Serve a HEAD request."""
    self.SendHead()

  def log_error(self, fmt, *args):
    """Deliberately logs nothing."""
    pass

  def log_request(self, code='-', size='-'):
    """Deliberately logs nothing."""
    # Don't spam the console unless it is important.
    pass

  def Response(self, path):
    """Get the response for the path.

    Args:
      path: Real file-system path used as the key into the resource map.
    Returns:
      The resource dictionary stored for |path|, or None if there is none.
    """
    return self.server.resource_map.get(path)

  def SendHead(self):
    """Sends the status line and headers for the requested path.

    Returns:
      A ResourceAndRange with the resource being served and the byte range
      that applies to it (None when the whole resource is served), or None
      if the resource was not found and a 404 was sent instead.
    """
    path = os.path.realpath(self.translate_path(self.path))
    resource = self.Response(path)
    if not resource:
      self.send_error(404, 'File not found')
      return None

    total_num_of_bytes = resource['content-length']
    byte_range = self.GetByteRange(total_num_of_bytes)
    if byte_range:
      # request specified a range, so set response code to 206.
      self.send_response(206)
      self.send_header('Content-Range', 'bytes %d-%d/%d' %
                       (byte_range.from_byte, byte_range.to_byte,
                        total_num_of_bytes))
      # From here on this is the number of bytes actually sent.
      total_num_of_bytes = byte_range.to_byte - byte_range.from_byte + 1
    else:
      self.send_response(200)

    self.send_header('Content-Length', str(total_num_of_bytes))
    self.send_header('Content-Type', resource['content-type'])
    self.send_header('Last-Modified',
                     self.date_time_string(resource['last-modified']))
    if resource['zipped']:
      self.send_header('Content-Encoding', 'gzip')
    self.end_headers()
    return ResourceAndRange(resource, byte_range)

  def GetByteRange(self, total_num_of_bytes):
    """Parse the header and get the range values specified.

    Args:
      total_num_of_bytes: Total # of bytes in requested resource,
      used to calculate upper range limit.
    Returns:
      A ByteRange namedtuple object with the requested byte-range values.
      If no Range is explicitly requested, there is a failure parsing, or the
      range starts outside of the resource, return None.
      If range specified is in the format "N-", return N-END. Refer to
      http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html for details.
      If range specified is in the format "-N", return the last N bytes.
      If upper range limit is greater than total # of bytes, return upper index.
    """
    range_header = self.headers.getheader('Range')
    if range_header is None:
      return None

    limits = self._ParseByteRange(range_header, total_num_of_bytes)
    if limits is None:
      return None
    return ByteRange(*limits)

  @staticmethod
  def _ParseByteRange(range_header, total_num_of_bytes):
    """Turns a Range header value into inclusive (from_byte, to_byte) offsets.

    Args:
      range_header: Value of the Range header, e.g. 'bytes=0-1'.
      total_num_of_bytes: Total # of bytes in the requested resource.
    Returns:
      A (from_byte, to_byte) tuple, or None if the header is not a single,
      well-formed byte range that starts inside the resource.
    """
    prefix = 'bytes='
    if not range_header.startswith(prefix):
      return None

    # The range header is expected to be a string in this format:
    # bytes=0-1
    byte_range_values = range_header[len(prefix):].split('-')
    if len(byte_range_values) != 2:
      return None
    first, last = byte_range_values

    last_index = total_num_of_bytes - 1
    try:
      if first:
        from_byte = int(first)
        # If the upper limit is not defined return all bytes from from_byte.
        to_byte = int(last) if last else last_index
      elif last:
        # Suffix form "-N": the last N bytes, which always end at the final
        # byte of the resource.
        from_byte = total_num_of_bytes - int(last)
        to_byte = last_index
      else:
        # "bytes=-" carries no information at all.
        return None
    except ValueError:
      # Non-numeric limits count as a parse failure.
      return None

    # A range starting before the first or after the last byte cannot be
    # served; ignore it so the whole resource is returned instead of producing
    # a negative Content-Length.
    if from_byte < 0 or from_byte >= total_num_of_bytes:
      return None

    # Make to_byte the end byte by default in edge cases.
    if to_byte < from_byte or to_byte >= total_num_of_bytes:
      to_byte = last_index

    return (from_byte, to_byte)
154
155
class _MemoryCacheHTTPServerImpl(SocketServer.ThreadingMixIn,
                                 BaseHTTPServer.HTTPServer):
  """Threaded HTTP server whose responses are preloaded into a dictionary.

  self.resource_map maps a real file-system path to a dictionary holding the
  response body and the metadata needed to build its headers.
  """

  # SocketServer.TCPServer (the parent of BaseHTTPServer.HTTPServer) only
  # queues 5 requests by default. Many domains are intercepted through this
  # single server, so more than 5 concurrent requests are quite possible.
  request_queue_size = 128

  # Don't prevent python from exiting when there is thread activity.
  daemon_threads = True

  def __init__(self, host_port, handler, paths):
    """Binds the server and loads every file reachable from |paths|.

    Args:
      host_port: (host, port) address passed on to BaseHTTPServer.HTTPServer.
      handler: Request handler class passed on to BaseHTTPServer.HTTPServer.
      paths: Files and/or directories to load into the resource map.
    """
    BaseHTTPServer.HTTPServer.__init__(self, host_port, handler)
    self.resource_map = {}
    # Use Telemetry's 'mime.types' file instead of relying on system files to
    # ensure the mime type inference is deterministic
    # (also see crbug.com/894868).
    assert os.path.isfile(_MIME_TYPES_FILE)
    mimetypes.init([_MIME_TYPES_FILE])
    for path in paths:
      load = (self.AddDirectoryToResourceMap if os.path.isdir(path)
              else self.AddFileToResourceMap)
      load(path)

  def AddDirectoryToResourceMap(self, directory_path):
    """Loads all files in directory_path into the in-memory resource map."""
    for root, dirs, files in os.walk(directory_path):
      # Pruning in place keeps os.walk out of hidden folders (.svn, .git).
      dirs[:] = [d for d in dirs if not d.startswith('.')]

      for name in files:
        # Hidden files are skipped as well.
        if name.startswith('.'):
          continue
        candidate = os.path.join(root, name)
        if os.path.exists(candidate):  # Allow for '.#' files
          self.AddFileToResourceMap(candidate)

  def AddFileToResourceMap(self, file_path):
    """Loads file_path into the in-memory resource map."""
    real_path = os.path.realpath(file_path)
    if real_path in self.resource_map:
      return

    with open(real_path, 'rb') as fd:
      body = fd.read()
      modified_time = os.fstat(fd.fileno()).st_mtime

    content_type = mimetypes.guess_type(real_path)[0]
    # Only these text formats are stored (and later served) gzip-compressed.
    zipped = content_type in (
        'text/html', 'text/css', 'application/javascript')
    if zipped:
      buf = StringIO.StringIO()
      gzf = gzip.GzipFile(fileobj=buf, compresslevel=9, mode='wb')
      gzf.write(body)
      gzf.close()
      body = buf.getvalue()
      buf.close()

    entry = {
        'content-type': content_type,
        'content-length': len(body),
        'last-modified': modified_time,
        'response': body,
        'zipped': zipped
    }
    self.resource_map[real_path] = entry

    # An index.html is also registered under its directory's path.
    if os.path.basename(real_path) == 'index.html':
      self.resource_map[os.path.dirname(real_path)] = entry

  def handle_error(self, request, client_address):
    """Handle error in a thread-safe way

    Overrides handle_error of the TCPServer base class. It does the same job
    but reports through the thread-safe logging.error instead of print:
    SocketServer.ThreadingMixIn runs network operations on multiple threads,
    so printing would race on stdout.
    """
    separator = '-'*80
    logging.error('Exception happened during processing of request from '
                  '%s\n%s%s', client_address, traceback.format_exc(),
                  separator)
236
237
class MemoryCacheHTTPServerBackend(local_server.LocalServerBackend):
  """Local server backend that owns a _MemoryCacheHTTPServerImpl instance."""

  def __init__(self):
    super(MemoryCacheHTTPServerBackend, self).__init__()
    # Created by StartAndGetNamedPorts().
    self._httpd = None

  def StartAndGetNamedPorts(self, args, handler_class=None):
    """Creates the HTTP server and reports the port it is bound to.

    Changes the current working directory to args['base_dir'] and exits the
    process with status 1 if any of args['paths'] lies outside of it.

    Args:
      args: Dictionary with the keys 'base_dir', 'paths', 'host' and 'port'.
      handler_class: Optional request handler class to use; it must be a
          subclass of MemoryCacheHTTPRequestHandler.
    Returns:
      A list holding one local_server.NamedPort named 'http'.
    """
    if handler_class:
      assert issubclass(handler_class, MemoryCacheHTTPRequestHandler)

    base_dir = args['base_dir']
    os.chdir(base_dir)

    paths = args['paths']
    # Resolve the cwd once instead of once per path.
    cwd = os.path.realpath(os.getcwd())
    # Joining with '' appends a trailing separator (and leaves a root
    # directory untouched), so that a sibling such as '/a/foobar' is not
    # mistaken for something inside '/a/foo'.
    cwd_prefix = os.path.join(cwd, '')
    for path in paths:
      real_path = os.path.realpath(path)
      if real_path != cwd and not real_path.startswith(cwd_prefix):
        print >> sys.stderr, '"%s" is not under the cwd.' % path
        sys.exit(1)

    server_address = (args['host'], args['port'])
    MemoryCacheHTTPRequestHandler.protocol_version = 'HTTP/1.1'
    self._httpd = _MemoryCacheHTTPServerImpl(
        server_address,
        handler_class if handler_class else MemoryCacheHTTPRequestHandler,
        paths)
    # Report the port the server is actually bound to rather than args['port'].
    return [local_server.NamedPort('http', self._httpd.server_address[1])]

  def ServeForever(self):
    """Runs the server's serve_forever() loop and returns its result."""
    return self._httpd.serve_forever()
267
268
class MemoryCacheHTTPServer(local_server.LocalServer):
  """Local server, backed by MemoryCacheHTTPServerBackend, for a set of paths.

  The directory shared by all the paths becomes the base directory that
  UrlOf() resolves absolute paths against.
  """

  def __init__(self, paths):
    """Records |paths| and derives the base directory from them.

    Args:
      paths: Iterable of existing file or directory paths to serve.
    """
    super(MemoryCacheHTTPServer, self).__init__(MemoryCacheHTTPServerBackend)
    self._base_dir = None

    for path in paths:
      assert os.path.exists(path), '%s does not exist.' % path

    self._paths = list(paths)
    self._paths_as_set = set(os.path.realpath(p) for p in self._paths)

    # os.path.commonprefix() compares character by character, so its result
    # may stop in the middle of a name; fall back to the parent directory when
    # it is not itself a directory.
    shared = os.path.commonprefix(self._paths)
    self._base_dir = shared if os.path.isdir(shared) else os.path.dirname(
        shared)

  def GetBackendStartupArgs(self):
    """Returns the dictionary of arguments the backend is started with."""
    startup_args = {}
    startup_args['base_dir'] = self._base_dir
    startup_args['paths'] = self._paths
    startup_args['host'] = self.host_ip
    startup_args['port'] = 0
    return startup_args

  @property
  def paths(self):
    """The served paths as a set of real paths."""
    return self._paths_as_set

  @property
  def url(self):
    """Root URL of the server."""
    return 'http://127.0.0.1:%s' % self.port

  def UrlOf(self, path):
    """Returns the URL for |path|.

    Args:
      path: A path relative to the base directory, or an absolute path, which
          is first made relative to the base directory.
    """
    relative_path = path
    if os.path.isabs(path):
      relative_path = os.path.relpath(path, self._base_dir)
    # A trailing slash or backslash means nothing in a file path but it does
    # matter in a URL, so carry it over.
    ends_with_separator = path.endswith(os.sep) or (
        os.altsep and path.endswith(os.altsep))
    if ends_with_separator:
      relative_path += '/'
    url_path = relative_path.replace(os.sep, '/')
    return urlparse.urljoin(self.url, url_path)
313
314
class MemoryCacheDynamicHTTPRequestHandler(MemoryCacheHTTPRequestHandler):
  """A MemoryCacheHTTPRequestHandler that can also serve dynamic responses.

  Inherit from this class, override ResponseFromHandler(), and register the
  sub-class to the story set (through StorySet.SetRequestHandlerClass() or the
  constructor).
  """

  def ResponseFromHandler(self, path):
    """Override this method to return dynamic response."""
    del path  # Unused.
    return None

  def Response(self, path):
    """Returns the dynamic response for |path| if there is one.

    When ResponseFromHandler() yields nothing, the entry stored for |path| in
    the server's resource map is returned, or None if there is none.
    """
    dynamic_response = self.ResponseFromHandler(path)
    if not dynamic_response:
      return self.server.resource_map.get(path)
    return dynamic_response

  def MakeResponse(self, content, content_type, zipped):
    """Helper method to create a response object.

    Args:
      content: Body of the response; its length becomes 'content-length'.
      content_type: Value stored under 'content-type'.
      zipped: Value stored under 'zipped'.
    Returns:
      A dictionary with the same keys as a resource map entry, whose
      'last-modified' value is None.
    """
    response = {}
    response['content-type'] = content_type
    response['content-length'] = len(content)
    response['last-modified'] = None
    response['response'] = content
    response['zipped'] = zipped
    return response
349
350
class MemoryCacheDynamicHTTPServer(MemoryCacheHTTPServer):
  """A MemoryCacheHTTPServer that also supports returning dynamic responses.

  The module and class name of the dynamic request handler are added to the
  backend startup arguments.
  """

  def __init__(self, paths, dynamic_request_handler_class):
    """Stores the handler class after checking its type.

    Args:
      paths: Paths to serve, passed on to MemoryCacheHTTPServer.
      dynamic_request_handler_class: Must be a sub-class of
          MemoryCacheDynamicHTTPRequestHandler.
    """
    assert issubclass(dynamic_request_handler_class,
                      MemoryCacheDynamicHTTPRequestHandler)
    super(MemoryCacheDynamicHTTPServer, self).__init__(paths)
    self._dynamic_request_handler_class = dynamic_request_handler_class

  @property
  def dynamic_request_handler_class(self):
    """The request handler class given to the constructor."""
    return self._dynamic_request_handler_class

  def GetBackendStartupArgs(self):
    """Returns the base startup args plus the handler's module and name."""
    args = super(MemoryCacheDynamicHTTPServer, self).GetBackendStartupArgs()
    handler_class = self._dynamic_request_handler_class
    args['dynamic_request_handler_module_name'] = handler_class.__module__
    args['dynamic_request_handler_class_name'] = handler_class.__name__
    return args
375