# Copyright 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""HTTP server that serves a fixed set of files from an in-memory cache.

All files are read (and, for some text types, gzip-compressed) once when the
server is constructed; requests are then answered from the resulting
dictionary without touching the disk.
"""

import BaseHTTPServer
from collections import namedtuple
import errno
import gzip
import logging
import mimetypes
import os
import SimpleHTTPServer
import socket
import SocketServer
import StringIO
import sys
import traceback
import urlparse

from telemetry.core import local_server

# Inclusive byte offsets of a requested range within a resource.
ByteRange = namedtuple('ByteRange', ['from_byte', 'to_byte'])
# A resource_map entry paired with the (possibly None) range requested of it.
ResourceAndRange = namedtuple('ResourceAndRange', ['resource', 'byte_range'])

_MIME_TYPES_FILE = os.path.abspath(
    os.path.join(os.path.dirname(__file__), 'mime.types'))


class MemoryCacheHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
  """Request handler that answers GET/HEAD from the server's resource_map."""

  protocol_version = 'HTTP/1.1'  # override BaseHTTPServer setting
  wbufsize = -1  # override StreamRequestHandler (a base class) setting

  def handle(self):
    """Handles requests on the connection, ignoring connection resets."""
    try:
      BaseHTTPServer.BaseHTTPRequestHandler.handle(self)
    except socket.error as e:
      # Connection reset errors happen all the time due to the browser closing
      # without terminating the connection properly. They can be safely
      # ignored.
      # Use the errno attribute rather than indexing the exception: indexing
      # raises IndexError when the exception carries no arguments.
      if e.errno != errno.ECONNRESET:
        raise

  def do_GET(self):
    """Serve a GET request.

    Sends the headers via SendHead() and then writes either the whole cached
    response or, when a byte range was requested, the inclusive slice of it.
    """
    resource_range = self.SendHead()

    if not resource_range or not resource_range.resource:
      return
    response = resource_range.resource['response']

    if not resource_range.byte_range:
      self.wfile.write(response)
      return

    start_index = resource_range.byte_range.from_byte
    end_index = resource_range.byte_range.to_byte
    # to_byte is inclusive, hence the + 1 on the slice end.
    self.wfile.write(response[start_index:end_index + 1])

  def do_HEAD(self):
    """Serve a HEAD request."""
    self.SendHead()

  def log_error(self, fmt, *args):
    """Suppresses the base class's error logging."""
    pass

  def log_request(self, code='-', size='-'):
    """Suppresses the base class's per-request logging."""
    # Don't spam the console unless it is important.
    pass

  def Response(self, path):
    """Get the response for the path.

    Args:
      path: Key to look up in the server's resource_map.
    Returns:
      The resource_map entry for path, or None if there is none.
    """
    return self.server.resource_map.get(path)

  def SendHead(self):
    """Sends the status line and headers for the requested path.

    Returns:
      A ResourceAndRange holding the resource and the requested byte range
      (None when no usable Range header was sent), or None after sending a
      404 when there is no resource for the path.
    """
    path = os.path.realpath(self.translate_path(self.path))
    resource = self.Response(path)
    if not resource:
      self.send_error(404, 'File not found')
      return None

    total_num_of_bytes = resource['content-length']
    byte_range = self.GetByteRange(total_num_of_bytes)
    if byte_range:
      # request specified a range, so set response code to 206.
      self.send_response(206)
      self.send_header('Content-Range', 'bytes %d-%d/%d' %
                       (byte_range.from_byte, byte_range.to_byte,
                        total_num_of_bytes))
      # From here on this is the number of bytes in the partial body.
      total_num_of_bytes = byte_range.to_byte - byte_range.from_byte + 1
    else:
      self.send_response(200)

    self.send_header('Content-Length', str(total_num_of_bytes))
    self.send_header('Content-Type', resource['content-type'])
    self.send_header('Last-Modified',
                     self.date_time_string(resource['last-modified']))
    if resource['zipped']:
      self.send_header('Content-Encoding', 'gzip')
    self.end_headers()
    return ResourceAndRange(resource, byte_range)

  def GetByteRange(self, total_num_of_bytes):
    """Parse the header and get the range values specified.

    Args:
      total_num_of_bytes: Total # of bytes in requested resource,
      used to calculate upper range limit.
    Returns:
      A ByteRange namedtuple object with the requested (inclusive) byte-range
      values, or None if the request should be served in full. None is
      returned when no Range is explicitly requested, when the header cannot
      be parsed (wrong unit, more than one range, non-numeric values, no
      values at all), or when the range starts outside the resource.
      If range specified is in the format "N-", return N-END. Refer to
      http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html for details.
      If range specified is in the format "-N", return the last N bytes.
      If the upper range limit is smaller than the lower limit or lies beyond
      the last byte, the upper limit is set to the last byte.
    """

    range_header = self.headers.getheader('Range')
    if range_header is None:
      return None
    if not range_header.startswith('bytes='):
      return None

    # The range header is expected to be a string in this format:
    # bytes=0-1
    # Get the upper and lower limits of the specified byte-range.
    # We've already confirmed that range_header starts with 'bytes='.
    byte_range_values = range_header[len('bytes='):].split('-')
    if len(byte_range_values) != 2:
      return None
    first_value, last_value = byte_range_values

    last_byte = total_num_of_bytes - 1
    try:
      if first_value:
        from_byte = int(first_value)
        # If the upper limit is not defined return all bytes starting from
        # from_byte.
        to_byte = int(last_value) if last_value else last_byte
      elif last_value:
        # Suffix range: the value is a length, not an offset, so the range
        # always runs to the last byte of the resource.
        from_byte = total_num_of_bytes - int(last_value)
        to_byte = last_byte
      else:
        # 'bytes=-' carries no information at all.
        return None
    except ValueError:
      # Non-numeric limits count as a parsing failure.
      return None

    # Do some validation. A start offset outside the resource cannot be
    # served as a partial response, so fall back to serving everything.
    if from_byte < 0 or from_byte >= total_num_of_bytes:
      return None

    # Make to_byte the end byte by default in edge cases.
    if to_byte < from_byte or to_byte >= total_num_of_bytes:
      to_byte = last_byte

    return ByteRange(from_byte, to_byte)


class _MemoryCacheHTTPServerImpl(SocketServer.ThreadingMixIn,
                                 BaseHTTPServer.HTTPServer):
  """Threaded HTTP server holding every served file in self.resource_map.

  resource_map maps the real path of a file (or of a directory containing an
  index.html) to a dict with the keys 'content-type', 'content-length',
  'last-modified', 'response' and 'zipped'.
  """

  # Increase the request queue size. The default value, 5, is set in
  # SocketServer.TCPServer (the parent of BaseHTTPServer.HTTPServer).
  # Since we're intercepting many domains through this single server,
  # it is quite possible to get more than 5 concurrent requests.
  request_queue_size = 128

  # Don't prevent python from exiting when there is thread activity.
  daemon_threads = True

  def __init__(self, host_port, handler, paths):
    """Binds the server and loads all of paths into memory.

    Args:
      host_port: (host, port) tuple passed on to BaseHTTPServer.HTTPServer.
      handler: Request handler class passed on to BaseHTTPServer.HTTPServer.
      paths: Iterable of files and/or directories to serve.
    """
    BaseHTTPServer.HTTPServer.__init__(self, host_port, handler)
    self.resource_map = {}
    # Use Telemetry's 'mime.types' file instead of relying on system files to
    # ensure the mime type inference is deterministic
    # (also see crbug.com/894868).
    assert os.path.isfile(_MIME_TYPES_FILE)
    mimetypes.init([_MIME_TYPES_FILE])
    for path in paths:
      if os.path.isdir(path):
        self.AddDirectoryToResourceMap(path)
      else:
        self.AddFileToResourceMap(path)

  def AddDirectoryToResourceMap(self, directory_path):
    """Loads all files in directory_path into the in-memory resource map."""
    for root, dirs, files in os.walk(directory_path):
      # Skip hidden files and folders (like .svn and .git).
      files = [f for f in files if f[0] != '.']
      # Assign in place so that os.walk does not descend into hidden folders.
      dirs[:] = [d for d in dirs if d[0] != '.']

      for f in files:
        file_path = os.path.join(root, f)
        if not os.path.exists(file_path):  # Allow for '.#' files
          continue
        self.AddFileToResourceMap(file_path)

  def AddFileToResourceMap(self, file_path):
    """Loads file_path into the in-memory resource map.

    The file is keyed by its real path; a file that is already present is not
    read again. HTML, CSS and JavaScript content is stored gzip-compressed.
    An index.html is additionally registered under its directory's path.
    """
    file_path = os.path.realpath(file_path)
    if file_path in self.resource_map:
      return

    with open(file_path, 'rb') as fd:
      response = fd.read()
      fs = os.fstat(fd.fileno())
    content_type = mimetypes.guess_type(file_path)[0]
    zipped = False
    if content_type in ['text/html', 'text/css', 'application/javascript']:
      zipped = True
      sio = StringIO.StringIO()
      gzf = gzip.GzipFile(fileobj=sio, compresslevel=9, mode='wb')
      gzf.write(response)
      gzf.close()
      response = sio.getvalue()
      sio.close()
    self.resource_map[file_path] = {
        'content-type': content_type,
        # Length of the stored (possibly compressed) body.
        'content-length': len(response),
        'last-modified': fs.st_mtime,
        'response': response,
        'zipped': zipped
    }

    index = 'index.html'
    if os.path.basename(file_path) == index:
      dir_path = os.path.dirname(file_path)
      self.resource_map[dir_path] = self.resource_map[file_path]

  def handle_error(self, request, client_address):
    """Handle error in a thread-safe way

    We override handle_error method of our base TCPServer class. It does the
    same but uses thread-safe logging.error instead of print, because
    SocketServer.ThreadingMixIn runs network operations on multiple threads and
    there's a race condition on stdout.
    """
    logging.error('Exception happened during processing of request from '
                  '%s\n%s%s', client_address, traceback.format_exc(), '-'*80)


class MemoryCacheHTTPServerBackend(local_server.LocalServerBackend):
  """Backend that creates and runs a _MemoryCacheHTTPServerImpl."""

  def __init__(self):
    super(MemoryCacheHTTPServerBackend, self).__init__()
    self._httpd = None

  def StartAndGetNamedPorts(self, args, handler_class=None):
    """Creates the HTTP server and reports the port it is bound to.

    Changes the current working directory to args['base_dir'] and exits the
    process with status 1 if any of args['paths'] is not located under it.

    Args:
      args: Dict with the keys 'base_dir', 'paths', 'host' and 'port'.
      handler_class: Optional subclass of MemoryCacheHTTPRequestHandler to
          handle requests with; MemoryCacheHTTPRequestHandler when omitted.
    Returns:
      A list with a single local_server.NamedPort named 'http'.
    """
    if handler_class:
      assert issubclass(handler_class, MemoryCacheHTTPRequestHandler)

    base_dir = args['base_dir']
    os.chdir(base_dir)

    cwd = os.path.realpath(os.getcwd())
    # Compare against the cwd with a trailing separator so that a sibling
    # whose name merely starts with the cwd's name (cwd '/a/b', path '/a/bc')
    # is not mistaken for a descendant. os.path.join(cwd, '') appends the
    # separator only when it is missing.
    cwd_prefix = os.path.join(cwd, '')
    paths = args['paths']
    for path in paths:
      real_path = os.path.realpath(path)
      if real_path != cwd and not real_path.startswith(cwd_prefix):
        sys.stderr.write('"%s" is not under the cwd.\n' % path)
        sys.exit(1)

    server_address = (args['host'], args['port'])
    MemoryCacheHTTPRequestHandler.protocol_version = 'HTTP/1.1'
    self._httpd = _MemoryCacheHTTPServerImpl(
        server_address,
        handler_class if handler_class else MemoryCacheHTTPRequestHandler,
        paths)
    return [local_server.NamedPort('http', self._httpd.server_address[1])]

  def ServeForever(self):
    """Serves requests until the underlying server is shut down."""
    return self._httpd.serve_forever()


class MemoryCacheHTTPServer(local_server.LocalServer):
  """Local server description for a set of files served from memory."""

  def __init__(self, paths):
    """Records the paths to serve and derives their common base directory.

    Args:
      paths: Iterable of existing files and/or directories to serve.
    """
    super(MemoryCacheHTTPServer, self).__init__(MemoryCacheHTTPServerBackend)
    self._base_dir = None

    for path in paths:
      assert os.path.exists(path), '%s does not exist.' % path

    paths = list(paths)
    self._paths = paths

    self._paths_as_set = set(map(os.path.realpath, paths))

    # os.path.commonprefix works character by character, so the result may
    # end in the middle of a path component; fall back to its parent then.
    common_prefix = os.path.commonprefix(paths)
    if os.path.isdir(common_prefix):
      self._base_dir = common_prefix
    else:
      self._base_dir = os.path.dirname(common_prefix)

  def GetBackendStartupArgs(self):
    """Returns the args dict for the backend's StartAndGetNamedPorts()."""
    return {'base_dir': self._base_dir,
            'paths': self._paths,
            'host': self.host_ip,
            # Port 0 is passed straight to the server's bind address; the
            # backend reports the port that was actually bound.
            'port': 0}

  @property
  def paths(self):
    """The set of real paths of the served files and directories."""
    return self._paths_as_set

  @property
  def url(self):
    """Root URL of the server."""
    return 'http://127.0.0.1:%s' % self.port

  def UrlOf(self, path):
    """Returns the URL under which the given file path is served.

    Args:
      path: Absolute path (made relative to the base directory) or a path
          that is already relative.
    """
    if os.path.isabs(path):
      relative_path = os.path.relpath(path, self._base_dir)
    else:
      relative_path = path
    # Preserve trailing slash or backslash.
    # It doesn't matter in a file path, but it does matter in a URL.
    if path.endswith(os.sep) or (os.altsep and path.endswith(os.altsep)):
      relative_path += '/'
    return urlparse.urljoin(self.url, relative_path.replace(os.sep, '/'))


class MemoryCacheDynamicHTTPRequestHandler(MemoryCacheHTTPRequestHandler):
  """This class extends MemoryCacheHTTPRequestHandler by adding support for
  dynamic responses. Inherit this class and register the sub-class to the
  story set (through StorySet.SetRequestHandlerClass() or the constructor).
  """

  def ResponseFromHandler(self, path):
    """Override this method to return dynamic response."""
    del path  # Unused.
    return None

  def Response(self, path):
    """Returns the dynamic response if exists, otherwise, use the resource
    map.
    """
    response = self.ResponseFromHandler(path)
    if response:
      return response

    # Explicit base class call instead of super(): the Python 2 handler
    # classes are presumably old-style, which super() does not support.
    return MemoryCacheHTTPRequestHandler.Response(self, path)

  def MakeResponse(self, content, content_type, zipped):
    """Helper method to create a response object.

    Args:
      content: The response body.
      content_type: Value for the Content-Type header.
      zipped: Whether content is gzip-compressed.
    Returns:
      A dict with the same keys as the entries of the server's resource map;
      'last-modified' is None.
    """
    return {
        'content-type': content_type,
        'content-length': len(content),
        'last-modified': None,
        'response': content,
        'zipped': zipped
    }


class MemoryCacheDynamicHTTPServer(MemoryCacheHTTPServer):
  """This class extends MemoryCacheHTTPServer by adding support for returning
  dynamic responses.
  """

  def __init__(self, paths, dynamic_request_handler_class):
    # dynamic_request_handler_class must be a sub-class of
    # MemoryCacheDynamicHTTPRequestHandler
    assert issubclass(dynamic_request_handler_class,
                      MemoryCacheDynamicHTTPRequestHandler)
    super(MemoryCacheDynamicHTTPServer, self).__init__(paths)
    self._dynamic_request_handler_class = dynamic_request_handler_class

  @property
  def dynamic_request_handler_class(self):
    """The request handler class given to the constructor."""
    return self._dynamic_request_handler_class

  def GetBackendStartupArgs(self):
    """Returns the base class's args plus the handler's module and name."""
    args = super(MemoryCacheDynamicHTTPServer, self).GetBackendStartupArgs()
    args['dynamic_request_handler_module_name'] = \
        self._dynamic_request_handler_class.__module__
    args['dynamic_request_handler_class_name'] = \
        self._dynamic_request_handler_class.__name__
    return args