"""Request-entity decoding, response charset encoding and gzip compression."""

import struct
import time
import io

import cherrypy
from cherrypy._cpcompat import text_or_bytes
from cherrypy.lib import file_generator
from cherrypy.lib import is_closable_iterator
from cherrypy.lib import set_vary_header


_COMPRESSION_LEVEL_FAST = 1
_COMPRESSION_LEVEL_BEST = 9

# Keeps the gzip MTIME, CRC32 and ISIZE fields within 32 unsigned bits.
_UINT32_MASK = 0xFFFFFFFF


def decode(encoding=None, default_encoding='utf-8'):
    """Replace or extend the list of charsets used to decode a request entity.

    Either argument may be a single string or a list of strings.

    encoding
        If not None, restricts the set of charsets attempted while decoding
        a request entity to the given set (even if a different charset is
        given in the Content-Type request header).

    default_encoding
        Only in effect if the 'encoding' argument is not given.
        If given, the set of charsets attempted while decoding a request
        entity is *extended* with the given value(s).

    """
    body = cherrypy.request.body
    if encoding is not None:
        if not isinstance(encoding, list):
            encoding = [encoding]
        # Replace the attempt list outright.
        body.attempt_charsets = encoding
    elif default_encoding:
        if not isinstance(default_encoding, list):
            default_encoding = [default_encoding]
        # Append to (rather than replace) the existing attempt list.
        body.attempt_charsets = body.attempt_charsets + default_encoding


class UTF8StreamEncoder:
    """Iterator wrapper that encodes ``str`` chunks as UTF-8 bytes.

    Chunks that are not ``str`` are passed through untouched. Attributes
    not defined on the wrapper are looked up on the wrapped iterator.
    """

    def __init__(self, iterator):
        self._iterator = iterator

    def __iter__(self):
        return self

    def next(self):
        """Alias for :meth:`__next__`."""
        return self.__next__()

    def __next__(self):
        res = next(self._iterator)
        if isinstance(res, str):
            res = res.encode('utf-8')
        return res

    def close(self):
        """Close the wrapped iterator when ``is_closable_iterator`` allows."""
        if is_closable_iterator(self._iterator):
            self._iterator.close()

    def __getattr__(self, attr):
        # Dunder names are never proxied to the wrapped iterator.
        if attr.startswith('__'):
            raise AttributeError(self, attr)
        return getattr(self._iterator, attr)


class ResponseEncoder:
    """Handler wrapper that encodes the response body to a charset.

    On construction the instance takes the place of ``request.handler``
    (when one is set); calling the instance runs the original handler and
    then encodes its output with a charset chosen from ``Accept-Charset``.
    Keyword arguments passed to the constructor override the class-level
    settings below.
    """

    # Charset tried when the client expresses no preference (or sends '*').
    default_encoding = 'utf-8'
    # Message template for the 500 raised when the default charset fails.
    failmsg = 'Response body could not be encoded with %r.'
    # When not None, the only charset that will be attempted.
    encoding = None
    # Error handling scheme handed to ``str.encode``.
    errors = 'strict'
    # When true, only responses whose Content-Type starts with 'text/'
    # are encoded.
    text_only = True
    # Encoding (and setting the charset param) only happens when this is
    # true and the response has a Content-Type header.
    add_charset = True
    # When true, progress is logged under the 'TOOLS.ENCODE' context.
    debug = False

    def __init__(self, **kwargs):
        for k, v in kwargs.items():
            setattr(self, k, v)

        self.attempted_charsets = set()
        request = cherrypy.serving.request
        if request.handler is not None:
            # Replace request.handler with self
            if self.debug:
                cherrypy.log('Replacing request.handler', 'TOOLS.ENCODE')
            self.oldhandler = request.handler
            request.handler = self

    def encode_stream(self, encoding):
        """Encode a streaming response body.

        The body is wrapped in a generator that encodes ``str`` chunks
        lazily, so encoding errors can only surface while the stream is
        being written out.

        Returns False if ``encoding`` was already attempted, else True.
        """
        if encoding in self.attempted_charsets:
            return False
        self.attempted_charsets.add(encoding)

        def encoder(body):
            for chunk in body:
                if isinstance(chunk, str):
                    chunk = chunk.encode(encoding, self.errors)
                yield chunk
        self.body = encoder(self.body)
        return True

    def encode_string(self, encoding):
        """Encode a buffered response body.

        Returns False if ``encoding`` was already attempted or if any
        chunk cannot be encoded with it (``self.body`` is then left
        unreplaced); returns True once ``self.body`` holds the encoded
        chunks.
        """
        if encoding in self.attempted_charsets:
            return False
        self.attempted_charsets.add(encoding)
        body = []
        for chunk in self.body:
            if isinstance(chunk, str):
                try:
                    chunk = chunk.encode(encoding, self.errors)
                except (LookupError, UnicodeError):
                    return False
            body.append(chunk)
        self.body = body
        return True

    def find_acceptable_charset(self):
        """Encode ``self.body`` and return the charset that was used.

        Raises ``cherrypy.HTTPError`` 500 when the client sent no
        Accept-Charset header and the default encoding fails, and 406
        when none of the candidate charsets could be applied.
        """
        request = cherrypy.serving.request
        response = cherrypy.serving.response

        if self.debug:
            cherrypy.log('response.stream %r' %
                         response.stream, 'TOOLS.ENCODE')
        if response.stream:
            encoder = self.encode_stream
        else:
            encoder = self.encode_string
            if 'Content-Length' in response.headers:
                # Delete Content-Length header so finalize() recalcs it.
                # Encoded strings may be of different lengths from their
                # unicode equivalents, and even from each other. For example:
                # >>> t = u"\u7007\u3040"
                # >>> len(t)
                # 2
                # >>> len(t.encode("UTF-8"))
                # 6
                # >>> len(t.encode("utf7"))
                # 8
                del response.headers['Content-Length']

        # Parse the Accept-Charset request header, and try to provide one
        # of the requested charsets (in order of user preference).
        encs = request.headers.elements('Accept-Charset')
        charsets = [enc.value.lower() for enc in encs]
        if self.debug:
            cherrypy.log('charsets %s' % repr(charsets), 'TOOLS.ENCODE')

        if self.encoding is not None:
            # If specified, force this encoding to be used, or fail.
            encoding = self.encoding.lower()
            if self.debug:
                cherrypy.log('Specified encoding %r' %
                             encoding, 'TOOLS.ENCODE')
            if (not charsets) or '*' in charsets or encoding in charsets:
                if self.debug:
                    cherrypy.log('Attempting encoding %r' %
                                 encoding, 'TOOLS.ENCODE')
                if encoder(encoding):
                    return encoding
        else:
            if not encs:
                if self.debug:
                    cherrypy.log('Attempting default encoding %r' %
                                 self.default_encoding, 'TOOLS.ENCODE')
                # Any character-set is acceptable.
                if encoder(self.default_encoding):
                    return self.default_encoding
                else:
                    raise cherrypy.HTTPError(500, self.failmsg %
                                             self.default_encoding)
            else:
                for element in encs:
                    if element.qvalue > 0:
                        if element.value == '*':
                            # Matches any charset. Try our default.
                            if self.debug:
                                cherrypy.log('Attempting default encoding due '
                                             'to %r' % element, 'TOOLS.ENCODE')
                            if encoder(self.default_encoding):
                                return self.default_encoding
                        else:
                            encoding = element.value
                            if self.debug:
                                cherrypy.log('Attempting encoding %s (qvalue >'
                                             '0)' % element, 'TOOLS.ENCODE')
                            if encoder(encoding):
                                return encoding

                if '*' not in charsets:
                    # If no "*" is present in an Accept-Charset field, then all
                    # character sets not explicitly mentioned get a quality
                    # value of 0, except for ISO-8859-1, which gets a quality
                    # value of 1 if not explicitly mentioned.
                    iso = 'iso-8859-1'
                    if iso not in charsets:
                        if self.debug:
                            cherrypy.log('Attempting ISO-8859-1 encoding',
                                         'TOOLS.ENCODE')
                        if encoder(iso):
                            return iso

        # No suitable encoding found.
        ac = request.headers.get('Accept-Charset')
        if ac is None:
            msg = 'Your client did not send an Accept-Charset header.'
        else:
            msg = 'Your client sent this Accept-Charset header: %s.' % ac
        _charsets = ', '.join(sorted(self.attempted_charsets))
        msg += ' We tried these charsets: %s.' % (_charsets,)
        raise cherrypy.HTTPError(406, msg)

    def __call__(self, *args, **kwargs):
        """Run the wrapped handler and return its (possibly encoded) body."""
        response = cherrypy.serving.response
        self.body = self.oldhandler(*args, **kwargs)

        self.body = prepare_iter(self.body)

        ct = response.headers.elements('Content-Type')
        if self.debug:
            cherrypy.log('Content-Type: %r' % [str(h)
                         for h in ct], 'TOOLS.ENCODE')
        if ct and self.add_charset:
            # Only the first Content-Type element is considered.
            ct = ct[0]
            if self.text_only:
                if ct.value.lower().startswith('text/'):
                    if self.debug:
                        cherrypy.log(
                            'Content-Type %s starts with "text/"' % ct,
                            'TOOLS.ENCODE')
                    do_find = True
                else:
                    if self.debug:
                        cherrypy.log('Not finding because Content-Type %s '
                                     'does not start with "text/"' % ct,
                                     'TOOLS.ENCODE')
                    do_find = False
            else:
                if self.debug:
                    cherrypy.log('Finding because not text_only',
                                 'TOOLS.ENCODE')
                do_find = True

            if do_find:
                # Set "charset=..." param on response Content-Type header
                ct.params['charset'] = self.find_acceptable_charset()
                if self.debug:
                    cherrypy.log('Setting Content-Type %s' % ct,
                                 'TOOLS.ENCODE')
                response.headers['Content-Type'] = str(ct)

        return self.body


def prepare_iter(value):
    """
    Ensure response body is iterable and resolves to False when empty.
    """
    if isinstance(value, text_or_bytes):
        # strings get wrapped in a list because iterating over a single
        # item list is much faster than iterating over every character
        # in a long string.
        if value:
            value = [value]
        else:
            # [''] doesn't evaluate to False, so replace it with [].
            value = []
    # Don't use isinstance here; io.IOBase which has an ABC takes
    # 1000 times as long as, say, isinstance(value, str)
    elif hasattr(value, 'read'):
        value = file_generator(value)
    elif value is None:
        value = []
    return value


# GZIP


def compress(body, compress_level):
    """Compress 'body' at the given compress_level.

    'body' is an iterable of bytes chunks. This is a generator yielding
    the pieces of a gzip stream: the 10-byte header, one raw-deflate
    piece per input chunk, the flushed remainder, and the CRC32/ISIZE
    trailer.
    """
    import zlib

    # See https://tools.ietf.org/html/rfc1952
    yield b'\x1f\x8b'       # ID1 and ID2: gzip marker
    yield b'\x08'           # CM: compression method
    yield b'\x00'           # FLG: none set
    # MTIME: 4 bytes
    yield struct.pack('<L', int(time.time()) & _UINT32_MASK)

    # RFC 1952, section 2.3.1:
    #
    # XFL (eXtra FLags)
    #    These flags are available for use by specific compression
    #    methods.  The "deflate" method (CM = 8) sets these flags as
    #    follows:
    #
    #       XFL = 2 - compressor used maximum compression,
    #                 slowest algorithm
    #       XFL = 4 - compressor used fastest algorithm
    if compress_level == _COMPRESSION_LEVEL_BEST:
        yield b'\x02'       # XFL: max compression, slowest algo
    elif compress_level == _COMPRESSION_LEVEL_FAST:
        yield b'\x04'       # XFL: min compression, fastest algo
    else:
        yield b'\x00'       # XFL: compression unset/tradeoff
    yield b'\xff'           # OS: unknown

    crc = zlib.crc32(b'')
    size = 0
    # Negative wbits selects a raw deflate stream; the gzip framing is
    # produced by hand above and below.
    zobj = zlib.compressobj(compress_level,
                            zlib.DEFLATED, -zlib.MAX_WBITS,
                            zlib.DEF_MEM_LEVEL, 0)
    for line in body:
        size += len(line)
        crc = zlib.crc32(line, crc)
        yield zobj.compress(line)
    yield zobj.flush()

    # CRC32: 4 bytes
    yield struct.pack('<L', crc & _UINT32_MASK)
    # ISIZE: 4 bytes
    yield struct.pack('<L', size & _UINT32_MASK)


def decompress(body):
    """Return the decompressed content of the gzip-encoded bytes 'body'."""
    # Imported locally: the module-level name 'gzip' is the tool function
    # defined below.
    import gzip

    # The context manager closes the GzipFile even if read() raises.
    with gzip.GzipFile(mode='rb', fileobj=io.BytesIO(body)) as zfile:
        return zfile.read()


def _mime_type_matches(ct, mime_types):
    """Return True if media type 'ct' is matched by an entry of 'mime_types'.

    Besides exact matches, entries of the form `type/*` and
    `type/*+suffix` are honoured. Parameters are not supported; 'ct' must
    already have them removed.
    """
    if ct in mime_types:
        return True

    # partition()/rpartition() never raise, unlike unpacking split(), when
    # a value contains more than one '/' or '+'.
    ct_media_type, slash, ct_sub_type = ct.partition('/')
    if not slash:
        return False
    ct_has_suffix = '+' in ct_sub_type
    ct_suffix = ct_sub_type.rpartition('+')[2]

    for mime_type in mime_types:
        media_type, slash, sub_type = mime_type.partition('/')
        if not slash or media_type != ct_media_type:
            continue
        if sub_type == '*':
            return True
        if ct_has_suffix and '+' in sub_type:
            left, _, right = sub_type.rpartition('+')
            if left == '*' and right == ct_suffix:
                return True
    return False


def gzip(compress_level=5, mime_types=['text/html', 'text/plain'],
         debug=False):
    """Try to gzip the response body if Content-Type in mime_types.

    cherrypy.response.headers['Content-Type'] must be set to one of the
    values in the mime_types arg before calling this function.

    The provided list of mime-types must be of one of the following form:
        * `type/subtype`
        * `type/*`
        * `type/*+subtype`

    No compression is performed if any of the following hold:
        * The client sends no Accept-Encoding request header
        * No 'gzip' or 'x-gzip' is present in the Accept-Encoding header
        * No 'gzip' or 'x-gzip' with a qvalue > 0 is present
        * The 'identity' value is given with a qvalue > 0.

    When the Accept-Encoding header is present but yields no decision, a
    406 response is set. The default 'mime_types' list is only read,
    never mutated.

    """
    request = cherrypy.serving.request
    response = cherrypy.serving.response

    set_vary_header(response, 'Accept-Encoding')

    if not response.body:
        # Response body is empty (might be a 304 for instance)
        if debug:
            cherrypy.log('No response body', context='TOOLS.GZIP')
        return

    # If returning cached content (which should already have been gzipped),
    # don't re-zip.
    if getattr(request, 'cached', False):
        if debug:
            cherrypy.log('Not gzipping cached response', context='TOOLS.GZIP')
        return

    acceptable = request.headers.elements('Accept-Encoding')
    if not acceptable:
        # If no Accept-Encoding field is present in a request,
        # the server MAY assume that the client will accept any
        # content coding. In this case, if "identity" is one of
        # the available content-codings, then the server SHOULD use
        # the "identity" content-coding, unless it has additional
        # information that a different content-coding is meaningful
        # to the client.
        if debug:
            cherrypy.log('No Accept-Encoding', context='TOOLS.GZIP')
        return

    # Drop any parameters; optional whitespace before ';' is legal, so
    # strip it to keep e.g. 'text/html ; charset=utf-8' matching.
    ct = response.headers.get('Content-Type', '').split(';')[0].strip()
    for coding in acceptable:
        if coding.value == 'identity' and coding.qvalue != 0:
            if debug:
                cherrypy.log('Non-zero identity qvalue: %s' % coding,
                             context='TOOLS.GZIP')
            return
        if coding.value in ('gzip', 'x-gzip'):
            if coding.qvalue == 0:
                if debug:
                    cherrypy.log('Zero gzip qvalue: %s' % coding,
                                 context='TOOLS.GZIP')
                return

            # If the list of provided mime-types contains tokens
            # such as 'text/*' or 'application/*+xml',
            # we go through them and find the most appropriate one
            # based on the given content-type.
            # The pattern matching is only caring about the most
            # common cases, as stated above, and doesn't support
            # for extra parameters.
            if not _mime_type_matches(ct, mime_types):
                if debug:
                    cherrypy.log('Content-Type %s not in mime_types %r' %
                                 (ct, mime_types), context='TOOLS.GZIP')
                return

            if debug:
                cherrypy.log('Gzipping', context='TOOLS.GZIP')
            # Return a generator that compresses the page
            response.headers['Content-Encoding'] = 'gzip'
            response.body = compress(response.body, compress_level)
            if 'Content-Length' in response.headers:
                # Delete Content-Length header so finalize() recalcs it.
                del response.headers['Content-Length']

            return

    if debug:
        cherrypy.log('No acceptable encoding found.', context='GZIP')
    cherrypy.HTTPError(406, 'identity, gzip').set_response()