# Copyright (c) 2008-2011 testtools developers. See LICENSE for details.

"""Compatibility support for python 2 and 3."""

__metaclass__ = type
__all__ = [
    '_b',
    '_u',
    'advance_iterator',
    'all',
    'BytesIO',
    'classtypes',
    'isbaseexception',
    'istext',
    'str_is_unicode',
    'StringIO',
    'reraise',
    'unicode_output_stream',
    ]

import codecs
import io
import linecache
import locale
import os
import re
import sys
import traceback
import unicodedata

from extras import try_imports

# Each name is bound to the first dotted path in its list that imports, so the
# Python 2 ``StringIO`` module is preferred and ``io`` is the fallback.
BytesIO = try_imports(['StringIO.StringIO', 'io.BytesIO'])
StringIO = try_imports(['StringIO.StringIO', 'io.StringIO'])

# Try the Python 2 flavoured helper module first; if importing it raises
# SyntaxError or ImportError, use the Python 3 flavoured one instead.
try:
    from testtools import _compat2x as _compat
except (SyntaxError, ImportError):
    from testtools import _compat3x as _compat

reraise = _compat.reraise


# Shared docstring, attached to whichever ``_u`` implementation gets defined
# below. The backslashes are doubled so that the examples read ``\u1234`` on
# every Python version rather than being decoded into the character itself.
__u_doc = """A function version of the 'u' prefix.

This is needed because the u prefix is not usable in Python 3 but is required
in Python 2 to get a unicode object.

To migrate code that was written as u'\\u1234' in Python 2 to 2+3 change
it to be _u('\\u1234'). The Python 3 interpreter will decode it
appropriately and the no-op _u for Python 3 lets it through, in Python
2 we then call unicode-escape in the _u function.
"""

if sys.version_info > (3, 0):
    import builtins
    def _u(s):
        return s
    _r = ascii
    def _b(s):
        """A byte literal."""
        return s.encode("latin-1")
    advance_iterator = next
    # GZ 2011-08-24: Seems istext() is easy to misuse and makes for bad code.
    def istext(x):
        """Return whether ``x`` is an instance of ``str``."""
        return isinstance(x, str)
    def classtypes():
        """Return a tuple of the types that classes can have."""
        return (type,)
    str_is_unicode = True
else:
    import __builtin__ as builtins
    def _u(s):
        # The double replace mangling going on prepares the string for
        # unicode-escape - \foo is preserved, \u and \U are decoded.
        return (s.replace("\\", "\\\\").replace("\\\\u", "\\u")
            .replace("\\\\U", "\\U").decode("unicode-escape"))
    _r = repr
    def _b(s):
        """A byte literal."""
        return s
    advance_iterator = lambda it: it.next()
    def istext(x):
        """Return whether ``x`` is an instance of ``basestring``."""
        return isinstance(x, basestring)
    def classtypes():
        """Return a tuple of the types that classes can have."""
        import types
        return (type, types.ClassType)
    # Only IronPython ("cli") has a unicode ``str`` type among the
    # interpreters that take this branch.
    str_is_unicode = sys.platform == "cli"

_u.__doc__ = __u_doc


if sys.version_info > (2, 5):
    # Re-export the builtin under this module's namespace so that it can be
    # listed in __all__ alongside the fallback defined in the else branch.
    all = all
    _error_repr = BaseException.__repr__
    def isbaseexception(exception):
        """Return whether exception inherits from BaseException only"""
        return (isinstance(exception, BaseException)
            and not isinstance(exception, Exception))
else:
    def all(iterable):
        """If contents of iterable all evaluate as boolean True"""
        for obj in iterable:
            if not obj:
                return False
        return True
    def _error_repr(exception):
        """Format an exception instance as Python 2.5 and later do"""
        return exception.__class__.__name__ + repr(exception.args)
    def isbaseexception(exception):
        """Return whether exception would inherit from BaseException only

        This approximates the hierarchy in Python 2.5 and later, compare the
        difference between the diagrams at the bottom of the pages:
        <http://docs.python.org/release/2.4.4/lib/module-exceptions.html>
        <http://docs.python.org/release/2.5.4/lib/module-exceptions.html>
        """
        return isinstance(exception, (KeyboardInterrupt, SystemExit))


# GZ 2011-08-24: Using isinstance checks like this encourages bad interfaces,
#                there should be better ways to write code needing this.
if not issubclass(getattr(builtins, "bytes", str), str):
    def _isbytes(x):
        """Return whether ``x`` is an instance of the real ``bytes`` type."""
        return isinstance(x, bytes)
else:
    # Never return True on Pythons that provide the name but not the real type
    def _isbytes(x):
        """Always return False, ``bytes`` is missing or is an alias of str."""
        return False


def _slow_escape(text):
    """Escape unicode ``text`` leaving printable characters unmodified

    The behaviour emulates the Python 3 implementation of repr, see
    unicode_repr in unicodeobject.c and isprintable definition.

    Because this iterates over the input a codepoint at a time, it's slow, and
    does not handle astral characters correctly on Python builds with 16 bit
    rather than 32 bit unicode type.
    """
    output = []
    for c in text:
        o = ord(c)
        if o < 256:
            # Control characters, DEL and the 127-160 range get escaped
            if o < 32 or 126 < o < 161:
                output.append(c.encode("unicode-escape"))
            elif o == 92:
                # Separate due to bug in unicode-escape codec in Python 2.4
                output.append("\\\\")
            else:
                output.append(c)
        else:
            # To get correct behaviour would need to pair up surrogates here
            # Categories starting with C ("other") or Z ("separator") are
            # escaped, everything else is passed through unchanged.
            if unicodedata.category(c)[0] in "CZ":
                output.append(c.encode("unicode-escape"))
            else:
                output.append(c)
    return "".join(output)


def text_repr(text, multiline=None):
    """Rich repr for ``text`` returning unicode, triple quoted if ``multiline``.

    :param text: The string to represent.
    :param multiline: Whether to produce a triple quoted form. If None, the
        triple quoted form is used when ``text`` contains a newline.
    :returns: A string containing a Python literal for ``text``.
    """
    is_py3k = sys.version_info > (3, 0)
    # Kept as and/or rather than a conditional expression because this module
    # still carries Python 2.4 fallbacks; safe as the middle operand is truthy.
    nl = _isbytes(text) and bytes((0xA,)) or "\n"
    if multiline is None:
        multiline = nl in text
    if not multiline and (is_py3k or not str_is_unicode and type(text) is str):
        # Use normal repr for single line of unicode on Python 3 or bytes
        return repr(text)
    # repr of the empty slice is the literal prefix plus two quote characters,
    # so dropping the last two characters leaves just the prefix (if any).
    prefix = repr(text[:0])[:-2]
    if multiline:
        # To escape multiline strings, split and process each line in turn,
        # making sure that quotes are not escaped.
        if is_py3k:
            offset = len(prefix) + 1
            lines = []
            for l in text.split(nl):
                r = repr(l)
                q = r[-1]
                lines.append(r[offset:-1].replace("\\" + q, q))
        elif not str_is_unicode and isinstance(text, str):
            lines = [l.encode("string-escape").replace("\\'", "'")
                for l in text.split("\n")]
        else:
            lines = [_slow_escape(l) for l in text.split("\n")]
        # Combine the escaped lines and append two of the closing quotes,
        # then iterate over the result to escape triple quotes correctly.
        _semi_done = "\n".join(lines) + "''"
        p = 0
        while True:
            p = _semi_done.find("'''", p)
            if p == -1:
                break
            # Insert a backslash before the run of quotes found at p
            _semi_done = "\\".join([_semi_done[:p], _semi_done[p:]])
            p += 2
        return "".join([prefix, "'''\\\n", _semi_done, "'"])
    escaped_text = _slow_escape(text)
    # Determine which quote character to use and if one gets prefixed with a
    # backslash following the same logic Python uses for repr() on strings
    quote = "'"
    if "'" in text:
        if '"' in text:
            escaped_text = escaped_text.replace("'", "\\'")
        else:
            quote = '"'
    return "".join([prefix, quote, escaped_text, quote])


def unicode_output_stream(stream):
    """Get wrapper for given stream that writes any unicode without exception

    Characters that can't be coerced to the encoding of the stream, or 'ascii'
    if valid encoding is not found, will be replaced. The original stream may
    be returned in situations where a wrapper is determined unneeded.

    The wrapper only allows unicode to be written, not non-ascii bytestrings,
    which is a good thing to ensure sanity and sanitation.

    :param stream: The file-like object to write to.
    :returns: Either ``stream`` itself or an object wrapping it.
    """
    if (sys.platform == "cli" or
        isinstance(stream, (io.TextIOWrapper, io.StringIO))):
        # Best to never encode before writing in IronPython, or if it is
        # already a TextIO (which in the io library has no encoding
        # attribute).
        return stream
    try:
        writer = codecs.getwriter(stream.encoding or "")
    except (AttributeError, LookupError):
        # No encoding attribute, or not one the codecs module recognises
        return codecs.getwriter("ascii")(stream, "replace")
    # Take the last dotted component so a writer class defined in a top level
    # module (no dot in __module__) can't cause an IndexError here.
    if writer.__module__.rsplit(".", 1)[-1].startswith("utf"):
        # The current stream has a unicode encoding so no error handler is needed
        if sys.version_info > (3, 0):
            return stream
        return writer(stream)
    if sys.version_info > (3, 0):
        # Python 3 doesn't seem to make this easy, handle a common case
        try:
            return stream.__class__(stream.buffer, stream.encoding, "replace",
                stream.newlines, stream.line_buffering)
        except AttributeError:
            pass
    return writer(stream, "replace")


# The default source encoding is actually "iso-8859-1" until Python 2.5 but
# using non-ascii causes a deprecation warning in 2.4 and it's cleaner to
# treat all versions the same way
_default_source_encoding = "ascii"

# Pattern specified in <http://www.python.org/dev/peps/pep-0263/>
# Written as a raw string so the regex escapes are not treated as (invalid)
# string escapes; the compiled pattern is unchanged.
_cookie_search = re.compile(r"coding[:=]\s*([-\w.]+)").search

def _detect_encoding(lines):
    """Get the encoding of a Python source file from a list of lines as bytes

    This function does less than tokenize.detect_encoding added in Python 3 as
    it does not attempt to raise a SyntaxError when the interpreter would, it
    just wants the encoding of a source file Python has already compiled and
    determined is valid.

    :param lines: The lines of the source file.
    :returns: The name of the declared encoding, "utf-8" if the first line
        starts with a UTF-8 BOM, or the default source encoding if there is
        no declaration or it names an unknown codec.
    """
    if not lines:
        return _default_source_encoding
    if lines[0].startswith("\xef\xbb\xbf"):
        # Source starting with UTF-8 BOM is either UTF-8 or a SyntaxError
        return "utf-8"
    # Only the first two lines of the source file are examined
    magic = _cookie_search("".join(lines[:2]))
    if magic is None:
        return _default_source_encoding
    encoding = magic.group(1)
    try:
        codecs.lookup(encoding)
    except LookupError:
        # Some codecs raise something other than LookupError if they don't
        # support the given error handler, but not the text ones that could
        # actually be used for Python source code
        return _default_source_encoding
    return encoding


class _EncodingTuple(tuple):
    """A tuple type that can have an encoding attribute smuggled on"""


def _get_source_encoding(filename):
    """Detect, cache and return the encoding of Python source at filename"""
    try:
        return linecache.cache[filename].encoding
    except (AttributeError, KeyError):
        # Either there is no linecache entry yet, or it is one without the
        # encoding attribute attached, so work the encoding out from the file.
        encoding = _detect_encoding(linecache.getlines(filename))
        if filename in linecache.cache:
            # Swap the cache entry for an equal tuple carrying the encoding
            newtuple = _EncodingTuple(linecache.cache[filename])
            newtuple.encoding = encoding
            linecache.cache[filename] = newtuple
        return encoding


def _get_exception_encoding():
    """Return the encoding we expect messages from the OS to be encoded in"""
    if os.name == "nt":
        # GZ 2010-05-24: Really want the codepage number instead, the error
        #                handling of standard codecs is more deterministic
        return "mbcs"
    # GZ 2010-05-23: We need this call to be after initialisation, but there's
    #                no benefit in asking more than once as it's a global
    #                setting that can change after the message is formatted.
    return locale.getlocale(locale.LC_MESSAGES)[1] or "ascii"


def _exception_to_text(evalue):
    """Try hard to get a sensible text value out of an exception instance

    :param evalue: The exception instance.
    :returns: A unicode string, or None if every conversion attempt raised.
    """
    try:
        return unicode(evalue)
    except KeyboardInterrupt:
        raise
    except:
        # Apparently this is what traceback._some_str does. Sigh - RBC 20100623
        pass
    try:
        return str(evalue).decode(_get_exception_encoding(), "replace")
    except KeyboardInterrupt:
        raise
    except:
        # Apparently this is what traceback._some_str does. Sigh - RBC 20100623
        pass
    # Okay, out of ideas, let higher level handle it
    return None


def _format_stack_list(stack_lines):
    """Format 'stack_lines' and return a list of unicode strings.

    :param stack_lines: A list of filename, lineno, name, and line variables,
        probably obtained by calling traceback.extract_tb or
        traceback.extract_stack.
    """
    fs_enc = sys.getfilesystemencoding()
    extracted_list = []
    for filename, lineno, name, line in stack_lines:
        extracted_list.append((
            filename.decode(fs_enc, "replace"),
            lineno,
            name.decode("ascii", "replace"),
            # line may be None or empty, in which case it is passed through
            line and line.decode(
                _get_source_encoding(filename), "replace")))
    return traceback.format_list(extracted_list)


def _format_exception_only(eclass, evalue):
    """Format the exception part of a traceback.

    :param eclass: The type of the exception being formatted.
    :param evalue: The exception instance.
    :returns: A list of unicode strings.
    """
    # Named so as not to shadow the ``list`` builtin
    formatted = []
    if evalue is None:
        # Is a (deprecated) string exception
        formatted.append((eclass + "\n").decode("ascii", "replace"))
        return formatted
    if isinstance(evalue, SyntaxError):
        # Avoid duplicating the special formatting for SyntaxError here,
        # instead create a new instance with unicode filename and line
        # Potentially gives duff spacing, but that's a pre-existing issue
        try:
            msg, (filename, lineno, offset, line) = evalue
        except (TypeError, ValueError):
            pass # Strange exception instance, fall through to generic code
        else:
            # Errors during parsing give the line from buffer encoded as
            # latin-1 or utf-8 or the encoding of the file depending on the
            # coding and whether the patch for issue #1031213 is applied, so
            # give up on trying to decode it and just read the file again
            if line:
                bytestr = linecache.getline(filename, lineno)
                if bytestr:
                    if lineno == 1 and bytestr.startswith("\xef\xbb\xbf"):
                        # Drop the three byte UTF-8 BOM from the first line
                        bytestr = bytestr[3:]
                    line = bytestr.decode(
                        _get_source_encoding(filename), "replace")
                    del linecache.cache[filename]
                else:
                    line = line.decode("ascii", "replace")
            if filename:
                fs_enc = sys.getfilesystemencoding()
                filename = filename.decode(fs_enc, "replace")
            evalue = eclass(msg, (filename, lineno, offset, line))
            formatted.extend(traceback.format_exception_only(eclass, evalue))
            return formatted
    sclass = eclass.__name__
    svalue = _exception_to_text(evalue)
    if svalue:
        formatted.append("%s: %s\n" % (sclass, svalue))
    elif svalue is None:
        # GZ 2010-05-24: Not a great fallback message, but keep for the moment
        formatted.append(
            _u("%s: <unprintable %s object>\n" % (sclass, sclass)))
    else:
        # Exception converted to an empty string, so show the class name only
        formatted.append(_u("%s\n" % sclass))
    return formatted


_TB_HEADER = _u('Traceback (most recent call last):\n')


def _format_exc_info(eclass, evalue, tb, limit=None):
    """Format a stack trace and the exception information as unicode

    Compatibility function for Python 2 which ensures each component of a
    traceback is correctly decoded according to its origins.

    Based on traceback.format_exception and related functions.

    :param eclass: The type of the exception being formatted.
    :param evalue: The exception instance.
    :param tb: The traceback object to extract stack entries from.
    :param limit: Passed to traceback.extract_tb along with ``tb``.
    :returns: A list of unicode strings.
    """
    return [_TB_HEADER] \
        + _format_stack_list(traceback.extract_tb(tb, limit)) \
        + _format_exception_only(eclass, evalue)