# -*- coding: utf-8 -*-
#
# Copyright (C) 2003-2021 Edgewall Software
# Copyright (C) 2003-2004 Jonas Borgström <jonas@edgewall.com>
# Copyright (C) 2006 Matthew Good <trac@matt-good.net>
# Copyright (C) 2005-2006 Christian Boos <cboos@edgewall.org>
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at https://trac.edgewall.org/wiki/TracLicense.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at https://trac.edgewall.org/log/.
#
# Author: Jonas Borgström <jonas@edgewall.com>
#         Matthew Good <trac@matt-good.net>
#         Christian Boos <cboos@edgewall.org>

"""Text and unicode utilities: conversion helpers between `str` and
`bytes`, Jinja2 environment/template factories, URL quoting, console
output helpers, and East-Asian-width-aware plain-text formatting.
"""

import base64
import configparser
import locale
import os
import pkg_resources
import re
import sys
import textwrap
from urllib.parse import quote, quote_plus, unquote
from unicodedata import east_asian_width

import jinja2

# Canonical CRLF line ending, used by callers that need to normalize EOLs.
CRLF = '\r\n'

class Empty(str):
    """A special tag object evaluating to the empty string"""
    __slots__ = []

# Singleton sentinel: compares equal to '' but is identity-distinguishable
# (see `unicode_urlencode`, which tests ``v is empty``).
empty = Empty()

del Empty # shouldn't be used outside of Trac core


# -- Jinja2

_jinja2_ver = pkg_resources.parse_version(jinja2.__version__)
_jinja2_exts = ['jinja2.ext.do', 'jinja2.ext.i18n']
if _jinja2_ver < pkg_resources.parse_version('3'):
    # 'with' became built-in syntax in Jinja2 3.0; the extension only
    # exists (and is only needed) on 2.x.
    _jinja2_exts.append('jinja2.ext.with_')

def jinja2env(**kwargs):
    """Creates a Jinja2 ``Environment`` configured with Trac conventions.

    All default parameters can optionally be overridden. The ``loader``
    parameter is not set by default, so unless it is set by the
    caller, only inline templates can be created from the environment.

    :param kwargs: overrides for any `jinja2.Environment` argument
                   (e.g. ``loader``, ``autoescape``).
    :rtype: `jinja2.Environment`

    """
    # Autoescaping is enabled only for templates whose file extension
    # marks them as markup.
    exts = ('html', 'rss', 'xml')
    def filterout_none(v):
        # Render missing values as '' instead of the string 'None'.
        return '' if v is None else v
    def autoescape_extensions(template):
        return template and template.rsplit('.', 1)[1] in exts
    defaults = dict(
        variable_start_string='${',
        variable_end_string='}',
        line_statement_prefix='#',
        line_comment_prefix='##',
        trim_blocks=True,
        lstrip_blocks=True,
        extensions=list(_jinja2_exts),
        finalize=filterout_none,
        autoescape=autoescape_extensions,
    )
    # Caller-supplied arguments take precedence over the Trac defaults.
    defaults.update(kwargs)
    jenv = jinja2.Environment(**defaults)
    jenv.globals.update(
        len=len,
    )
    return jenv

def jinja2template(template, text=False, **kwargs):
    """Creates a Jinja2 ``Template`` from inlined source.

    :param template: the template content
    :param text: if set to `False`, the result of the variable
                 expansion will be XML/HTML escaped
    :param kwargs: additional arguments to pass to `jinja2env`. See
                   `jinja2.Environment` for supported arguments.
    """
    return jinja2env(autoescape=not text, **kwargs).from_string(template)


# -- Unicode

def to_unicode(text, charset=None):
    """Convert input to a `str` object.

    For a `bytes` object, we'll first try to decode the bytes using the given
    `charset` encoding (or UTF-8 if none is specified), then we fall back to
    the latin1 encoding which might be correct or not, but at least preserves
    the original byte sequence by mapping each byte to the corresponding
    unicode code point in the range U+0000 to U+00FF.

    For anything else, a simple `str()` conversion is attempted,
    with special care taken with `Exception` objects.

    :param text: any object; `bytes` is decoded, `Exception` gets special
                 handling, everything else goes through `str()`.
    :param charset: preferred encoding for `bytes` input (default UTF-8).
    :rtype: `str`
    """
    if isinstance(text, bytes):
        try:
            return str(text, charset or 'utf-8')
        except UnicodeDecodeError:
            # latin1 never fails: every byte maps to U+0000..U+00FF.
            return str(text, 'latin1')
    if isinstance(text, Exception):
        # two possibilities for storing unicode strings in exception data:
        try:
            # custom __str__ method on the exception (e.g. PermissionError)
            result = str(text)
        except UnicodeError:
            # unicode arguments given to the exception (e.g. parse_date)
            return ' '.join(to_unicode(arg) for arg in text.args)
        if os.name == 'nt':
            # remove duplicated backslashes from filename in the message
            if isinstance(text, EnvironmentError) and text.filename:
                source = repr(text.filename)
            elif isinstance(text, configparser.ParsingError) and text.source:
                source = repr(text.source)
            else:
                source = None
            if source:
                # repr() doubles backslashes; collapse them back to one.
                result = result.replace(source, source.replace(r'\\', '\\'))
        return result
    return str(text)


def exception_to_unicode(e, traceback=False):
    """Convert an `Exception` to a `str` object.

    In addition to `to_unicode`, this representation of the exception
    also contains the class name and optionally the traceback.

    :param e: the exception to render.
    :param traceback: if `True`, prepend the last traceback (with its
                      final two lines — the repeated message — dropped).
    :rtype: `str`
    """
    message = '%s: %s' % (e.__class__.__name__, to_unicode(e))
    if traceback:
        # Imported lazily to avoid a circular import with trac.util.
        from trac.util import get_last_traceback
        traceback_only = get_last_traceback().split('\n')[:-2]
        message = '\n%s\n%s' % (to_unicode('\n'.join(traceback_only)), message)
    return message


def path_to_unicode(path):
    """Convert a filesystem path to str, using the filesystem encoding.

    Falls back to latin1 (which preserves the byte values) if the
    filesystem encoding cannot decode the path.
    """
    if isinstance(path, bytes):
        try:
            return str(path, sys.getfilesystemencoding())
        except UnicodeDecodeError:
            return str(path, 'latin1')
    return str(path)


# Whitespace *or* zero-width space (U+200B) at the very start / very end.
_ws_leading_re = re.compile('\\A[\\s\u200b]+', re.UNICODE)
_ws_trailing_re = re.compile('[\\s\u200b]+\\Z', re.UNICODE)

def stripws(text, leading=True, trailing=True):
    """Strips unicode white-spaces and ZWSPs from ``text``.

    :param leading: strips leading spaces from ``text`` unless ``leading`` is
                    `False`.
    :param trailing: strips trailing spaces from ``text`` unless ``trailing``
                     is `False`.
    """
    if leading:
        text = _ws_leading_re.sub('', text)
    if trailing:
        text = _ws_trailing_re.sub('', text)
    return text


def strip_line_ws(text, leading=True, trailing=True):
    """Strips unicode white-spaces and ZWSPs from each line of ``text``.

    :param leading: strips leading spaces from ``text`` unless ``leading`` is
                    `False`.
    :param trailing: strips trailing spaces from ``text`` unless ``trailing``
                     is `False`.
    """
    # Capturing split keeps the EOL separators at odd indices, so the
    # original line endings are preserved on re-join; even indices are
    # the line bodies being stripped.
    lines = re.compile(r'(\n|\r\n|\r)').split(text)
    if leading:
        lines[::2] = (_ws_leading_re.sub('', line) for line in lines[::2])
    if trailing:
        lines[::2] = (_ws_trailing_re.sub('', line) for line in lines[::2])
    return ''.join(lines)


# Escape table for embedding text in JavaScript string literals; control
# characters, '&<>' and the JS line separators U+2028/U+2029 become \uXXXX.
_js_quote = {'\\': '\\\\', '"': '\\"', '\b': '\\b', '\f': '\\f',
             '\n': '\\n', '\r': '\\r', '\t': '\\t', "'": "\\'"}
for i in list(range(0x20)) + [ord(c) for c in '&<>\u2028\u2029']:
    _js_quote.setdefault(chr(i), '\\u%04x' % i)
# _js_quote_re also matches single quotes (for use inside '...' strings);
# _js_string_re does not, as to_js_string always emits double quotes.
_js_quote_re = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t\'&<>' + '\u2028\u2029]')
_js_string_re = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t&<>' + '\u2028\u2029]')


def javascript_quote(text):
    """Quote strings for inclusion in single or double quote delimited
    Javascript strings
    """
    if not text:
        return ''
    def replace(match):
        return _js_quote[match.group(0)]
    return _js_quote_re.sub(replace, text)


def to_js_string(text):
    """Embed the given string in a double quote delimited Javascript string
    (conform to the JSON spec)
    """
    if not text:
        return '""'
    def replace(match):
        return _js_quote[match.group(0)]
    return '"%s"' % _js_string_re.sub(replace, text)


def unicode_quote(value, safe='/'):
    """A unicode aware version of `urllib.quote`

    :param value: anything that converts to a `bytes`. If `str`
                  input is given, it will be UTF-8 encoded.
    :param safe: as in `quote`, the characters that would otherwise be
                 quoted but shouldn't here (defaults to '/')
    """
    return quote(value if isinstance(value, bytes) else str(value), safe)


def unicode_quote_plus(value, safe=''):
    """A unicode aware version of `urllib.quote_plus`.

    :param value: anything that converts to a `bytes`. If `str`
                  input is given, it will be UTF-8 encoded.
    :param safe: as in `quote_plus`, the characters that would
                 otherwise be quoted but shouldn't here (defaults to
                 '/')
    """
    return quote_plus(value if isinstance(value, bytes) else str(value), safe)


def unicode_unquote(value):
    """A unicode aware version of `urllib.unquote`.

    :param value: UTF-8 encoded `str` value (for example, as obtained by
                  `unicode_quote`).
    :rtype: `str`
    """
    if isinstance(value, bytes):
        # latin1 turns the bytes into a str with identical code points,
        # so the %XX sequences survive for unquote() to decode as UTF-8.
        value = value.decode('latin1')
    return unquote(value, encoding='utf-8', errors='strict')


def unicode_urlencode(params, safe=''):
    """A unicode aware version of `urllib.urlencode`.

    Values set to `empty` are converted to the key alone, without the
    equal sign.
    """
    if isinstance(params, dict):
        # Sort dict items for a deterministic query string.
        params = sorted(params.items(), key=lambda i: i[0])
    l = []
    for k, v in params:
        if v is empty:
            l.append(unicode_quote_plus(k, safe))
        else:
            l.append(unicode_quote_plus(k, safe) + '=' +
                     unicode_quote_plus(v, safe))
    return '&'.join(l)


# All printable ASCII except space (0x20) and DEL (0x7f): kept verbatim
# when quoting a full query string below.
_qs_quote_safe = ''.join(chr(c) for c in range(0x21, 0x7f))

def quote_query_string(text):
    """Quote strings for query string
    """
    return unicode_quote_plus(text, _qs_quote_safe)


def to_utf8(text, charset='latin1'):
    """Convert input to a UTF-8 `bytes` object.

    If the input is not a `str` object, we assume the encoding is
    already UTF-8, ISO Latin-1, or as specified by the optional
    *charset* parameter.

    :param text: `bytes` or any object convertible with `to_unicode`.
    :param charset: fallback encoding tried for `bytes` input that is
                    not valid UTF-8 (defaults to latin1).
    :rtype: `bytes`
    """
    if isinstance(text, bytes):
        try:
            u = str(text, 'utf-8')
        except UnicodeError:
            try:
                # Use the user supplied charset if possible
                u = str(text, charset)
            except UnicodeError:
                # This should always work
                u = str(text, 'latin1')
        else:
            # Do nothing if it's already utf-8
            return text
    else:
        u = to_unicode(text)
    return u.encode('utf-8')


class unicode_passwd(str):
    """Conceal the actual content of the string when `repr` is called."""
    # Keeps passwords out of logs/tracebacks that use repr(); the actual
    # value is still accessible through normal str operations.
    def __repr__(self):
        return '*******'


def stream_encoding(stream):
    """Return the appropriate encoding for the given stream."""
    encoding = getattr(stream, 'encoding', None)
    # Windows returns 'cp0' to indicate no encoding
    return encoding if encoding not in (None, 'cp0') else 'utf-8'


def console_print(out, *args, **kwargs):
    """Output the given arguments to the console, encoding the output
    as appropriate.

    :param kwargs: ``newline`` controls whether a newline will be appended
                   (defaults to `True`)
    """
    out.write(' '.join(to_unicode(a) for a in args))
    if kwargs.get('newline', True):
        out.write('\n')


def printout(*args, **kwargs):
    """Do a `console_print` on `sys.stdout`."""
    console_print(sys.stdout, *args, **kwargs)


def printerr(*args, **kwargs):
    """Do a `console_print` on `sys.stderr`."""
    console_print(sys.stderr, *args, **kwargs)


def printfout(message, *args, **kwargs):
    """Format `message`, do a `console_print` on `sys.stdout` and flush
    the buffer.
    """
    if args:
        # %-style formatting, applied only when arguments are given.
        message %= args
    printout(message, **kwargs)
    sys.stdout.flush()


def printferr(message, *args, **kwargs):
    """Format `message`, do a `console_print` on `sys.stderr` and flush
    the buffer.
    """
    if args:
        message %= args
    printerr(message, **kwargs)
    sys.stderr.flush()


def raw_input(prompt):
    """Input one line from the console and converts it to unicode as
    appropriate.
    """
    # NOTE: intentionally shadows the Python 2 builtin name; input()
    # already returns str on Python 3, so to_unicode is a pass-through
    # here for str input.
    printout(prompt, newline=False)
    return to_unicode(input(), sys.stdin.encoding)


# Captured once at import time; see getpreferredencoding() below.
_preferredencoding = locale.getpreferredencoding()

def getpreferredencoding():
    """Return the encoding, which was retrieved ahead of time at module
    import, according to user preference.

    We should use this instead of `locale.getpreferredencoding()` which
    is not thread-safe."""
    return _preferredencoding


# -- Plain text formatting

def text_width(text, ambiwidth=1):
    """Determine the column width of `text` in Unicode characters.

    The characters in the East Asian Fullwidth (F) or East Asian Wide (W)
    have a column width of 2. The other characters in the East Asian
    Halfwidth (H) or East Asian Narrow (Na) have a column width of 1.

    That `ambiwidth` parameter is used for the column width of the East
    Asian Ambiguous (A). If `1`, the same width as characters in US-ASCII.
    This is expected by most users. If `2`, twice the width of US-ASCII
    characters. This is expected by CJK users.

    cf. http://www.unicode.org/reports/tr11/.
    """
    # 'twice' holds the east_asian_width() category codes counted as 2
    # columns wide. (The loop variable shadows the chr() builtin, which
    # is unused here.)
    twice = 'FWA' if ambiwidth == 2 else 'FW'
    return sum([2 if east_asian_width(chr) in twice else 1
                for chr in to_unicode(text)])


def _get_default_ambiwidth():
    """Return width of East Asian Ambiguous based on locale environment
    variables or Windows codepage.
    """

    if os.name == 'nt':
        import ctypes
        codepage = ctypes.windll.kernel32.GetConsoleOutputCP()
        if codepage in (932,   # Japanese (Shift-JIS)
                        936,   # Chinese Simplified (GB2312)
                        949,   # Korean (Unified Hangul Code)
                        950):  # Chinese Traditional (Big5)
            return 2
    else:
        # Inspect the locale variables in precedence order; the first
        # one that is set decides.
        for name in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'):
            value = os.environ.get(name) or ''
            if value:
                if name == 'LANGUAGE' and ':' in value:
                    # LANGUAGE may hold a colon-separated priority list.
                    value = value.split(':')[0]
                return 2 if value.lower().startswith(('zh', 'ja', 'ko')) else 1

    return 1


_default_ambiwidth = _get_default_ambiwidth()


def print_table(data, headers=None, sep='  ', out=None, ambiwidth=None):
    """Print data according to a tabular layout.

    :param data: a sequence of rows; assume all rows are of equal length.
    :param headers: an optional row containing column headers; must be of
                    the same length as each row in `data`.
    :param sep: column separator
    :param out: output file descriptor (`None` means use `sys.stdout`)
    :param ambiwidth: column width of the East Asian Ambiguous (A). If None,
                      detect ambiwidth with the locale settings. If others,
                      pass to the `ambiwidth` parameter of `text_width`.
    """
    if out is None:
        out = sys.stdout
    if ambiwidth is None:
        ambiwidth = _default_ambiwidth
    data = list(data)
    if headers:
        data.insert(0, headers)
    elif not data:
        return

    # Convert to a str object with `to_unicode`. If None, convert to an
    # empty string.
    def to_text(val):
        if val is None:
            return ''
        return to_unicode(val)

    def tw(text):
        return text_width(text, ambiwidth=ambiwidth)

    def to_lines(data):
        # Expand cells containing newlines into multiple physical rows,
        # padding the other cells of the row with '' to the same height.
        lines = []
        for row in data:
            row = [to_text(cell) for cell in row]
            if any('\n' in cell for cell in row):
                row = [cell.splitlines() for cell in row]
                max_lines = max(len(cell) for cell in row)
                for cell in row:
                    if len(cell) < max_lines:
                        cell += [''] * (max_lines - len(cell))
                lines.extend([cell[idx] for cell in row]
                             for idx in range(max_lines))
            else:
                lines.append(row)
        return lines

    data = to_lines(data)

    num_cols = len(data[0])
    col_width = [max(tw(row[idx]) for row in data)
                 for idx in range(num_cols)]

    out.write('\n')
    for ridx, row in enumerate(data):
        for cidx, cell in enumerate(row):
            if cidx + 1 == num_cols:
                line = cell  # No separator after last column
            else:
                if headers and ridx == 0:
                    sp = ' ' * tw(sep)  # No separator in header
                else:
                    sp = sep
                # Pad by display width: add (col_width - display width)
                # spaces on top of the cell's character length.
                line = '%-*s%s' % (col_width[cidx] - tw(cell) + len(cell),
                                   cell, sp)
            out.write(line)

        out.write('\n')
        if ridx == 0 and headers:
            # Horizontal rule under the header row.
            out.write('-' * (tw(sep) * cidx + sum(col_width)))
            out.write('\n')
    out.write('\n')


def shorten_line(text, maxlen=75):
    """Truncates `text` to length less than or equal to `maxlen` characters.

    This tries to be (a bit) clever and attempts to find a proper word
    boundary for doing so.
    """
    if len(text or '') <= maxlen:
        return text
    suffix = ' ...'
    maxtextlen = maxlen - len(suffix)
    # Prefer cutting at the last space or newline before the limit.
    cut = max(text.rfind(' ', 0, maxtextlen), text.rfind('\n', 0, maxtextlen))
    if cut < 0:
        cut = maxtextlen
    return text[:cut] + suffix


class UnicodeTextWrapper(textwrap.TextWrapper):
    """A `textwrap.TextWrapper` that accounts for East Asian character
    widths and allows line breaks inside runs of CJK text.
    """

    # (start, end) codepoint ranges considered breakable (a line may be
    # wrapped between any two characters of these scripts). The third
    # element of the Plane 2/3 entries is a UTF-16 surrogate-pair pattern;
    # it is not used by _init_patterns below (presumably a Python 2
    # narrow-build leftover — TODO confirm).
    breakable_char_ranges = [
        (0x1100, 0x11FF),   # Hangul Jamo
        (0x2E80, 0x2EFF),   # CJK Radicals Supplement
        (0x3000, 0x303F),   # CJK Symbols and Punctuation
        (0x3040, 0x309F),   # Hiragana
        (0x30A0, 0x30FF),   # Katakana
        (0x3130, 0x318F),   # Hangul Compatibility Jamo
        (0x3190, 0x319F),   # Kanbun
        (0x31C0, 0x31EF),   # CJK Strokes
        (0x3200, 0x32FF),   # Enclosed CJK Letters and Months
        (0x3300, 0x33FF),   # CJK Compatibility
        (0x3400, 0x4DBF),   # CJK Unified Ideographs Extension A
        (0x4E00, 0x9FFF),   # CJK Unified Ideographs
        (0xA960, 0xA97F),   # Hangul Jamo Extended-A
        (0xAC00, 0xD7AF),   # Hangul Syllables
        (0xD7B0, 0xD7FF),   # Hangul Jamo Extended-B
        (0xF900, 0xFAFF),   # CJK Compatibility Ideographs
        (0xFE30, 0xFE4F),   # CJK Compatibility Forms
        (0xFF00, 0xFFEF),   # Halfwidth and Fullwidth Forms
        (0x20000, 0x2FFFF, '[\uD840-\uD87F][\uDC00-\uDFFF]'),  # Plane 2
        (0x30000, 0x3FFFF, '[\uD880-\uD8BF][\uDC00-\uDFFF]'),  # Plane 3
    ]

    # Compiled lazily by _init_patterns on first instantiation and shared
    # across all instances (class attributes).
    split_re = None
    breakable_re = None

    @classmethod
    def _init_patterns(cls):
        """Compile the class-level split/breakable regexes from
        `breakable_char_ranges`."""
        char_ranges = []
        for val in cls.breakable_char_ranges:
            # NOTE(review): 'high' is bound to the range start and 'low'
            # to the range end — the names are swapped, but the resulting
            # character-class '%s-%s' is still start-end, i.e. correct.
            high = chr(val[0])
            low = chr(val[1])
            char_ranges.append('%s-%s' % (high, low))
        char_ranges = ''.join(char_ranges)
        pattern = '[%s]+' % char_ranges

        cls.split_re = re.compile(
            r'(\s+|' +                                  # any whitespace
            pattern + '|' +                             # breakable text
            r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|' +   # hyphenated words
            r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))',     # em-dash
            re.UNICODE)
        cls.breakable_re = re.compile(r'\A' + pattern, re.UNICODE)

    def __init__(self, cols, replace_whitespace=0, break_long_words=0,
                 initial_indent='', subsequent_indent='', ambiwidth=1):
        # Note: the replace_whitespace / break_long_words arguments are
        # accepted for signature compatibility but hard-coded to 0 in the
        # superclass call below.
        textwrap.TextWrapper.__init__(
            self, cols, replace_whitespace=0, break_long_words=0,
            initial_indent=initial_indent,
            subsequent_indent=subsequent_indent)
        self.ambiwidth = ambiwidth
        if self.split_re is None:
            self._init_patterns()

    def _split(self, text):
        """Split text into wrappable chunks, dropping empty strings."""
        chunks = self.split_re.split(to_unicode(text))
        return list(filter(None, chunks))

    def _text_width(self, text):
        """Display width of `text`, honoring this wrapper's ambiwidth."""
        return text_width(text, ambiwidth=self.ambiwidth)

    def _wrap_chunks(self, chunks):
        """Width-aware replacement for `TextWrapper._wrap_chunks`:
        measures chunks in display columns and may split a breakable
        (CJK) chunk mid-way to fill the current line."""
        lines = []
        chunks.reverse()  # consume from the end via pop()
        text_width = self._text_width

        while chunks:
            cur_line = []
            cur_width = 0

            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent
            width = self.width - text_width(indent)

            # Drop whitespace that would start a continuation line.
            if chunks[-1].strip() == '' and lines:
                del chunks[-1]

            while chunks:
                chunk = chunks[-1]
                w = text_width(chunk)
                if cur_width + w <= width:
                    cur_line.append(chunks.pop())
                    cur_width += w
                elif self.breakable_re.match(chunk):
                    # Chunk is breakable text: take as many leading
                    # characters as fit in the remaining space.
                    left_space = width - cur_width
                    for i in range(len(chunk)):
                        w = text_width(chunk[i])
                        if left_space < w:
                            break
                        left_space -= w
                    if i > 0:
                        cur_line.append(chunk[:i])
                        chunk = chunk[i:]
                        chunks[-1] = chunk
                        w = text_width(chunk)
                    break
                else:
                    break

            # A single unbreakable chunk wider than the line.
            if chunks and w > width:
                self._handle_long_word(chunks, cur_line, cur_width, width)

            # Drop trailing whitespace on the line.
            if cur_line and cur_line[-1].strip() == '':
                del cur_line[-1]

            if cur_line:
                lines.append(indent + ''.join(cur_line))

        return lines


def wrap(t, cols=75, initial_indent='', subsequent_indent='',
         linesep=os.linesep, ambiwidth=1):
    """Wraps the single paragraph in `t`, which contains unicode characters.
    Every line is at most `cols` characters long.

    That `ambiwidth` parameter is used for the column width of the East
    Asian Ambiguous (A). If `1`, the same width as characters in US-ASCII.
    This is expected by most users. If `2`, twice the width of US-ASCII
    characters. This is expected by CJK users.
    """
    # Normalize all line endings to '\n' before wrapping.
    t = t.strip().replace('\r\n', '\n').replace('\r', '\n')
    wrapper = UnicodeTextWrapper(cols, replace_whitespace=0,
                                 break_long_words=0,
                                 initial_indent=initial_indent,
                                 subsequent_indent=subsequent_indent,
                                 ambiwidth=ambiwidth)
    wrappedLines = []
    for line in t.split('\n'):
        # Preserve blank lines: wrap() returns [] for an empty line.
        wrappedLines += wrapper.wrap(line.rstrip()) or ['']
    return linesep.join(wrappedLines)


# '@' followed by a horizontal ellipsis (U+2026).
_obfuscation_char = '@\u2026'

def obfuscate_email_address(address):
    """Replace anything looking like an e-mail address (``'@something'``)
    with a trailing ellipsis (``'@…'``)
    """
    if address:
        at = address.find('@')
        if at != -1:
            # Keep a closing '>' so "Name <user@host>" stays balanced.
            return address[:at] + _obfuscation_char + \
                   ('>' if address[-1] == '>' else '')
    return address


def is_obfuscated(word):
    """Returns `True` if the `word` looks like an obfuscated e-mail
    address.

    :since: 1.2
    """
    return _obfuscation_char in word


def breakable_path(path):
    """Make a path breakable after path separators, and conversely, avoid
    breaking at spaces.

    Inserts a zero-width space (U+200B) after each '/' or '\\\\' and
    replaces spaces with no-break spaces (U+00A0).
    """
    if not path:
        return path
    prefix = ''
    if path.startswith('/'):    # Avoid breaking after a leading /
        prefix = '/'
        path = path[1:]
    return prefix + path.replace('/', '/\u200b').replace('\\', '\\\u200b') \
                        .replace(' ', '\u00a0')


def normalize_whitespace(text, to_space='\u00a0', remove='\u200b'):
    """Normalize whitespace in a string, by replacing special spaces by normal
    spaces and removing zero-width spaces.

    :param to_space: characters to replace with a plain space
                     (default: no-break space).
    :param remove: characters to delete outright (default: ZWSP).

    This is the inverse of `breakable_path`.
    """
    if not text:
        return text
    for each in to_space:
        text = text.replace(each, ' ')
    for each in remove:
        text = text.replace(each, '')
    return text


def unquote_label(txt):
    """Remove (one level of) enclosing single or double quotes.

    .. versionadded:: 1.0
    """
    return txt[1:-1] if txt and txt[0] in "'\"" and txt[0] == txt[-1] else txt


def cleandoc(message):
    """Removes uniform indentation and leading/trailing whitespace."""
    from inspect import cleandoc
    return cleandoc(message).strip()


# -- Conversion

def pretty_size(size, format='%.1f'):
    """Pretty print content size information with appropriate unit.

    :param size: number of bytes
    :param format: can be used to adjust the precision shown
    """
    if size is None:
        return ''

    jump = 1024
    if size < jump:
        # Imported lazily to avoid a circular import with trac.util.
        from trac.util.translation import ngettext
        return ngettext("%(num)d byte", "%(num)d bytes", num=size)

    units = ['KB', 'MB', 'GB', 'TB']
    i = 0
    while size >= jump and i < len(units):
        i += 1
        size /= 1024.

    return (format + ' %s') % (size, units[i - 1])


def expandtabs(s, tabstop=8, ignoring=None):
    """Expand tab characters `'\\\\t'` into spaces.

    :param tabstop: number of space characters per tab
                    (defaults to the canonical 8)

    :param ignoring: if not `None`, the expansion will be "smart" and
                     go from one tabstop to the next. In addition,
                     this parameter lists characters which can be
                     ignored when computing the indent.
    """
    if '\t' not in s:
        return s
    if ignoring is None:
        return s.expandtabs(tabstop)

    outlines = []
    for line in s.split('\n'):
        if '\t' not in line:
            outlines.append(line)
            continue
        # p is the current column; characters in `ignoring` do not
        # advance it. (Note: `s` is deliberately reused here as the
        # per-line output buffer, shadowing the parameter.)
        p = 0
        s = []
        for c in line:
            if c == '\t':
                n = tabstop - p % tabstop
                s.append(' ' * n)
                p += n
            elif not ignoring or c not in ignoring:
                p += 1
                s.append(c)
            else:
                s.append(c)
        outlines.append(''.join(s))
    return '\n'.join(outlines)


def fix_eol(text, eol):
    """Fix end-of-lines in a text.

    Rewrites every line ending to `eol`; non-empty input always gains a
    trailing `eol` (the '' appended below ensures the final join emits
    one). Accepts `str` or `bytes` input; for `bytes`, `eol` is UTF-8
    encoded to match.
    """
    lines = text.splitlines()
    if isinstance(text, bytes):
        last = b''
        eol = eol.encode('utf-8')
    else:
        last = ''
    lines.append(last)
    return eol.join(lines)

def unicode_to_base64(text, strip_newlines=True):
    """Safe conversion of ``text`` to base64 representation using
    utf-8 bytes.

    Strips newlines from output unless ``strip_newlines`` is `False`.
    """
    text = to_unicode(text)
    text = text.encode('utf-8')
    if strip_newlines:
        rv = base64.b64encode(text)
    else:
        # encodebytes() inserts a newline every 76 characters.
        rv = base64.encodebytes(text)
    return str(rv, 'ascii')

def unicode_from_base64(text):
    """Safe conversion of ``text`` to str based on utf-8 bytes."""
    return str(base64.b64decode(text), 'utf-8')


def levenshtein_distance(lhs, rhs):
    """Return the Levenshtein distance between two strings.

    Substitutions are weighted 2 (vs. 1 for insert/delete), i.e. a
    substitution costs the same as a delete plus an insert.
    """
    # Keep the shorter string as `lhs` so the row buffer is minimal.
    if len(lhs) > len(rhs):
        rhs, lhs = lhs, rhs
    if not lhs:
        return len(rhs)

    # Classic two-row dynamic programming over the edit matrix.
    prev = range(len(rhs) + 1)
    for lidx, lch in enumerate(lhs):
        curr = [lidx + 1]
        for ridx, rch in enumerate(rhs):
            cost = (lch != rch) * 2
            curr.append(min(prev[ridx + 1] + 1,  # deletion
                            curr[ridx] + 1,      # insertion
                            prev[ridx] + cost))  # substitution
        prev = curr
    return prev[-1]


# Matches $IDENT where IDENT starts with an upper-case letter or '_'.
sub_vars_re = re.compile("[$]([A-Z_][A-Z0-9_]*)")

def sub_vars(text, args):
    """Substitute $XYZ-style variables in a string with provided values.

    :param text: string containing variables to substitute.
    :param args: dictionary with keys matching the variables to be substituted.
                 The keys should not be prefixed with the $ character.

    Unknown variables are left as-is (the '$' is preserved).
    """
    def repl(match):
        key = match.group(1)
        return args[key] if key in args else '$' + key
    return sub_vars_re.sub(repl, text)