# Copyright 2017, Alex Willmer
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

# !mitogen: minify_safe

import sys

try:
    from io import StringIO
except ImportError:
    # Python 2 fallback when io.StringIO is unavailable.
    from StringIO import StringIO

import mitogen.core

# On interpreters older than 2.7.11, use the bundled tokenize backport;
# presumably this works around tokenize/untokenize defects fixed in that
# release — confirm against mitogen.compat.tokenize.
if sys.version_info < (2, 7, 11):
    from mitogen.compat import tokenize
else:
    import tokenize


def minimize_source(source):
    """
    Remove comments and docstrings from Python `source`, preserving line
    numbers and syntax of empty blocks.

    :param str source:
        The source to minimize.

    :returns str:
        The minimized source.
    """
    # Normalize to text first: generate_tokens() requires a readline over str.
    source = mitogen.core.to_text(source)
    tokens = tokenize.generate_tokens(StringIO(source).readline)
    # Pipeline of generator filters over the token stream, then re-serialize.
    tokens = strip_comments(tokens)
    tokens = strip_docstrings(tokens)
    tokens = reindent(tokens)
    return tokenize.untokenize(tokens)


def strip_comments(tokens):
    """
    Drop comment tokens from a `tokenize` stream.

    Comments on lines 1-2 are kept, to preserve hashbang and encoding.
    Trailing whitespace is removed from all lines.

    :param tokens:
        Iterable of 5-tuple tokens as produced by
        `tokenize.generate_tokens`.

    :returns:
        Generator yielding the filtered token stream.
    """
    prev_typ = None
    prev_end_col = 0
    for typ, tok, (start_row, start_col), (end_row, end_col), line in tokens:
        if typ in (tokenize.NL, tokenize.NEWLINE):
            # Pull the (NL/NEWLINE) token's start back to just after the
            # previous token, so untokenize() emits no trailing whitespace.
            if prev_typ in (tokenize.NL, tokenize.NEWLINE):
                # Previous line was already blank: newline starts at col 0.
                start_col = 0
            else:
                start_col = prev_end_col
            end_col = start_col + 1
        elif typ == tokenize.COMMENT and start_row > 2:
            # Drop the comment entirely; rows 1-2 are exempt (see docstring).
            continue
        prev_typ = typ
        prev_end_col = end_col
        yield typ, tok, (start_row, start_col), (end_row, end_col), line


def strip_docstrings(tokens):
    """
    Replace docstring tokens with NL tokens in a `tokenize` stream.

    Any STRING token not part of an expression is deemed a docstring.
    Indented docstrings are not yet recognised.

    :param tokens:
        Iterable of 5-tuple tokens as produced by
        `tokenize.generate_tokens`.

    :returns:
        Generator yielding the filtered token stream.
    """
    # Two-state machine:
    #   'wait_string'  - at a point where a docstring could start; buffer
    #                    INDENT/DEDENT/STRING tokens on `stack` until we can
    #                    tell whether the STRING stood alone (docstring) or
    #                    began an expression.
    #   'wait_newline' - inside a non-docstring statement; pass tokens
    #                    through until its NEWLINE ends it.
    stack = []
    state = 'wait_string'
    for t in tokens:
        typ = t[0]
        if state == 'wait_string':
            if typ in (tokenize.NL, tokenize.COMMENT):
                yield t
            elif typ in (tokenize.DEDENT, tokenize.INDENT, tokenize.STRING):
                stack.append(t)
            elif typ == tokenize.NEWLINE:
                # A bare STRING statement: it was a docstring. Emit one NL
                # per physical line it covered, preserving line numbers.
                stack.append(t)
                start_line, end_line = stack[0][2][0], stack[-1][3][0]+1
                for i in range(start_line, end_line):
                    yield tokenize.NL, '\n', (i, 0), (i,1), '\n'
                # Re-emit any buffered INDENT/DEDENT tokens (block structure
                # must survive), repositioned on the line after the last NL.
                for t in stack:
                    if t[0] in (tokenize.DEDENT, tokenize.INDENT):
                        yield t[0], t[1], (i+1, t[2][1]), (i+1, t[3][1]), t[4]
                del stack[:]
            else:
                # Not a docstring after all: flush the buffer untouched and
                # skip ahead to the end of this statement.
                stack.append(t)
                for t in stack: yield t
                del stack[:]
                state = 'wait_newline'
        elif state == 'wait_newline':
            if typ == tokenize.NEWLINE:
                state = 'wait_string'
            yield t


def reindent(tokens, indent='    '):
    """
    Replace existing indentation in a token stream, with `indent`.

    :param tokens:
        Iterable of 5-tuple tokens as produced by
        `tokenize.generate_tokens`.
    :param str indent:
        The string used for one level of indentation.

    :returns:
        Generator yielding the re-indented token stream.
    """
    # old_levels tracks the original column widths of enclosing blocks so
    # DEDENT can restore the previous width; old_level/new_level are the
    # current original width (in columns) and new depth (in levels).
    old_levels = []
    old_level = 0
    new_level = 0
    for typ, tok, (start_row, start_col), (end_row, end_col), line in tokens:
        if typ == tokenize.INDENT:
            old_levels.append(old_level)
            old_level = len(tok)
            new_level += 1
            # Rewrite the INDENT token text to the new indentation string.
            tok = indent * new_level
        elif typ == tokenize.DEDENT:
            old_level = old_levels.pop()
            new_level -= 1
        # Shift every token left by the difference between old and new
        # indentation, clamped at column 0.
        start_col = max(0, start_col - old_level + new_level)
        if start_row == end_row:
            # Single-line token: recompute its end column from its length.
            end_col = start_col + len(tok)
        yield typ, tok, (start_row, start_col), (end_row, end_col), line