# Copyright 2017, Alex Willmer
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

# !mitogen: minify_safe

import sys

try:
    from io import StringIO
except ImportError:
    from StringIO import StringIO

import mitogen.core

if sys.version_info < (2, 7, 11):
    from mitogen.compat import tokenize
else:
    import tokenize

def minimize_source(source):
    """
    Remove comments and docstrings from Python `source`, preserving line
    numbers and syntax of empty blocks.

    :param str source:
        The source to minimize.

    :returns str:
        The minimized source.
    """
    source = mitogen.core.to_text(source)
    tokens = tokenize.generate_tokens(StringIO(source).readline)
    tokens = strip_comments(tokens)
    tokens = strip_docstrings(tokens)
    tokens = reindent(tokens)
    return tokenize.untokenize(tokens)
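

# A hedged usage sketch, not part of the original module: the exact
# whitespace emitted by tokenize.untokenize() can differ slightly across
# Python versions, so the expected output below (traced on CPython 3.x) is
# indicative rather than authoritative. The hashbang comment survives, the
# docstring line becomes a blank line, the trailing comment disappears, and
# the four-space indent collapses to a single space:
#
#     >>> print(minimize_source(
#     ...     '#!/usr/bin/env python\n'
#     ...     'def add(a, b):\n'
#     ...     '    """Return the sum."""\n'
#     ...     '    return a + b  # trailing comment\n'
#     ... ))
#     #!/usr/bin/env python
#     def add(a, b):
#
#      return a + b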


def strip_comments(tokens):
    """
    Drop comment tokens from a `tokenize` stream.

    Comments on lines 1-2 are kept, to preserve any hashbang and encoding
    declaration. Trailing whitespace is removed from all lines.
    """
    prev_typ = None
    prev_end_col = 0
    for typ, tok, (start_row, start_col), (end_row, end_col), line in tokens:
        if typ in (tokenize.NL, tokenize.NEWLINE):
            # Snap the newline token up against the preceding token,
            # discarding trailing whitespace (including any left behind by a
            # comment removed below).
            if prev_typ in (tokenize.NL, tokenize.NEWLINE):
                start_col = 0
            else:
                start_col = prev_end_col
            end_col = start_col + 1
        elif typ == tokenize.COMMENT and start_row > 2:
            continue
        prev_typ = typ
        prev_end_col = end_col
        yield typ, tok, (start_row, start_col), (end_row, end_col), line
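

# Behaviour sketch, illustrative only: comments on the first two lines (the
# hashbang and PEP 263 coding-declaration slots) pass through, while later
# comments are dropped together with the whitespace that preceded them.
# Reprs shown as on Python 3:
#
#     >>> src = ('#!/usr/bin/env python\n'
#     ...        '# -*- coding: utf-8 -*-\n'
#     ...        'x = 1  # dropped\n')
#     >>> toks = tokenize.generate_tokens(StringIO(src).readline)
#     >>> [t[1] for t in strip_comments(toks) if t[0] == tokenize.COMMENT]
#     ['#!/usr/bin/env python', '# -*- coding: utf-8 -*-']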


def strip_docstrings(tokens):
    """
    Replace docstring tokens with NL tokens in a `tokenize` stream.

    Any STRING token not part of an expression is deemed a docstring.
    Indented docstrings are recognised too: the INDENT/DEDENT tokens
    surrounding them are buffered and re-emitted on the following row.
    """
    stack = []
    state = 'wait_string'
    for t in tokens:
        typ = t[0]
        if state == 'wait_string':
            if typ in (tokenize.NL, tokenize.COMMENT):
                yield t
            elif typ in (tokenize.DEDENT, tokenize.INDENT, tokenize.STRING):
                # Buffer tokens that may belong to a docstring until the end
                # of the logical line reveals whether it was one.
                stack.append(t)
            elif typ == tokenize.NEWLINE:
                # A logical line containing only a bare string: a docstring.
                # Emit one NL per source row to preserve line numbering, then
                # re-emit any buffered INDENT/DEDENT tokens on the next row.
                stack.append(t)
                start_line, end_line = stack[0][2][0], stack[-1][3][0] + 1
                for i in range(start_line, end_line):
                    yield tokenize.NL, '\n', (i, 0), (i, 1), '\n'
                for t in stack:
                    if t[0] in (tokenize.DEDENT, tokenize.INDENT):
                        yield t[0], t[1], (i + 1, t[2][1]), (i + 1, t[3][1]), t[4]
                del stack[:]
            else:
                # Not a docstring after all; flush the buffer untouched and
                # skip ahead to the end of the logical line.
                stack.append(t)
                for t in stack:
                    yield t
                del stack[:]
                state = 'wait_newline'
        elif state == 'wait_newline':
            if typ == tokenize.NEWLINE:
                state = 'wait_string'
            yield t
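

# Behaviour sketch, illustrative only: a docstring collapses to a bare NL
# token, so the minified module keeps its original line count. Expected
# output as traced on CPython 3.x; treat it as indicative:
#
#     >>> src = '"""Module docstring."""\nx = 1\n'
#     >>> toks = tokenize.generate_tokens(StringIO(src).readline)
#     >>> tokenize.untokenize(strip_docstrings(toks))
#     '\nx = 1\n'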


def reindent(tokens, indent=' '):
    """
    Replace existing indentation in a token stream with `indent`.
    """
    old_levels = []
    old_level = 0
    new_level = 0
    for typ, tok, (start_row, start_col), (end_row, end_col), line in tokens:
        if typ == tokenize.INDENT:
            # Entering a block: remember the old indent width and substitute
            # one copy of `indent` per nesting level.
            old_levels.append(old_level)
            old_level = len(tok)
            new_level += 1
            tok = indent * new_level
        elif typ == tokenize.DEDENT:
            old_level = old_levels.pop()
            new_level -= 1
        # Shift the token left by the difference between the old and new
        # indent widths. NB: this arithmetic assumes `indent` is a single
        # character, as per the default.
        start_col = max(0, start_col - old_level + new_level)
        if start_row == end_row:
            end_col = start_col + len(tok)
        yield typ, tok, (start_row, start_col), (end_row, end_col), line
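

# A minimal self-check sketch, not part of the original module: running this
# file directly minifies a small sample so the pipeline above can be
# eyeballed. Expect the hashbang to survive, the docstring line to become
# blank, the trailing comment to vanish, and the indent to collapse to a
# single space, with the overall line count unchanged.
if __name__ == '__main__':
    SAMPLE = (
        '#!/usr/bin/env python\n'
        'def add(a, b):\n'
        '    """Return the sum of `a` and `b`."""\n'
        '    return a + b  # simple enough\n'
    )
    sys.stdout.write(minimize_source(SAMPLE))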