1from __future__ import print_function
2
3###{standalone
4#
5#
6#   Lark Stand-alone Generator Tool
7# ----------------------------------
8# Generates a stand-alone LALR(1) parser with a standard lexer
9#
10# Git:    https://github.com/erezsh/lark
11# Author: Erez Shinan (erezshin@gmail.com)
12#
13#
14#    >>> LICENSE
15#
16#    This tool and its generated code use a separate license from Lark,
17#    and are subject to the terms of the Mozilla Public License, v. 2.0.
18#    If a copy of the MPL was not distributed with this
19#    file, You can obtain one at https://mozilla.org/MPL/2.0/.
20#
21#    If you wish to purchase a commercial license for this tool and its
22#    generated code, you may contact me via email or otherwise.
23#
24#    If MPL2 is incompatible with your free or open-source project,
25#    contact me and we'll work it out.
26#
27#
28
29from io import open
30###}
31
32import sys
33import token, tokenize
34import os
35from os import path
36from collections import defaultdict
37from functools import partial
38from argparse import ArgumentParser, SUPPRESS
39from warnings import warn
40
41import lark
42from lark import Lark
43from lark.tools import lalr_argparser, build_lalr, make_warnings_comments
44
45
46from lark.grammar import RuleOptions, Rule
47from lark.lexer import TerminalDef
48
# Directory holding this tool, and the directory one level above it
# (joined with the parent-directory component, not normalised).
_dir = path.dirname(__file__)
_larkdir = os.path.join(_dir, os.pardir)


# Source files, relative to ``_larkdir``, whose ``###{standalone`` sections
# are copied into the generated parser. They are emitted in exactly this
# order; the first entry is this tool itself.
EXTRACT_STANDALONE_FILES = [
    # This tool.
    'tools/standalone.py',

    # Remaining modules, in emission order.
    'exceptions.py',
    'utils.py',
    'tree.py',
    'visitors.py',
    'grammar.py',
    'lexer.py',
    'common.py',
    'parse_tree_builder.py',
    'parsers/lalr_parser.py',
    'parsers/lalr_analysis.py',
    'parser_frontends.py',
    'lark.py',
    'indenter.py',
]
69
def extract_sections(lines):
    """Collect the text enclosed by ``###{name`` / ``###}`` marker lines.

    Parameters:
        lines: iterable of text lines (an open file works); line endings
            are kept as given.

    Returns:
        dict mapping each section name to the concatenation of every line
        found inside a section of that name. A name that is opened more than
        once accumulates text in order of appearance.

    Raises:
        ValueError: a line starts with ``###`` but is followed by neither
            ``{`` nor ``}``, including a bare ``###`` with nothing after it.
    """
    current = None      # name of the section being read, None when outside
    pending = []        # lines gathered since the last closing marker
    collected = defaultdict(list)

    for line in lines:
        if line.startswith('###'):
            # Slice instead of indexing: a bare '###' without a trailing
            # newline has no 4th character and must fail like any other
            # malformed marker instead of raising IndexError.
            marker = line[3:4]
            if marker == '{':
                current = line[4:].strip()
            elif marker == '}':
                collected[current] += pending
                current = None
                pending = []
            else:
                raise ValueError(line)
        elif current:
            # Lines outside a named section are dropped.
            pending.append(line)

    return {name: ''.join(chunks) for name, chunks in collected.items()}
88
89
def strip_docstrings(line_gen):
    """Blank out comments and docstrings in Python source.

    The source is tokenized and re-assembled token by token. A string token
    that directly follows an INDENT token (or is the very first token) is
    replaced with ``#--``; every comment token is replaced with ``##``
    followed by a newline. All other tokens are copied verbatim, padded with
    spaces so they keep their original column.

    Based on code from: https://stackoverflow.com/questions/1769332/script-to-remove-python-comments-docstrings

    Parameters:
        line_gen: readline-style callable handed to
            ``tokenize.generate_tokens``.

    Returns:
        The rewritten source as a single string.
    """
    pieces = []

    # Starting as INDENT lets a string at the very top count as a docstring.
    previous_kind = token.INDENT
    previous_row = -1
    previous_col = 0

    for tok in tokenize.generate_tokens(line_gen):
        kind, text = tok[0], tok[1]
        start_row, start_col = tok[2]
        end_row, end_col = tok[3]

        # A token that begins on a later row restarts column tracking.
        if start_row > previous_row:
            previous_col = 0

        # Re-create the horizontal gap between consecutive tokens.
        gap = start_col - previous_col
        if gap > 0:
            pieces.append(" " * gap)

        if kind == tokenize.COMMENT:
            replacement = "##\n"
        elif kind == token.STRING and previous_kind == token.INDENT:
            replacement = "#--"
        else:
            replacement = text
        pieces.append(replacement)

        previous_kind = kind
        previous_col = end_col
        previous_row = end_row

    return ''.join(pieces)
119
120
def main(fobj, start, print=print):
    """Deprecated entry point: build a parser from ``fobj`` and emit it.

    Issues a ``DeprecationWarning``, constructs a ``Lark`` instance with the
    LALR parser and the contextual lexer, then passes it to
    ``gen_standalone`` with ``print`` as the output callable.

    NOTE: this module defines ``main`` a second time further down; that later
    definition rebinds the name once the module has finished loading.

    Parameters:
        fobj: grammar source, handed to ``Lark`` unchanged.
        start: start rule(s), handed to ``Lark`` unchanged.
        print: callable that receives each generated chunk.
    """
    warn('`lark.tools.standalone.main` is being redesigned. Use `gen_standalone`', DeprecationWarning)
    gen_standalone(
        Lark(fobj, parser="lalr", lexer="contextual", start=start),
        output=print,
    )
125
def gen_standalone(lark_inst, output=None, out=sys.stdout, compress=False):
    """Emit a complete stand-alone parser module for ``lark_inst``.

    The generated text consists of, in order: a version header, the
    ``###{standalone`` section of every file in ``EXTRACT_STANDALONE_FILES``,
    the serialized parser tables (``DATA`` and ``MEMO``), and a
    ``Lark_StandAlone`` factory function.

    Parameters:
        lark_inst: object whose ``memo_serialize([TerminalDef, Rule])``
            returns the ``(data, memo)`` pair to embed.
        output: callable invoked once per emitted chunk, and with no
            arguments to emit an empty line. Defaults to ``print`` writing
            to ``out``.
        out: file object used only when ``output`` is None.
        compress: when true, ``DATA`` and ``MEMO`` are written as
            base64-encoded, zlib-compressed pickles, each followed by a line
            that decodes it in the generated module.
    """
    import base64
    import pickle
    import zlib

    if output is None:
        output = partial(print, file=out)

    def emit_packed(obj):
        # pickle -> zlib -> base64, written as a bytes literal via repr().
        pickled = pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
        output(repr(base64.b64encode(zlib.compress(pickled))))

    def emit_unpack(name):
        # Line that turns the packed literal back into the original object.
        output('%(name)s = pickle.loads(zlib.decompress(base64.b64decode(%(name)s)))' % {'name': name})

    version = lark.__version__
    output('# The file was automatically generated by Lark v%s' % version)
    output('__version__ = "%s"' % version)
    output()

    for index, relpath in enumerate(EXTRACT_STANDALONE_FILES):
        with open(os.path.join(_larkdir, relpath)) as source:
            code = extract_sections(source)['standalone']
        # The first entry (this file) is emitted as-is; every other file
        # has its comments and docstrings blanked out.
        if index > 0:
            readline = partial(next, iter(code.splitlines(True)))
            code = strip_docstrings(readline)
        output(code)

    data, memo = lark_inst.memo_serialize([TerminalDef, Rule])
    output('import pickle, zlib, base64')
    if not compress:
        output('DATA = (')
        output(data)
        output(')')
        output('MEMO = (')
        output(memo)
        output(')')
    else:
        output('DATA = (')
        emit_packed(data)
        output(')')
        emit_unpack('DATA')
        output('MEMO = (')
        emit_packed(memo)
        output(')')
        emit_unpack('MEMO')

    trailer = (
        'Shift = 0',
        'Reduce = 1',
        "def Lark_StandAlone(**kwargs):",
        "  return Lark._load_from_dict(DATA, MEMO, **kwargs)",
    )
    for line in trailer:
        output(line)
174
175
176
177
def main():
    """Command-line entry point of the stand-alone generator.

    Parses the command line (the options inherited from ``lalr_argparser``
    plus ``-c/--compress``), builds the parser with ``build_lalr`` and writes
    the generated module through ``gen_standalone``.

    When invoked without any argument, the help text is printed to stderr
    and the process exits with status 1.

    The legacy positional ``<start>`` argument is still accepted: it triggers
    a warning and is appended to the ``-s`` start list.
    """
    # NOTE(review): presumably reroutes warnings so they show up as comments
    # in the generated output -- confirm against lark.tools.
    make_warnings_comments()

    # `prog` must be the bare program name; the previous value embedded the
    # text "prog='...'" itself, which then appeared in usage/help output.
    parser = ArgumentParser(prog='python -m lark.tools.standalone',
                            description="Lark Stand-alone Generator Tool",
                            parents=[lalr_argparser],
                            epilog='Look at the Lark documentation for more info on the options')
    # Hidden positional kept only for the deprecated `<grammar-file> <start>` syntax.
    parser.add_argument("old_start", nargs='?', help=SUPPRESS)
    parser.add_argument('-c', '--compress', action='store_true', default=False, help="Enable compression")

    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)

    ns = parser.parse_args()
    if ns.old_start is not None:
        warn('The syntax `python -m lark.tools.standalone <grammar-file> <start>` is deprecated. Use the -s option')
        ns.start.append(ns.old_start)

    lark_inst, out = build_lalr(ns)
    gen_standalone(lark_inst, out=out, compress=ns.compress)
194
195if __name__ == '__main__':
196    main()