1from __future__ import print_function 2 3###{standalone 4# 5# 6# Lark Stand-alone Generator Tool 7# ---------------------------------- 8# Generates a stand-alone LALR(1) parser with a standard lexer 9# 10# Git: https://github.com/erezsh/lark 11# Author: Erez Shinan (erezshin@gmail.com) 12# 13# 14# >>> LICENSE 15# 16# This tool and its generated code use a separate license from Lark, 17# and are subject to the terms of the Mozilla Public License, v. 2.0. 18# If a copy of the MPL was not distributed with this 19# file, You can obtain one at https://mozilla.org/MPL/2.0/. 20# 21# If you wish to purchase a commercial license for this tool and its 22# generated code, you may contact me via email or otherwise. 23# 24# If MPL2 is incompatible with your free or open-source project, 25# contact me and we'll work it out. 26# 27# 28 29from io import open 30###} 31 32import sys 33import token, tokenize 34import os 35from os import path 36from collections import defaultdict 37from functools import partial 38from argparse import ArgumentParser, SUPPRESS 39from warnings import warn 40 41import lark 42from lark import Lark 43from lark.tools import lalr_argparser, build_lalr, make_warnings_comments 44 45 46from lark.grammar import RuleOptions, Rule 47from lark.lexer import TerminalDef 48 49_dir = path.dirname(__file__) 50_larkdir = path.join(_dir, path.pardir) 51 52 53EXTRACT_STANDALONE_FILES = [ 54 'tools/standalone.py', 55 'exceptions.py', 56 'utils.py', 57 'tree.py', 58 'visitors.py', 59 'grammar.py', 60 'lexer.py', 61 'common.py', 62 'parse_tree_builder.py', 63 'parsers/lalr_parser.py', 64 'parsers/lalr_analysis.py', 65 'parser_frontends.py', 66 'lark.py', 67 'indenter.py', 68] 69 70def extract_sections(lines): 71 section = None 72 text = [] 73 sections = defaultdict(list) 74 for l in lines: 75 if l.startswith('###'): 76 if l[3] == '{': 77 section = l[4:].strip() 78 elif l[3] == '}': 79 sections[section] += text 80 section = None 81 text = [] 82 else: 83 raise ValueError(l) 84 elif section: 85 text.append(l) 86 87 return {name:''.join(text) for name, text in sections.items()} 88 89 90def strip_docstrings(line_gen): 91 """ Strip comments and docstrings from a file. 92 Based on code from: https://stackoverflow.com/questions/1769332/script-to-remove-python-comments-docstrings 93 """ 94 res = [] 95 96 prev_toktype = token.INDENT 97 last_lineno = -1 98 last_col = 0 99 100 tokgen = tokenize.generate_tokens(line_gen) 101 for toktype, ttext, (slineno, scol), (elineno, ecol), ltext in tokgen: 102 if slineno > last_lineno: 103 last_col = 0 104 if scol > last_col: 105 res.append(" " * (scol - last_col)) 106 if toktype == token.STRING and prev_toktype == token.INDENT: 107 # Docstring 108 res.append("#--") 109 elif toktype == tokenize.COMMENT: 110 # Comment 111 res.append("##\n") 112 else: 113 res.append(ttext) 114 prev_toktype = toktype 115 last_col = ecol 116 last_lineno = elineno 117 118 return ''.join(res) 119 120 121def main(fobj, start, print=print): 122 warn('`lark.tools.standalone.main` is being redesigned. Use `gen_standalone`', DeprecationWarning) 123 lark_inst = Lark(fobj, parser="lalr", lexer="contextual", start=start) 124 gen_standalone(lark_inst, print) 125 126def gen_standalone(lark_inst, output=None, out=sys.stdout, compress=False): 127 if output is None: 128 output = partial(print, file=out) 129 130 import pickle, zlib, base64 131 def compressed_output(obj): 132 s = pickle.dumps(obj, pickle.HIGHEST_PROTOCOL) 133 c = zlib.compress(s) 134 output(repr(base64.b64encode(c))) 135 136 def output_decompress(name): 137 output('%(name)s = pickle.loads(zlib.decompress(base64.b64decode(%(name)s)))' % locals()) 138 139 output('# The file was automatically generated by Lark v%s' % lark.__version__) 140 output('__version__ = "%s"' % lark.__version__) 141 output() 142 143 for i, pyfile in enumerate(EXTRACT_STANDALONE_FILES): 144 with open(os.path.join(_larkdir, pyfile)) as f: 145 code = extract_sections(f)['standalone'] 146 if i: # if not this file 147 code = strip_docstrings(partial(next, iter(code.splitlines(True)))) 148 output(code) 149 150 data, m = lark_inst.memo_serialize([TerminalDef, Rule]) 151 output('import pickle, zlib, base64') 152 if compress: 153 output('DATA = (') 154 compressed_output(data) 155 output(')') 156 output_decompress('DATA') 157 output('MEMO = (') 158 compressed_output(m) 159 output(')') 160 output_decompress('MEMO') 161 else: 162 output('DATA = (') 163 output(data) 164 output(')') 165 output('MEMO = (') 166 output(m) 167 output(')') 168 169 170 output('Shift = 0') 171 output('Reduce = 1') 172 output("def Lark_StandAlone(**kwargs):") 173 output(" return Lark._load_from_dict(DATA, MEMO, **kwargs)") 174 175 176 177 178def main(): 179 make_warnings_comments() 180 parser = ArgumentParser(prog="prog='python -m lark.tools.standalone'", description="Lark Stand-alone Generator Tool", 181 parents=[lalr_argparser], epilog='Look at the Lark documentation for more info on the options') 182 parser.add_argument("old_start", nargs='?', help=SUPPRESS) 183 parser.add_argument('-c', '--compress', action='store_true', default=0, help="Enable compression") 184 if len(sys.argv)==1: 185 parser.print_help(sys.stderr) 186 sys.exit(1) 187 ns = parser.parse_args() 188 if ns.old_start is not None: 189 warn('The syntax `python -m lark.tools.standalone <grammar-file> <start>` is deprecated. Use the -s option') 190 ns.start.append(ns.old_start) 191 192 lark_inst, out = build_lalr(ns) 193 gen_standalone(lark_inst, out=out, compress=ns.compress) 194 195if __name__ == '__main__': 196 main()