1"Converts between Lark and Nearley grammars. Work in progress!"
2
3import os.path
4import sys
5import codecs
6
7
8from lark import Lark, InlineTransformer
9
# Lark grammar for the portion of Nearley's grammar syntax handled by this
# module: rule definitions (``name -> ...``), ``@`` directives, ``@{% ... %}``
# JavaScript blocks and macro definitions. Whitespace and ``#`` comments are
# ignored while parsing.
nearley_grammar = r"""
    start: (ruledef|directive)+

    directive: "@" NAME (STRING|NAME)
             | "@" JS  -> js_code
    ruledef: NAME "->" expansions
           | NAME REGEXP "->" expansions -> macro
    expansions: expansion ("|" expansion)*

    expansion: expr+ js

    ?expr: item (":" /[+*?]/)?

    ?item: rule|string|regexp|null
         | "(" expansions ")"

    rule: NAME
    string: STRING
    regexp: REGEXP
    null: "null"
    JS: /{%.*?%}/s
    js: JS?

    NAME: /[a-zA-Z_$]\w*/
    COMMENT: /#[^\n]*/
    REGEXP: /\[.*?\]/

    %import common.ESCAPED_STRING -> STRING
    %import common.WS
    %ignore WS
    %ignore COMMENT

    """

# Parser for Nearley sources, built once when the module is imported.
nearley_grammar_parser = Lark(nearley_grammar, parser='earley', lexer='standard')
45
def _get_rulename(name):
    """Return the Lark rule name used for the Nearley rule called *name*.

    The bare names ``_`` and ``__`` are first swapped for ``_ws_maybe`` and
    ``_ws``. Every ``$`` is then spelled out, the result is lower-cased, and
    the ``n_`` prefix is added.
    """
    special_names = {'_': '_ws_maybe', '__':'_ws'}
    base = special_names.get(name, name)
    escaped = base.replace('$', '__DOLLAR__')
    return 'n_' + escaped.lower()
49
class NearleyToLark(InlineTransformer):
    """Rewrite a parsed Nearley rule tree as Lark grammar source text.

    Each callback returns a string fragment. While transforming, the
    instance also collects:

    * ``extra_rules`` / ``extra_rules_rev`` -- generated ``xrule_N`` helper
      rules, indexed by name and by body so identical bodies share a name;
    * ``alias_js_code`` -- the JavaScript snippet behind every ``alias_N``.
    """

    def __init__(self):
        # Running number used to build the next ``alias_N`` name.
        self._count = 0
        # name -> rule body, and rule body -> name.
        self.extra_rules = {}
        self.extra_rules_rev = {}
        # alias name -> JavaScript code attached to an expansion.
        self.alias_js_code = {}

    def _new_function(self, code):
        """Store *code* under a fresh ``alias_N`` name and return the name."""
        alias_name = 'alias_%d' % self._count
        self._count = self._count + 1
        self.alias_js_code[alias_name] = code
        return alias_name

    def _extra_rule(self, rule):
        """Return the helper-rule name for *rule*, creating it on first use."""
        existing = self.extra_rules_rev.get(rule)
        if existing is not None:
            return existing

        fresh = 'xrule_%d' % len(self.extra_rules)
        assert fresh not in self.extra_rules
        self.extra_rules_rev[rule] = fresh
        self.extra_rules[fresh] = rule
        return fresh

    def rule(self, name):
        """A reference to another rule becomes that rule's Lark name."""
        return _get_rulename(name)

    def ruledef(self, name, exps):
        """Format one complete rule definition."""
        lark_name = _get_rulename(name)
        return '!%s: %s' % (lark_name, exps)

    def expr(self, item, op):
        """Wrap a quantified item in a shared helper rule."""
        return self._extra_rule('(%s)%s' % (item, op))

    def regexp(self, r):
        """Surround the pattern text with slashes."""
        return '/%s/' % r

    def null(self):
        """``null`` contributes no text to the expansion."""
        return ''

    def string(self, s):
        """Route a string literal through a shared helper rule."""
        return self._extra_rule(s)

    def expansion(self, *x):
        """Join the items of one alternative and append its alias, if any.

        The last child is always the ``js`` node. When it holds a code
        token, the two-character delimiters at each end are cut off and the
        remaining code is registered as a new alias.
        """
        items = x[:-1]
        js = x[-1]
        alias = ''
        if js.children:
            (snippet,) = js.children
            alias = '-> ' + self._new_function(snippet[2:-2])
        return ' '.join(items) + alias

    def expansions(self, *x):
        """Put each alternative on its own ``|``-prefixed line."""
        separator = '\n    |'
        return '%s' % separator.join(x)

    def start(self, *rules):
        """Join the non-empty results, one per line."""
        return '\n'.join(r for r in rules if r)
108
def _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, includes):
    """Convert Nearley grammar text into a list of Lark rule definitions.

    The text is parsed with ``nearley_grammar_parser`` and its top-level
    statements are handled one by one:

    * ``@builtin`` / ``@include`` directives load the referenced file and
      convert it recursively (each path is loaded at most once);
    * ``@{% ... %}`` blocks are appended to ``js_code``;
    * macro definitions are skipped (not supported yet);
    * rule definitions are transformed with ``n2l``.

    Args:
        g: Nearley grammar source text.
        builtin_path: Folder against which ``@builtin`` paths are resolved.
        n2l: Transformer applied to each rule definition.
        js_code: List that receives the code of top-level JS blocks;
            modified in place.
        folder_path: Folder against which ``@include`` paths are resolved.
        includes: Set of file paths already loaded; modified in place.

    Returns:
        The list of transformed rule definitions, including those coming
        from included files.

    Raises:
        AssertionError: If a directive other than ``builtin`` or
            ``include`` is found.
        Exception: If a statement of an unknown kind is found.
    """
    rule_defs = []

    tree = nearley_grammar_parser.parse(g)
    for statement in tree.children:
        if statement.data == 'directive':
            directive, arg = statement.children
            if directive not in ('builtin', 'include'):
                # Raised explicitly rather than via ``assert`` so the check
                # is not stripped when Python runs with -O. The exception
                # type is the one the former assert produced.
                raise AssertionError("Unknown directive: %s" % directive)

            folder = builtin_path if directive == 'builtin' else folder_path
            # Drops the first and last character of the argument, i.e. the
            # quotes when the argument is a string literal.
            path = os.path.join(folder, arg[1:-1])
            if path not in includes:
                # Recorded before recursing so a file that includes itself
                # (directly or indirectly) is only processed once.
                includes.add(path)
                with codecs.open(path, encoding='utf8') as f:
                    text = f.read()
                # Nested ``@include`` paths are resolved relative to the
                # folder of the file that has just been loaded.
                rule_defs += _nearley_to_lark(text, builtin_path, n2l, js_code, os.path.abspath(os.path.dirname(path)), includes)
        elif statement.data == 'js_code':
            code ,= statement.children
            # Strip the two-character delimiters at both ends of the block.
            code = code[2:-2]
            js_code.append(code)
        elif statement.data == 'macro':
            pass    # TODO Add support for macros!
        elif statement.data == 'ruledef':
            rule_defs.append( n2l.transform(statement) )
        else:
            raise Exception("Unknown statement: %s" % statement)

    return rule_defs
138
139
def create_code_for_nearley_grammar(g, start, builtin_path, folder_path):
    """Generate Python source for a Lark parser equivalent to grammar *g*.

    The returned text contains, in order: the lark import, the converted
    grammar as a string literal, the js2py translation of the collected
    JavaScript, a ``TransformNearley`` class exposing one attribute per
    alias, the ``parser`` object and a ``parse(text)`` function.

    Args:
        g: Nearley grammar source text.
        start: Name of the start rule as written in the Nearley grammar.
        builtin_path: Folder against which ``@builtin`` paths are resolved.
        folder_path: Folder against which ``@include`` paths are resolved.

    Returns:
        The generated Python module as a single string.
    """
    import js2py

    emit_code = []
    def emit(x=None):
        # Append one line of output; called without argument it emits a
        # blank line.
        if x:
            emit_code.append(x)
        emit_code.append('\n')

    js_code = ['function id(x) {return x[0];}']
    n2l = NearleyToLark()
    rule_defs = _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, set())
    lark_g = '\n'.join(rule_defs)
    # Helper rules collected during the conversion go after the user's rules.
    lark_g += '\n'+'\n'.join('!%s: %s' % item for item in n2l.extra_rules.items())

    emit('from lark import Lark, Transformer')
    emit()
    emit('grammar = ' + repr(lark_g))
    emit()

    # Each alias becomes a JavaScript variable holding its snippet.
    for alias, code in n2l.alias_js_code.items():
        js_code.append('%s = (%s);' % (alias, code))

    emit(js2py.translate_js('\n'.join(js_code)))
    emit('class TransformNearley(Transformer):')
    for alias in n2l.alias_js_code:
        emit("    %s = var.get('%s').to_python()" % (alias, alias))
    emit("    __default__ = lambda self, n, c, m: c if c else None")

    emit()
    # The start rule must go through the same renaming as every other rule
    # (prefixing, lower-casing, ``$`` escaping); otherwise a start name such
    # as ``Main`` would refer to a rule that the grammar does not contain.
    emit('parser = Lark(grammar, start="%s", maybe_placeholders=False)' % _get_rulename(start))
    emit('def parse(text):')
    emit('    return TransformNearley().transform(parser.parse(text))')

    return ''.join(emit_code)
175
def main(fn, start, nearley_lib):
    """Read the Nearley grammar file *fn* and return the generated parser code.

    ``@builtin`` files are looked up in the ``builtin`` folder of
    *nearley_lib*; ``@include`` files are looked up next to *fn*.
    """
    with codecs.open(fn, encoding='utf8') as grammar_file:
        source = grammar_file.read()

    builtin_dir = os.path.join(nearley_lib, 'builtin')
    grammar_dir = os.path.abspath(os.path.dirname(fn))
    return create_code_for_nearley_grammar(source, start, builtin_dir, grammar_dir)
180
181
if __name__ == '__main__':
    # Exactly three arguments are expected. Checking for inequality (rather
    # than "fewer than") makes surplus arguments print the usage text
    # instead of failing in the unpacking below.
    if len(sys.argv) != 4:
        print("Reads Nearley grammar (with js functions) outputs an equivalent lark parser.")
        print("Usage: %s <nearley_grammar_path> <start_rule> <nearley_lib_path>" % sys.argv[0])
        sys.exit(1)

    fn, start, nearley_lib = sys.argv[1:]

    # The generated parser source is written to standard output.
    print(main(fn, start, nearley_lib))
191