1"""Parses and creates Grammar objects"""
2import hashlib
3import os.path
4import sys
5from collections import namedtuple
6from copy import copy, deepcopy
7from io import open
8import pkgutil
9from ast import literal_eval
10from numbers import Integral
11
12from .utils import bfs, Py36, logger, classify_bool, is_id_continue, is_id_start, bfs_all_unique, small_factors
13from .lexer import Token, TerminalDef, PatternStr, PatternRE
14
15from .parse_tree_builder import ParseTreeBuilder
16from .parser_frontends import ParsingFrontend
17from .common import LexerConf, ParserConf
18from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol
19from .utils import classify, suppress, dedup_list, Str
20from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken, ParseError
21
22from .tree import Tree, SlottedTree as ST
23from .visitors import Transformer, Visitor, v_args, Transformer_InPlace, Transformer_NonRecursive
24inline_args = v_args(inline=True)
25
26__path__ = os.path.dirname(__file__)
27IMPORT_PATHS = ['grammars']
28
29EXT = '.lark'
30
31_RE_FLAGS = 'imslux'
32
33_EMPTY = Symbol('__empty__')
34
35_TERMINAL_NAMES = {
36    '.' : 'DOT',
37    ',' : 'COMMA',
38    ':' : 'COLON',
39    ';' : 'SEMICOLON',
40    '+' : 'PLUS',
41    '-' : 'MINUS',
42    '*' : 'STAR',
43    '/' : 'SLASH',
44    '\\' : 'BACKSLASH',
45    '|' : 'VBAR',
46    '?' : 'QMARK',
47    '!' : 'BANG',
48    '@' : 'AT',
49    '#' : 'HASH',
50    '$' : 'DOLLAR',
51    '%' : 'PERCENT',
52    '^' : 'CIRCUMFLEX',
53    '&' : 'AMPERSAND',
54    '_' : 'UNDERSCORE',
55    '<' : 'LESSTHAN',
56    '>' : 'MORETHAN',
57    '=' : 'EQUAL',
58    '"' : 'DBLQUOTE',
59    '\'' : 'QUOTE',
60    '`' : 'BACKQUOTE',
61    '~' : 'TILDE',
62    '(' : 'LPAR',
63    ')' : 'RPAR',
64    '{' : 'LBRACE',
65    '}' : 'RBRACE',
66    '[' : 'LSQB',
67    ']' : 'RSQB',
68    '\n' : 'NEWLINE',
69    '\r\n' : 'CRLF',
70    '\t' : 'TAB',
71    ' ' : 'SPACE',
72}
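
# Illustrative note (editor's sketch): these names are consumed by PrepareAnonTerminals
# below, so that an anonymous literal such as "(" inside a rule becomes the terminal
# LPAR instead of an opaque __ANON_0.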

# Grammar Parser
TERMINALS = {
    '_LPAR': r'\(',
    '_RPAR': r'\)',
    '_LBRA': r'\[',
    '_RBRA': r'\]',
    '_LBRACE': r'\{',
    '_RBRACE': r'\}',
    'OP': '[+*]|[?](?![a-z])',
    '_COLON': ':',
    '_COMMA': ',',
    '_OR': r'\|',
    '_DOT': r'\.(?!\.)',
    '_DOTDOT': r'\.\.',
    'TILDE': '~',
    'RULE': '!?[_?]?[a-z][_a-z0-9]*',
    'TERMINAL': '_?[A-Z][_A-Z0-9]*',
    'STRING': r'"(\\"|\\\\|[^"\n])*?"i?',
    'REGEXP': r'/(?!/)(\\/|\\\\|[^/])*?/[%s]*' % _RE_FLAGS,
    '_NL': r'(\r?\n)+\s*',
    '_NL_OR': r'(\r?\n)+\s*\|',
    'WS': r'[ \t]+',
    'COMMENT': r'\s*//[^\n]*',
    '_TO': '->',
    '_IGNORE': r'%ignore',
    '_OVERRIDE': r'%override',
    '_DECLARE': r'%declare',
    '_EXTEND': r'%extend',
    '_IMPORT': r'%import',
    'NUMBER': r'[+-]?\d+',
}

RULES = {
    'start': ['_list'],
    '_list':  ['_item', '_list _item'],
    '_item':  ['rule', 'term', 'ignore', 'import', 'declare', 'override', 'extend', '_NL'],

    'rule': ['RULE template_params _COLON expansions _NL',
             'RULE template_params _DOT NUMBER _COLON expansions _NL'],
    'template_params': ['_LBRACE _template_params _RBRACE',
                        ''],
    '_template_params': ['RULE',
                         '_template_params _COMMA RULE'],
    'expansions': ['_expansions'],
    '_expansions': ['alias',
                    '_expansions _OR alias',
                    '_expansions _NL_OR alias'],

    '?alias':     ['expansion _TO RULE', 'expansion'],
    'expansion': ['_expansion'],

    '_expansion': ['', '_expansion expr'],

    '?expr': ['atom',
              'atom OP',
              'atom TILDE NUMBER',
              'atom TILDE NUMBER _DOTDOT NUMBER',
              ],

    '?atom': ['_LPAR expansions _RPAR',
              'maybe',
              'value'],

    'value': ['terminal',
              'nonterminal',
              'literal',
              'range',
              'template_usage'],

    'terminal': ['TERMINAL'],
    'nonterminal': ['RULE'],

    '?name': ['RULE', 'TERMINAL'],

    'maybe': ['_LBRA expansions _RBRA'],
    'range': ['STRING _DOTDOT STRING'],

    'template_usage': ['RULE _LBRACE _template_args _RBRACE'],
    '_template_args': ['value',
                       '_template_args _COMMA value'],

    'term': ['TERMINAL _COLON expansions _NL',
             'TERMINAL _DOT NUMBER _COLON expansions _NL'],
    'override': ['_OVERRIDE rule',
                 '_OVERRIDE term'],
    'extend': ['_EXTEND rule',
               '_EXTEND term'],
    'ignore': ['_IGNORE expansions _NL'],
    'declare': ['_DECLARE _declare_args _NL'],
    'import': ['_IMPORT _import_path _NL',
               '_IMPORT _import_path _LPAR name_list _RPAR _NL',
               '_IMPORT _import_path _TO name _NL'],

    '_import_path': ['import_lib', 'import_rel'],
    'import_lib': ['_import_args'],
    'import_rel': ['_DOT _import_args'],
    '_import_args': ['name', '_import_args _DOT name'],

    'name_list': ['_name_list'],
    '_name_list': ['name', '_name_list _COMMA name'],

    '_declare_args': ['name', '_declare_args name'],
    'literal': ['REGEXP', 'STRING'],
}


# The value 5 keeps the number of states in the lalr parser somewhat minimal.
# It isn't optimal, but it's close to it. See PR #949
SMALL_FACTOR_THRESHOLD = 5
# The threshold above which repeats via ~ are split up into different rules.
# 50 is chosen because it keeps the number of states (and therefore lalr analysis time) low,
# while not being too aggressive and unnecessarily creating rules that might cause shift/reduce conflicts.
# (See PR #949)
REPEAT_BREAK_THRESHOLD = 50
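
# An illustrative sketch of the decomposition, assuming small_factors' documented
# contract (it returns pairs [(a, b), ...] such that folding n = n*a + b from n = 1
# reproduces its input): a hypothetical small_factors(10, 5) == [(2, 0), (5, 0)]
# would build ``rule~10`` from a sub-rule matching 2 repeats, used 5 times inside a
# second sub-rule, instead of one expansion containing 10 copies of the rule.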


@inline_args
class EBNF_to_BNF(Transformer_InPlace):
    def __init__(self):
        self.new_rules = []
        self.rules_cache = {}
        self.prefix = 'anon'
        self.i = 0
        self.rule_options = None

    def _name_rule(self, inner):
        new_name = '__%s_%s_%d' % (self.prefix, inner, self.i)
        self.i += 1
        return new_name

    def _add_rule(self, key, name, expansions):
        t = NonTerminal(name)
        self.new_rules.append((name, expansions, self.rule_options))
        self.rules_cache[key] = t
        return t

    def _add_recurse_rule(self, type_, expr):
        try:
            return self.rules_cache[expr]
        except KeyError:
            new_name = self._name_rule(type_)
            t = NonTerminal(new_name)
            tree = ST('expansions', [
                ST('expansion', [expr]),
                ST('expansion', [t, expr])
            ])
            return self._add_rule(expr, new_name, tree)
    def _add_repeat_rule(self, a, b, target, atom):
        """Generate a rule that matches ``target`` ``a`` times, followed by ``atom`` ``b`` times.

        When applied recursively (feeding the result back in as the next ``target``),
        the rule at level n matches ``atom`` x(n) times, where:
            x(0) = 1
            x(n) = a(n) * x(n-1) + b(n)

        Example rule when a=3, b=4:

            new_rule: target target target atom atom atom atom

        """
        key = (a, b, target, atom)
        try:
            return self.rules_cache[key]
        except KeyError:
            new_name = self._name_rule('repeat_a%d_b%d' % (a, b))
            tree = ST('expansions', [ST('expansion', [target] * a + [atom] * b)])
            return self._add_rule(key, new_name, tree)

    def _add_repeat_opt_rule(self, a, b, target, target_opt, atom):
        """Create a rule that matches ``atom`` 0 to (a*n+b)-1 times,
        given that ``target`` matches ``atom`` n times, and ``target_opt`` matches it 0 to n-1 times.

        First we generate target * i followed by target_opt, for i from 0 to a-1.
        These match atom 0 to n*a - 1 times.

        Then we generate target * a followed by atom * i, for i from 0 to b-1.
        These match atom n*a to n*a + b-1 times.

        The created rule will not have any shift/reduce conflicts, so it can be used with lalr.

        Example rule when a=3, b=4:

            new_rule: target_opt
                    | target target_opt
                    | target target target_opt

                    | target target target
                    | target target target atom
                    | target target target atom atom
                    | target target target atom atom atom

        """
        key = (a, b, target, atom, "opt")
        try:
            return self.rules_cache[key]
        except KeyError:
            new_name = self._name_rule('repeat_a%d_b%d_opt' % (a, b))
            tree = ST('expansions', [
                ST('expansion', [target]*i + [target_opt]) for i in range(a)
            ] + [
                ST('expansion', [target]*a + [atom]*i) for i in range(b)
            ])
            return self._add_rule(key, new_name, tree)

    def _generate_repeats(self, rule, mn, mx):
        """Generate a rule tree that repeats ``rule`` between ``mn`` and ``mx`` times (inclusive).
        """
        # For a small number of repeats, we can take the naive approach
        if mx < REPEAT_BREAK_THRESHOLD:
            return ST('expansions', [ST('expansion', [rule] * n) for n in range(mn, mx + 1)])

        # For large repeat values, we break the repetition into sub-rules.
        # We treat ``rule~mn..mx`` as ``rule~mn rule~0..(diff=mx-mn)``.
        # We then use small_factors to split mn and diff up into values [(a, b), ...].
        # These values are used with the help of _add_repeat_rule and _add_repeat_opt_rule
        # to generate a complete rule/expression that matches the corresponding number of repeats.
        mn_target = rule
        for a, b in small_factors(mn, SMALL_FACTOR_THRESHOLD):
            mn_target = self._add_repeat_rule(a, b, mn_target, rule)
        if mx == mn:
            return mn_target

        diff = mx - mn + 1  # We add one because _add_repeat_opt_rule generates rules that match one repeat less
        diff_factors = small_factors(diff, SMALL_FACTOR_THRESHOLD)
        diff_target = rule  # Match rule 1 time
        diff_opt_target = ST('expansion', [])  # Match rule 0 times (i.e. up to 1-1 times)
        for a, b in diff_factors[:-1]:
            diff_opt_target = self._add_repeat_opt_rule(a, b, diff_target, diff_opt_target, rule)
            diff_target = self._add_repeat_rule(a, b, diff_target, rule)

        a, b = diff_factors[-1]
        diff_opt_target = self._add_repeat_opt_rule(a, b, diff_target, diff_opt_target, rule)

        return ST('expansions', [ST('expansion', [mn_target] + [diff_opt_target])])

    def expr(self, rule, op, *args):
        if op.value == '?':
            empty = ST('expansion', [])
            return ST('expansions', [rule, empty])
        elif op.value == '+':
            # a : b c+ d
            #   -->
            # a : b _c d
            # _c : _c c | c;
            return self._add_recurse_rule('plus', rule)
        elif op.value == '*':
            # a : b c* d
            #   -->
            # a : b _c? d
            # _c : _c c | c;
            new_name = self._add_recurse_rule('star', rule)
            return ST('expansions', [new_name, ST('expansion', [])])
        elif op.value == '~':
            if len(args) == 1:
                mn = mx = int(args[0])
            else:
                mn, mx = map(int, args)
                if mx < mn or mn < 0:
                    raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (rule, mn, mx))

            return self._generate_repeats(rule, mn, mx)

        assert False, op

    def maybe(self, rule):
        keep_all_tokens = self.rule_options and self.rule_options.keep_all_tokens

        def will_not_get_removed(sym):
            if isinstance(sym, NonTerminal):
                return not sym.name.startswith('_')
            if isinstance(sym, Terminal):
                return keep_all_tokens or not sym.filter_out
            assert False

        if any(rule.scan_values(will_not_get_removed)):
            empty = _EMPTY
        else:
            empty = ST('expansion', [])

        return ST('expansions', [rule, empty])


class SimplifyRule_Visitor(Visitor):

    @staticmethod
    def _flatten(tree):
        while tree.expand_kids_by_data(tree.data):
            pass

    def expansion(self, tree):
        # rules_list unpacking
        # a : b (c|d) e
        #  -->
        # a : b c e | b d e
        #
        # In AST terms:
        # expansion(b, expansions(c, d), e)
        #   -->
        # expansions( expansion(b, c, e), expansion(b, d, e) )

        self._flatten(tree)

        for i, child in enumerate(tree.children):
            if isinstance(child, Tree) and child.data == 'expansions':
                tree.data = 'expansions'
                tree.children = [self.visit(ST('expansion', [option if i == j else other
                                                             for j, other in enumerate(tree.children)]))
                                 for option in dedup_list(child.children)]
                self._flatten(tree)
                break

    def alias(self, tree):
        rule, alias_name = tree.children
        if rule.data == 'expansions':
            aliases = []
            for child in tree.children[0].children:
                aliases.append(ST('alias', [child, alias_name]))
            tree.data = 'expansions'
            tree.children = aliases

    def expansions(self, tree):
        self._flatten(tree)
        # Ensure all children are unique
        if len(set(tree.children)) != len(tree.children):
            tree.children = dedup_list(tree.children)   # dedup is expensive, so try to minimize its use


class RuleTreeToText(Transformer):
    def expansions(self, x):
        return x

    def expansion(self, symbols):
        return symbols, None

    def alias(self, x):
        (expansion, _alias), alias = x
        assert _alias is None, (alias, expansion, '-', _alias)  # Double alias not allowed
        return expansion, alias.value


class PrepareAnonTerminals(Transformer_InPlace):
    """Create a unique list of anonymous terminals. Attempt to give them meaningful names as we add them."""

    def __init__(self, terminals):
        self.terminals = terminals
        self.term_set = {td.name for td in self.terminals}
        self.term_reverse = {td.pattern: td for td in terminals}
        self.i = 0
        self.rule_options = None

    @inline_args
    def pattern(self, p):
        value = p.value
        if p in self.term_reverse and p.flags != self.term_reverse[p].pattern.flags:
            raise GrammarError(u'Conflicting flags for the same terminal: %s' % p)

        term_name = None

        if isinstance(p, PatternStr):
            try:
                # If already defined, use the user-defined terminal name
                term_name = self.term_reverse[p].name
            except KeyError:
                # Try to assign an indicative anon-terminal name
                try:
                    term_name = _TERMINAL_NAMES[value]
                except KeyError:
                    if value and is_id_continue(value) and is_id_start(value[0]) and value.upper() not in self.term_set:
                        term_name = value.upper()

                if term_name in self.term_set:
                    term_name = None

        elif isinstance(p, PatternRE):
            if p in self.term_reverse:  # Kind of a weird placement
                term_name = self.term_reverse[p].name
        else:
            assert False, p

        if term_name is None:
            term_name = '__ANON_%d' % self.i
            self.i += 1

        if term_name not in self.term_set:
            assert p not in self.term_reverse
            self.term_set.add(term_name)
            termdef = TerminalDef(term_name, p)
            self.term_reverse[p] = termdef
            self.terminals.append(termdef)

        filter_out = False if self.rule_options and self.rule_options.keep_all_tokens else isinstance(p, PatternStr)

        return Terminal(term_name, filter_out=filter_out)


class _ReplaceSymbols(Transformer_InPlace):
    """Helper for ApplyTemplates"""

    def __init__(self):
        self.names = {}

    def value(self, c):
        if len(c) == 1 and isinstance(c[0], Token) and c[0].value in self.names:
            return self.names[c[0].value]
        return self.__default__('value', c, None)

    def template_usage(self, c):
        if c[0] in self.names:
            return self.__default__('template_usage', [self.names[c[0]].name] + c[1:], None)
        return self.__default__('template_usage', c, None)


class ApplyTemplates(Transformer_InPlace):
    """Apply the templates, creating new rules that represent the used templates"""

    def __init__(self, rule_defs):
        self.rule_defs = rule_defs
        self.replacer = _ReplaceSymbols()
        self.created_templates = set()

    def template_usage(self, c):
        name = c[0]
        args = c[1:]
        result_name = "%s{%s}" % (name, ",".join(a.name for a in args))
        if result_name not in self.created_templates:
            self.created_templates.add(result_name)
            (_n, params, tree, options) ,= (t for t in self.rule_defs if t[0] == name)
            assert len(params) == len(args), args
            result_tree = deepcopy(tree)
            self.replacer.names = dict(zip(params, args))
            self.replacer.transform(result_tree)
            self.rule_defs.append((result_name, [], result_tree, deepcopy(options)))
        return NonTerminal(result_name)


def _rfind(s, choices):
    return max(s.rfind(c) for c in choices)


def eval_escaping(s):
    w = ''
    i = iter(s)
    for n in i:
        w += n
        if n == '\\':
            try:
                n2 = next(i)
            except StopIteration:
                raise GrammarError("Literal ended unexpectedly (bad escaping): `%r`" % s)
            if n2 == '\\':
                w += '\\\\'
            elif n2 not in 'Uuxnftr':
                w += '\\'
            w += n2
    w = w.replace('\\"', '"').replace("'", "\\'")

    to_eval = "u'''%s'''" % w
    try:
        s = literal_eval(to_eval)
    except SyntaxError as e:
        raise GrammarError(s, e)

    return s
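
# A few illustrative (untested) examples of eval_escaping's behavior:
#   eval_escaping(r'a\nb')  == 'a\nb'   (i.e. 'a', a real newline, 'b')
#   eval_escaping(r'\w')    == '\\w'    (unknown escapes are preserved, e.g. for regexps)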


def _literal_to_pattern(literal):
    v = literal.value
    flag_start = _rfind(v, '/"')+1
    assert flag_start > 0
    flags = v[flag_start:]
    assert all(f in _RE_FLAGS for f in flags), flags

    if literal.type == 'STRING' and '\n' in v:
        raise GrammarError('You cannot put newlines in string literals')

    if literal.type == 'REGEXP' and '\n' in v and 'x' not in flags:
        raise GrammarError('You can only use newlines in regular expressions '
                           'with the `x` (verbose) flag')

    v = v[:flag_start]
    assert v[0] == v[-1] and v[0] in '"/'
    x = v[1:-1]

    s = eval_escaping(x)

    if s == "":
        raise GrammarError("Empty terminals are not allowed (%s)" % literal)

    if literal.type == 'STRING':
        s = s.replace('\\\\', '\\')
        return PatternStr(s, flags, raw=literal.value)
    elif literal.type == 'REGEXP':
        return PatternRE(s, flags, raw=literal.value)
    else:
        assert False, 'Invariant failed: literal.type not in ["STRING", "REGEXP"]'


@inline_args
class PrepareLiterals(Transformer_InPlace):
    def literal(self, literal):
        return ST('pattern', [_literal_to_pattern(literal)])

    def range(self, start, end):
        assert start.type == end.type == 'STRING'
        start = start.value[1:-1]
        end = end.value[1:-1]
        assert len(eval_escaping(start)) == len(eval_escaping(end)) == 1
        regexp = '[%s-%s]' % (start, end)
        return ST('pattern', [PatternRE(regexp)])


def _make_joined_pattern(regexp, flags_set):
    # In Python 3.6, a new syntax for flags was introduced that allows us to restrict the scope
    # of flags to a specific regexp group. We are already using it in `lexer.Pattern._get_flags`.
    # However, for prior Python versions, we still need to use global flags, so we have to make sure
    # that there are no flag collisions when we merge several terminals.
    flags = ()
    if not Py36:
        if len(flags_set) > 1:
            raise GrammarError("Lark doesn't support joining terminals with conflicting flags in python <3.6!")
        elif len(flags_set) == 1:
            flags ,= flags_set

    return PatternRE(regexp, flags)


class TerminalTreeToPattern(Transformer_NonRecursive):
    def pattern(self, ps):
        p ,= ps
        return p

    def expansion(self, items):
        assert items
        if len(items) == 1:
            return items[0]

        pattern = ''.join(i.to_regexp() for i in items)
        return _make_joined_pattern(pattern, {i.flags for i in items})

    def expansions(self, exps):
        if len(exps) == 1:
            return exps[0]

        # Do a bit of sorting to make sure that the longest option is returned
        # (Python's re module otherwise prefers just 'l' when given (l|ll) and both could match)
        exps.sort(key=lambda x: (-x.max_width, -x.min_width, -len(x.value)))

        pattern = '(?:%s)' % ('|'.join(i.to_regexp() for i in exps))
        return _make_joined_pattern(pattern, {i.flags for i in exps})

    def expr(self, args):
        inner, op = args[:2]
        if op == '~':
            if len(args) == 3:
                op = "{%d}" % int(args[2])
            else:
                mn, mx = map(int, args[2:])
                if mx < mn:
                    raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (inner, mn, mx))
                op = "{%d,%d}" % (mn, mx)
        else:
            assert len(args) == 2
        return PatternRE('(?:%s)%s' % (inner.to_regexp(), op), inner.flags)

    def maybe(self, expr):
        return self.expr(expr + ['?'])

    def alias(self, t):
        raise GrammarError("Aliasing not allowed in terminals (You used -> in the wrong place)")

    def value(self, v):
        return v[0]


class PrepareSymbols(Transformer_InPlace):
    def value(self, v):
        v ,= v
        if isinstance(v, Tree):
            return v
        elif v.type == 'RULE':
            return NonTerminal(Str(v.value))
        elif v.type == 'TERMINAL':
            return Terminal(Str(v.value), filter_out=v.startswith('_'))
        assert False


def nr_deepcopy_tree(t):
    """Deepcopy tree `t` without recursion"""
    return Transformer_NonRecursive(False).transform(t)


class Grammar:
    def __init__(self, rule_defs, term_defs, ignore):
        self.term_defs = term_defs
        self.rule_defs = rule_defs
        self.ignore = ignore

    def compile(self, start, terminals_to_keep):
        # We change the trees in-place (to support huge grammars),
        # so deepcopy allows calling compile more than once.
        term_defs = [(n, (nr_deepcopy_tree(t), p)) for n, (t, p) in self.term_defs]
        rule_defs = [(n, p, nr_deepcopy_tree(t), o) for n, p, t, o in self.rule_defs]

        # ===================
        #  Compile Terminals
        # ===================

        # Convert terminal-trees to strings/regexps

        for name, (term_tree, priority) in term_defs:
            if term_tree is None:  # Terminal added through %declare
                continue
            expansions = list(term_tree.find_data('expansion'))
            if len(expansions) == 1 and not expansions[0].children:
                raise GrammarError("Terminals cannot be empty (%s)" % name)

        transformer = PrepareLiterals() * TerminalTreeToPattern()
        terminals = [TerminalDef(name, transformer.transform(term_tree), priority)
                     for name, (term_tree, priority) in term_defs if term_tree]

        # =================
        #  Compile Rules
        # =================

        # 1. Pre-process terminals
        anon_tokens_transf = PrepareAnonTerminals(terminals)
        transformer = PrepareLiterals() * PrepareSymbols() * anon_tokens_transf  # Adds to terminals

        # 2. Inline Templates

        transformer *= ApplyTemplates(rule_defs)

        # 3. Convert EBNF to BNF (and apply step 1 & 2)
        ebnf_to_bnf = EBNF_to_BNF()
        rules = []
        i = 0
        while i < len(rule_defs):  # We have to do it like this because rule_defs might grow due to templates
            name, params, rule_tree, options = rule_defs[i]
            i += 1
            if len(params) != 0:  # Don't transform templates
                continue
            rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None
            ebnf_to_bnf.rule_options = rule_options
            ebnf_to_bnf.prefix = name
            anon_tokens_transf.rule_options = rule_options
            tree = transformer.transform(rule_tree)
            res = ebnf_to_bnf.transform(tree)
            rules.append((name, res, options))
        rules += ebnf_to_bnf.new_rules

        assert len(rules) == len({name for name, _t, _o in rules}), "Whoops, name collision"

        # 4. Compile tree to Rule objects
        rule_tree_to_text = RuleTreeToText()

        simplify_rule = SimplifyRule_Visitor()
        compiled_rules = []
        for rule_content in rules:
            name, tree, options = rule_content
            simplify_rule.visit(tree)
            expansions = rule_tree_to_text.transform(tree)

            for i, (expansion, alias) in enumerate(expansions):
                if alias and name.startswith('_'):
                    raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias))

                empty_indices = [x == _EMPTY for x in expansion]
                if any(empty_indices):
                    exp_options = copy(options) or RuleOptions()
                    exp_options.empty_indices = empty_indices
                    expansion = [x for x in expansion if x != _EMPTY]
                else:
                    exp_options = options

                for sym in expansion:
                    assert isinstance(sym, Symbol)
                    if sym.is_term and exp_options and exp_options.keep_all_tokens:
                        sym.filter_out = False
                rule = Rule(NonTerminal(name), expansion, i, alias, exp_options)
                compiled_rules.append(rule)

        # Remove duplicates of empty rules, throw error for non-empty duplicates
        if len(set(compiled_rules)) != len(compiled_rules):
            duplicates = classify(compiled_rules, lambda x: x)
            for dups in duplicates.values():
                if len(dups) > 1:
                    if dups[0].expansion:
                        raise GrammarError("Rules defined twice: %s\n\n(Might happen due to colliding expansion of optionals: [] or ?)"
                                           % ''.join('\n  * %s' % i for i in dups))

                    # Empty rule; assert all other attributes are equal
                    assert len({(r.alias, r.order, r.options) for r in dups}) == len(dups)

            # Remove duplicates
            compiled_rules = list(set(compiled_rules))

        # Filter out unused rules
        while True:
            c = len(compiled_rules)
            used_rules = {s for r in compiled_rules
                            for s in r.expansion
                            if isinstance(s, NonTerminal)
                            and s != r.origin}
            used_rules |= {NonTerminal(s) for s in start}
            compiled_rules, unused = classify_bool(compiled_rules, lambda r: r.origin in used_rules)
            for r in unused:
                logger.debug("Unused rule: %s", r)
            if len(compiled_rules) == c:
                break

        # Filter out unused terminals
        if terminals_to_keep != '*':
            used_terms = {t.name for r in compiled_rules
                                 for t in r.expansion
                                 if isinstance(t, Terminal)}
            terminals, unused = classify_bool(terminals, lambda t: t.name in used_terms or t.name in self.ignore or t.name in terminals_to_keep)
            if unused:
                logger.debug("Unused terminals: %s", [t.name for t in unused])

        return terminals, compiled_rules, self.ignore


PackageResource = namedtuple('PackageResource', 'pkg_name path')


class FromPackageLoader(object):
    """
    Provides a simple way of creating custom import loaders that load from packages via ``pkgutil.get_data`` instead of using `open`.
    This allows them to be compatible even from within zip files.

    Relative imports are handled, so you can just freely use them.

    pkg_name: The name of the package. You can probably provide `__name__` most of the time.
    search_paths: All the paths that will be searched on absolute imports.
    """
    def __init__(self, pkg_name, search_paths=("", )):
        self.pkg_name = pkg_name
        self.search_paths = search_paths

    def __repr__(self):
        return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths)

    def __call__(self, base_path, grammar_path):
        if base_path is None:
            to_try = self.search_paths
        else:
            # Check whether or not the importing grammar was loaded by this module.
            if not isinstance(base_path, PackageResource) or base_path.pkg_name != self.pkg_name:
                # Technically false, but FileNotFoundError doesn't exist in python2.7, and this message should never reach the end user anyway
                raise IOError()
            to_try = [base_path.path]
        for path in to_try:
            full_path = os.path.join(path, grammar_path)
            try:
                text = pkgutil.get_data(self.pkg_name, full_path)
            except IOError:
                continue
            else:
                return PackageResource(self.pkg_name, full_path), text.decode()
        raise IOError()
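
# Illustrative usage sketch (names other than FromPackageLoader are hypothetical):
#   my_loader = FromPackageLoader(__name__, ('grammars',))
# The resulting loader can be passed alongside plain directory strings wherever
# import paths are accepted, since do_import() below handles both callables and paths.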


stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)


def resolve_term_references(term_dict):
    # TODO Solve with transitive closure (maybe)

    while True:
        changed = False
        for name, token_tree in term_dict.items():
            if token_tree is None:  # Terminal added through %declare
                continue
            for exp in token_tree.find_data('value'):
                item ,= exp.children
                if isinstance(item, Token):
                    if item.type == 'RULE':
                        raise GrammarError("Rules aren't allowed inside terminals (%s in %s)" % (item, name))
                    if item.type == 'TERMINAL':
                        try:
                            term_value = term_dict[item]
                        except KeyError:
                            raise GrammarError("Terminal used but not defined: %s" % item)
                        assert term_value is not None
                        exp.children[0] = term_value
                        changed = True
        if not changed:
            break

    for name, term in term_dict.items():
        if term:    # Not just declared
            for child in term.children:
                ids = [id(x) for x in child.iter_subtrees()]
                if id(term) in ids:
                    raise GrammarError("Recursion in terminal '%s' (recursion is only allowed in rules, not terminals)" % name)


def options_from_rule(name, params, *x):
    if len(x) > 1:
        priority, expansions = x
        priority = int(priority)
    else:
        expansions ,= x
        priority = None
    params = [t.value for t in params.children] if params is not None else []  # For the grammar parser

    keep_all_tokens = name.startswith('!')
    name = name.lstrip('!')
    expand1 = name.startswith('?')
    name = name.lstrip('?')

    return name, params, expansions, RuleOptions(keep_all_tokens, expand1, priority=priority,
                                                 template_source=(name if params else None))


def symbols_from_strcase(expansion):
    return [Terminal(x, filter_out=x.startswith('_')) if x.isupper() else NonTerminal(x) for x in expansion]
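
# For example (illustrative): symbols_from_strcase(['expansions', '_OR', 'alias']) returns
# [NonTerminal('expansions'), Terminal('_OR', filter_out=True), NonTerminal('alias')].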


@inline_args
class PrepareGrammar(Transformer_InPlace):
    def terminal(self, name):
        return name

    def nonterminal(self, name):
        return name


def _find_used_symbols(tree):
    assert tree.data == 'expansions'
    return {t for x in tree.find_data('expansion')
              for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))}


def _get_parser():
    try:
        return _get_parser.cache
    except AttributeError:
        terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()]

        rules = [options_from_rule(name, None, x) for name, x in RULES.items()]
        rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o)
                 for r, _p, xs, o in rules for i, x in enumerate(xs)]
        callback = ParseTreeBuilder(rules, ST).create_callback()
        import re
        lexer_conf = LexerConf(terminals, re, ['WS', 'COMMENT'])
        parser_conf = ParserConf(rules, callback, ['start'])
        lexer_conf.lexer_type = 'standard'
        parser_conf.parser_type = 'lalr'
        _get_parser.cache = ParsingFrontend(lexer_conf, parser_conf, None)
        return _get_parser.cache


GRAMMAR_ERRORS = [
    ('Incorrect type of value', ['a: 1\n']),
    ('Unclosed parenthesis', ['a: (\n']),
    ('Unmatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']),
    ('Expecting rule or terminal definition (missing colon)', ['a\n', 'A\n', 'a->\n', 'A->\n', 'a A\n']),
    ('Illegal name for rules or terminals', ['Aa:\n']),
    ('Alias expects lowercase name', ['a: -> "a"\n']),
    ('Unexpected colon', ['a::\n', 'a: b:\n', 'a: B:\n', 'a: "a":\n']),
    ('Misplaced operator', ['a: b??', 'a: b(?)', 'a:+\n', 'a:?\n', 'a:*\n', 'a:|*\n']),
    ('Expecting option ("|") or a new rule or terminal definition', ['a:a\n()\n']),
    ('Terminal names cannot contain dots', ['A.B\n']),
    ('Expecting rule or terminal definition', ['"a"\n']),
    ('%import expects a name', ['%import "a"\n']),
    ('%ignore expects a value', ['%ignore %import\n']),
]


def _translate_parser_exception(parse, e):
    error = e.match_examples(parse, GRAMMAR_ERRORS, use_accepts=True)
    if error:
        return error
    elif 'STRING' in e.expected:
        return "Expecting a value"


def _parse_grammar(text, name, start='start'):
    try:
        tree = _get_parser().parse(text + '\n', start)
    except UnexpectedCharacters as e:
        context = e.get_context(text)
        raise GrammarError("Unexpected input at line %d column %d in %s: \n\n%s" %
                           (e.line, e.column, name, context))
    except UnexpectedToken as e:
        context = e.get_context(text)
        error = _translate_parser_exception(_get_parser().parse, e)
        if error:
            raise GrammarError("%s, at line %s column %s\n\n%s" % (error, e.line, e.column, context))
        raise

    return PrepareGrammar().transform(tree)


def _error_repr(error):
    if isinstance(error, UnexpectedToken):
        error2 = _translate_parser_exception(_get_parser().parse, error)
        if error2:
            return error2
        expected = ', '.join(error.accepts or error.expected)
        return "Unexpected token %r. Expected one of: {%s}" % (str(error.token), expected)
    else:
        return str(error)


def _search_interactive_parser(interactive_parser, predicate):
    def expand(node):
        path, p = node
        for choice in p.choices():
            t = Token(choice, '')
            try:
                new_p = p.feed_token(t)
            except ParseError:    # Illegal
                pass
            else:
                yield path + (choice,), new_p

    for path, p in bfs_all_unique([((), interactive_parser)], expand):
        if predicate(p):
            return path, p


def find_grammar_errors(text, start='start'):
    errors = []
    def on_error(e):
        errors.append((e, _error_repr(e)))

        # recover to a new line
        token_path, _ = _search_interactive_parser(e.interactive_parser.as_immutable(), lambda p: '_NL' in p.choices())
        for token_type in token_path:
            e.interactive_parser.feed_token(Token(token_type, ''))
        e.interactive_parser.feed_token(Token('_NL', '\n'))
        return True

    _tree = _get_parser().parse(text + '\n', start, on_error=on_error)

    errors_by_line = classify(errors, lambda e: e[0].line)
    errors = [el[0] for el in errors_by_line.values()]      # already sorted

    for e in errors:
        e[0].interactive_parser = None
    return errors


def _get_mangle(prefix, aliases, base_mangle=None):
    def mangle(s):
        if s in aliases:
            s = aliases[s]
        else:
            if s[0] == '_':
                s = '_%s__%s' % (prefix, s[1:])
            else:
                s = '%s__%s' % (prefix, s)
        if base_mangle is not None:
            s = base_mangle(s)
        return s
    return mangle
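
# For example (illustrative): given mangle = _get_mangle('base', {}),
#   mangle('rule')  -> 'base__rule'
#   mangle('_rule') -> '_base__rule'   (a leading underscore stays in front)
# Aliased names are substituted by their alias unprefixed; base_mangle, if given,
# is applied last in either case.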

def _mangle_exp(exp, mangle):
    if mangle is None:
        return exp
    exp = deepcopy(exp)  # TODO: is this needed
    for t in exp.iter_subtrees():
        for i, c in enumerate(t.children):
            if isinstance(c, Token) and c.type in ('RULE', 'TERMINAL'):
                t.children[i] = Token(c.type, mangle(c.value))
    return exp


class GrammarBuilder:
    def __init__(self, global_keep_all_tokens=False, import_paths=None, used_files=None):
        self.global_keep_all_tokens = global_keep_all_tokens
        self.import_paths = import_paths or []
        self.used_files = used_files or {}

        self._definitions = {}
        self._ignore_names = []

    def _is_term(self, name):
        # Imported terminals are of the form `Path__to__Grammar__file__TERMINAL_NAME`
        # Only the last part is the actual name, and the rest might contain mixed case
        return name.rpartition('__')[-1].isupper()

    def _grammar_error(self, msg, *names):
        args = {}
        for i, name in enumerate(names, start=1):
            postfix = '' if i == 1 else str(i)
            args['name' + postfix] = name
            args['type' + postfix] = lowercase_type = ("rule", "terminal")[self._is_term(name)]
            args['Type' + postfix] = lowercase_type.title()
        raise GrammarError(msg.format(**args))

    def _check_options(self, name, options):
        if self._is_term(name):
            if options is None:
                options = 1
            # if we don't use Integral here, we run into python2.7/python3 problems with long vs int
            elif not isinstance(options, Integral):
                raise GrammarError("Terminals require a single int as 'options' (e.g. priority), got %s" % (type(options),))
        else:
            if options is None:
                options = RuleOptions()
            elif not isinstance(options, RuleOptions):
                raise GrammarError("Rules require a RuleOptions instance as 'options'")
            if self.global_keep_all_tokens:
                options.keep_all_tokens = True
        return options

    def _define(self, name, exp, params=(), options=None, override=False):
        if name in self._definitions:
            if not override:
                self._grammar_error("{Type} '{name}' defined more than once", name)
        elif override:
            self._grammar_error("Cannot override a nonexistent {type} {name}", name)

        if name.startswith('__'):
            self._grammar_error('Names starting with double-underscore are reserved (Error at {name})', name)

        self._definitions[name] = (params, exp, self._check_options(name, options))

    def _extend(self, name, exp, params=(), options=None):
        if name not in self._definitions:
            self._grammar_error("Can't extend {type} {name} as it wasn't defined before", name)
        if tuple(params) != tuple(self._definitions[name][0]):
            self._grammar_error("Cannot extend {type} with different parameters: {name}", name)
        # TODO: think about what to do with 'options'
        base = self._definitions[name][1]

        assert isinstance(base, Tree) and base.data == 'expansions'
        base.children.insert(0, exp)

    def _ignore(self, exp_or_name):
        if isinstance(exp_or_name, str):
            self._ignore_names.append(exp_or_name)
        else:
            assert isinstance(exp_or_name, Tree)
            t = exp_or_name
            if t.data == 'expansions' and len(t.children) == 1:
                t2 ,= t.children
                if t2.data == 'expansion' and len(t2.children) == 1:
                    item ,= t2.children
                    if item.data == 'value':
                        item ,= item.children
                        if isinstance(item, Token) and item.type == 'TERMINAL':
                            self._ignore_names.append(item.value)
                            return

            name = '__IGNORE_%d' % len(self._ignore_names)
            self._ignore_names.append(name)
            self._definitions[name] = ((), t, 1)

    def _declare(self, *names):
        for name in names:
            self._define(name, None)

    def _unpack_import(self, stmt, grammar_name):
        if len(stmt.children) > 1:
            path_node, arg1 = stmt.children
        else:
            path_node, = stmt.children
            arg1 = None

        if isinstance(arg1, Tree):  # Multi import
            dotted_path = tuple(path_node.children)
            names = arg1.children
            aliases = dict(zip(names, names))  # Can't have aliased multi import, so all aliases will be the same as names
        else:  # Single import
            dotted_path = tuple(path_node.children[:-1])
            if not dotted_path:
                name ,= path_node.children
                raise GrammarError("Nothing was imported from grammar `%s`" % name)
            name = path_node.children[-1]  # Get name from dotted path
            aliases = {name.value: (arg1 or name).value}  # Aliases if exist

        if path_node.data == 'import_lib':  # Import from library
            base_path = None
        else:  # Relative import
            if grammar_name == '<string>':  # Import relative to script file path if grammar is coded in script
                try:
                    base_file = os.path.abspath(sys.modules['__main__'].__file__)
                except AttributeError:
                    base_file = None
            else:
                base_file = grammar_name  # Import relative to grammar file path if external grammar file
            if base_file:
                if isinstance(base_file, PackageResource):
                    base_path = PackageResource(base_file.pkg_name, os.path.split(base_file.path)[0])
                else:
                    base_path = os.path.split(base_file)[0]
            else:
                base_path = os.path.abspath(os.path.curdir)

        return dotted_path, base_path, aliases

    def _unpack_definition(self, tree, mangle):
        if tree.data == 'rule':
            name, params, exp, opts = options_from_rule(*tree.children)
        else:
            name = tree.children[0].value
            params = ()     # TODO terminal templates
            opts = int(tree.children[1]) if len(tree.children) == 3 else 1  # priority
            exp = tree.children[-1]

        if mangle is not None:
            params = tuple(mangle(p) for p in params)
            name = mangle(name)

        exp = _mangle_exp(exp, mangle)
        return name, exp, params, opts

    def load_grammar(self, grammar_text, grammar_name="<?>", mangle=None):
        tree = _parse_grammar(grammar_text, grammar_name)

        imports = {}
        for stmt in tree.children:
            if stmt.data == 'import':
                dotted_path, base_path, aliases = self._unpack_import(stmt, grammar_name)
                try:
                    import_base_path, import_aliases = imports[dotted_path]
                    assert base_path == import_base_path, 'Inconsistent base_path for %s.' % '.'.join(dotted_path)
                    import_aliases.update(aliases)
                except KeyError:
                    imports[dotted_path] = base_path, aliases

        for dotted_path, (base_path, aliases) in imports.items():
            self.do_import(dotted_path, base_path, aliases, mangle)

        for stmt in tree.children:
            if stmt.data in ('term', 'rule'):
                self._define(*self._unpack_definition(stmt, mangle))
            elif stmt.data == 'override':
                r ,= stmt.children
                self._define(*self._unpack_definition(r, mangle), override=True)
            elif stmt.data == 'extend':
                r ,= stmt.children
                self._extend(*self._unpack_definition(r, mangle))
            elif stmt.data == 'ignore':
                # if mangle is not None, we shouldn't apply ignore, since we aren't in a toplevel grammar
                if mangle is None:
                    self._ignore(*stmt.children)
            elif stmt.data == 'declare':
                names = [t.value for t in stmt.children]
                if mangle is None:
                    self._declare(*names)
                else:
                    self._declare(*map(mangle, names))
            elif stmt.data == 'import':
                pass
            else:
                assert False, stmt

        term_defs = {name: exp
            for name, (_params, exp, _options) in self._definitions.items()
            if self._is_term(name)
        }
        resolve_term_references(term_defs)

    def _remove_unused(self, used):
        def rule_dependencies(symbol):
            if self._is_term(symbol):
                return []
            try:
                params, tree, _ = self._definitions[symbol]
            except KeyError:
                return []
            return _find_used_symbols(tree) - set(params)

        _used = set(bfs(used, rule_dependencies))
        self._definitions = {k: v for k, v in self._definitions.items() if k in _used}

    def do_import(self, dotted_path, base_path, aliases, base_mangle=None):
        assert dotted_path
        mangle = _get_mangle('__'.join(dotted_path), aliases, base_mangle)
        grammar_path = os.path.join(*dotted_path) + EXT
        to_try = self.import_paths + ([base_path] if base_path is not None else []) + [stdlib_loader]
        for source in to_try:
            try:
                if callable(source):
                    joined_path, text = source(base_path, grammar_path)
                else:
                    joined_path = os.path.join(source, grammar_path)
                    with open(joined_path, encoding='utf8') as f:
                        text = f.read()
            except IOError:
                continue
            else:
                h = hashlib.md5(text.encode('utf8')).hexdigest()
                if self.used_files.get(joined_path, h) != h:
                    raise RuntimeError("Grammar file was changed during importing")
                self.used_files[joined_path] = h

                gb = GrammarBuilder(self.global_keep_all_tokens, self.import_paths, self.used_files)
                gb.load_grammar(text, joined_path, mangle)
                gb._remove_unused(map(mangle, aliases))
                for name in gb._definitions:
                    if name in self._definitions:
                        raise GrammarError("Cannot import '%s' from '%s': Symbol already defined." % (name, grammar_path))

                self._definitions.update(**gb._definitions)
                break
        else:
            # Search failed. Make Python throw a nice error.
            open(grammar_path, encoding='utf8')
            assert False, "Couldn't import grammar %s, but a corresponding file was found at a place where lark doesn't search for it" % (dotted_path,)

    def validate(self):
        for name, (params, exp, _options) in self._definitions.items():
            for i, p in enumerate(params):
                if p in self._definitions:
                    raise GrammarError("Template Parameter conflicts with rule %s (in template %s)" % (p, name))
                if p in params[:i]:
                    raise GrammarError("Duplicate Template Parameter %s (in template %s)" % (p, name))

            if exp is None:  # Remaining checks don't apply to abstract rules/terminals
                continue

            for temp in exp.find_data('template_usage'):
                sym = temp.children[0]
                args = temp.children[1:]
                if sym not in params:
                    if sym not in self._definitions:
                        self._grammar_error("Template '%s' used but not defined (in {type} {name})" % sym, name)
                    if len(args) != len(self._definitions[sym][0]):
                        expected, actual = len(self._definitions[sym][0]), len(args)
                        self._grammar_error("Wrong number of template arguments used for {name} "
                                            "(expected %s, got %s) (in {type2} {name2})" % (expected, actual), sym, name)

            for sym in _find_used_symbols(exp):
                if sym not in self._definitions and sym not in params:
                    self._grammar_error("{Type} '{name}' used but not defined (in {type2} {name2})", sym, name)

        if not set(self._definitions).issuperset(self._ignore_names):
            raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(self._ignore_names) - set(self._definitions)))

    def build(self):
        self.validate()
        rule_defs = []
        term_defs = []
        for name, (params, exp, options) in self._definitions.items():
            if self._is_term(name):
                assert len(params) == 0
                term_defs.append((name, (exp, options)))
            else:
                rule_defs.append((name, params, exp, options))
        # resolve_term_references(term_defs)
        return Grammar(rule_defs, term_defs, self._ignore_names)


def verify_used_files(file_hashes):
    for path, old in file_hashes.items():
        text = None
        if isinstance(path, str) and os.path.exists(path):
            with open(path, encoding='utf8') as f:
                text = f.read()
        elif isinstance(path, PackageResource):
            with suppress(IOError):
                text = pkgutil.get_data(*path).decode('utf-8')
        if text is None:  # We don't know how to load the path, so ignore it.
            continue

        current = hashlib.md5(text.encode()).hexdigest()
        if old != current:
            logger.info("File %r changed, rebuilding Parser" % path)
            return False
    return True


def list_grammar_imports(grammar, import_paths=[]):
    "Returns a list of paths to the lark grammars imported by the given grammar (recursively)"
    builder = GrammarBuilder(False, import_paths)
    builder.load_grammar(grammar, '<string>')
    return list(builder.used_files.keys())


def load_grammar(grammar, source, import_paths, global_keep_all_tokens):
    builder = GrammarBuilder(global_keep_all_tokens, import_paths)
    builder.load_grammar(grammar, source)
    return builder.build(), builder.used_files
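
# Illustrative end-to-end sketch (the supported public entry point is lark.Lark,
# which drives GrammarBuilder/load_grammar internally):
#   grammar, used_files = load_grammar('start: "a"+', '<string>', [], False)
#   terminals, rules, ignore = grammar.compile(['start'], '*')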