1"""Parses and creates Grammar objects""" 2import hashlib 3import os.path 4import sys 5from collections import namedtuple 6from copy import copy, deepcopy 7from io import open 8import pkgutil 9from ast import literal_eval 10from numbers import Integral 11 12from .utils import bfs, Py36, logger, classify_bool, is_id_continue, is_id_start, bfs_all_unique, small_factors 13from .lexer import Token, TerminalDef, PatternStr, PatternRE 14 15from .parse_tree_builder import ParseTreeBuilder 16from .parser_frontends import ParsingFrontend 17from .common import LexerConf, ParserConf 18from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol 19from .utils import classify, suppress, dedup_list, Str 20from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken, ParseError 21 22from .tree import Tree, SlottedTree as ST 23from .visitors import Transformer, Visitor, v_args, Transformer_InPlace, Transformer_NonRecursive 24inline_args = v_args(inline=True) 25 26__path__ = os.path.dirname(__file__) 27IMPORT_PATHS = ['grammars'] 28 29EXT = '.lark' 30 31_RE_FLAGS = 'imslux' 32 33_EMPTY = Symbol('__empty__') 34 35_TERMINAL_NAMES = { 36 '.' : 'DOT', 37 ',' : 'COMMA', 38 ':' : 'COLON', 39 ';' : 'SEMICOLON', 40 '+' : 'PLUS', 41 '-' : 'MINUS', 42 '*' : 'STAR', 43 '/' : 'SLASH', 44 '\\' : 'BACKSLASH', 45 '|' : 'VBAR', 46 '?' : 'QMARK', 47 '!' 
: 'BANG', 48 '@' : 'AT', 49 '#' : 'HASH', 50 '$' : 'DOLLAR', 51 '%' : 'PERCENT', 52 '^' : 'CIRCUMFLEX', 53 '&' : 'AMPERSAND', 54 '_' : 'UNDERSCORE', 55 '<' : 'LESSTHAN', 56 '>' : 'MORETHAN', 57 '=' : 'EQUAL', 58 '"' : 'DBLQUOTE', 59 '\'' : 'QUOTE', 60 '`' : 'BACKQUOTE', 61 '~' : 'TILDE', 62 '(' : 'LPAR', 63 ')' : 'RPAR', 64 '{' : 'LBRACE', 65 '}' : 'RBRACE', 66 '[' : 'LSQB', 67 ']' : 'RSQB', 68 '\n' : 'NEWLINE', 69 '\r\n' : 'CRLF', 70 '\t' : 'TAB', 71 ' ' : 'SPACE', 72} 73 74# Grammar Parser 75TERMINALS = { 76 '_LPAR': r'\(', 77 '_RPAR': r'\)', 78 '_LBRA': r'\[', 79 '_RBRA': r'\]', 80 '_LBRACE': r'\{', 81 '_RBRACE': r'\}', 82 'OP': '[+*]|[?](?![a-z])', 83 '_COLON': ':', 84 '_COMMA': ',', 85 '_OR': r'\|', 86 '_DOT': r'\.(?!\.)', 87 '_DOTDOT': r'\.\.', 88 'TILDE': '~', 89 'RULE': '!?[_?]?[a-z][_a-z0-9]*', 90 'TERMINAL': '_?[A-Z][_A-Z0-9]*', 91 'STRING': r'"(\\"|\\\\|[^"\n])*?"i?', 92 'REGEXP': r'/(?!/)(\\/|\\\\|[^/])*?/[%s]*' % _RE_FLAGS, 93 '_NL': r'(\r?\n)+\s*', 94 '_NL_OR': r'(\r?\n)+\s*\|', 95 'WS': r'[ \t]+', 96 'COMMENT': r'\s*//[^\n]*', 97 '_TO': '->', 98 '_IGNORE': r'%ignore', 99 '_OVERRIDE': r'%override', 100 '_DECLARE': r'%declare', 101 '_EXTEND': r'%extend', 102 '_IMPORT': r'%import', 103 'NUMBER': r'[+-]?\d+', 104} 105 106RULES = { 107 'start': ['_list'], 108 '_list': ['_item', '_list _item'], 109 '_item': ['rule', 'term', 'ignore', 'import', 'declare', 'override', 'extend', '_NL'], 110 111 'rule': ['RULE template_params _COLON expansions _NL', 112 'RULE template_params _DOT NUMBER _COLON expansions _NL'], 113 'template_params': ['_LBRACE _template_params _RBRACE', 114 ''], 115 '_template_params': ['RULE', 116 '_template_params _COMMA RULE'], 117 'expansions': ['_expansions'], 118 '_expansions': ['alias', 119 '_expansions _OR alias', 120 '_expansions _NL_OR alias'], 121 122 '?alias': ['expansion _TO RULE', 'expansion'], 123 'expansion': ['_expansion'], 124 125 '_expansion': ['', '_expansion expr'], 126 127 '?expr': ['atom', 128 'atom OP', 129 'atom TILDE 
NUMBER', 130 'atom TILDE NUMBER _DOTDOT NUMBER', 131 ], 132 133 '?atom': ['_LPAR expansions _RPAR', 134 'maybe', 135 'value'], 136 137 'value': ['terminal', 138 'nonterminal', 139 'literal', 140 'range', 141 'template_usage'], 142 143 'terminal': ['TERMINAL'], 144 'nonterminal': ['RULE'], 145 146 '?name': ['RULE', 'TERMINAL'], 147 148 'maybe': ['_LBRA expansions _RBRA'], 149 'range': ['STRING _DOTDOT STRING'], 150 151 'template_usage': ['RULE _LBRACE _template_args _RBRACE'], 152 '_template_args': ['value', 153 '_template_args _COMMA value'], 154 155 'term': ['TERMINAL _COLON expansions _NL', 156 'TERMINAL _DOT NUMBER _COLON expansions _NL'], 157 'override': ['_OVERRIDE rule', 158 '_OVERRIDE term'], 159 'extend': ['_EXTEND rule', 160 '_EXTEND term'], 161 'ignore': ['_IGNORE expansions _NL'], 162 'declare': ['_DECLARE _declare_args _NL'], 163 'import': ['_IMPORT _import_path _NL', 164 '_IMPORT _import_path _LPAR name_list _RPAR _NL', 165 '_IMPORT _import_path _TO name _NL'], 166 167 '_import_path': ['import_lib', 'import_rel'], 168 'import_lib': ['_import_args'], 169 'import_rel': ['_DOT _import_args'], 170 '_import_args': ['name', '_import_args _DOT name'], 171 172 'name_list': ['_name_list'], 173 '_name_list': ['name', '_name_list _COMMA name'], 174 175 '_declare_args': ['name', '_declare_args name'], 176 'literal': ['REGEXP', 'STRING'], 177} 178 179 180# Value 5 keeps the number of states in the lalr parser somewhat minimal 181# It isn't optimal, but close to it. See PR #949 182SMALL_FACTOR_THRESHOLD = 5 183# The Threshold whether repeat via ~ are split up into different rules 184# 50 is chosen since it keeps the number of states low and therefore lalr analysis time low, 185# while not being to overaggressive and unnecessarily creating rules that might create shift/reduce conflicts. 
@inline_args
class EBNF_to_BNF(Transformer_InPlace):
    """Rewrites EBNF operators (``?``, ``*``, ``+``, ``~n..m``) into plain BNF.

    New helper rules are collected in ``self.new_rules``; generated rules are
    cached in ``self.rules_cache`` so identical sub-expressions share a rule.
    """

    def __init__(self):
        self.new_rules = []
        self.rules_cache = {}
        self.prefix = 'anon'
        self.i = 0
        self.rule_options = None

    def _name_rule(self, inner):
        # Double-underscore prefix marks the rule as generated/inlined.
        new_name = '__%s_%s_%d' % (self.prefix, inner, self.i)
        self.i += 1
        return new_name

    def _add_rule(self, key, name, expansions):
        t = NonTerminal(name)
        self.new_rules.append((name, expansions, self.rule_options))
        self.rules_cache[key] = t
        return t

    def _add_recurse_rule(self, type_, expr):
        # Left-recursive helper: _x : _x x | x
        if expr in self.rules_cache:
            return self.rules_cache[expr]
        new_name = self._name_rule(type_)
        t = NonTerminal(new_name)
        tree = ST('expansions', [
            ST('expansion', [expr]),
            ST('expansion', [t, expr])
        ])
        return self._add_rule(expr, new_name, tree)

    def _add_repeat_rule(self, a, b, target, atom):
        """Generate a rule that repeats target ``a`` times, and repeats atom ``b`` times.

        When called recursively (into target), it repeats atom for x(n) times, where:
            x(0) = 1
            x(n) = a(n) * x(n-1) + b

        Example rule when a=3, b=4:

            new_rule: target target target atom atom atom atom

        """
        key = (a, b, target, atom)
        if key in self.rules_cache:
            return self.rules_cache[key]
        new_name = self._name_rule('repeat_a%d_b%d' % (a, b))
        tree = ST('expansions', [ST('expansion', [target] * a + [atom] * b)])
        return self._add_rule(key, new_name, tree)

    def _add_repeat_opt_rule(self, a, b, target, target_opt, atom):
        """Creates a rule that matches atom 0 to (a*n+b)-1 times.

        When target matches n times atom, and target_opt 0 to n-1 times target_opt,

        First we generate target * i followed by target_opt, for i from 0 to a-1
        These match 0 to n*a - 1 times atom

        Then we generate target * a followed by atom * i, for i from 0 to b-1
        These match n*a to n*a + b-1 times atom

        The created rule will not have any shift/reduce conflicts so that it can be used with lalr

        Example rule when a=3, b=4:

            new_rule: target_opt
                    | target target_opt
                    | target target target_opt

                    | target target target
                    | target target target atom
                    | target target target atom atom
                    | target target target atom atom atom

        """
        key = (a, b, target, atom, "opt")
        if key in self.rules_cache:
            return self.rules_cache[key]
        new_name = self._name_rule('repeat_a%d_b%d_opt' % (a, b))
        options = [ST('expansion', [target] * i + [target_opt]) for i in range(a)]
        options += [ST('expansion', [target] * a + [atom] * i) for i in range(b)]
        return self._add_rule(key, new_name, ST('expansions', options))

    def _generate_repeats(self, rule, mn, mx):
        """Generates a rule tree that repeats ``rule`` exactly between ``mn`` to ``mx`` times.
        """
        # For a small number of repeats, we can take the naive approach
        if mx < REPEAT_BREAK_THRESHOLD:
            return ST('expansions', [ST('expansion', [rule] * n) for n in range(mn, mx + 1)])

        # For large repeat values, we break the repetition into sub-rules.
        # We treat ``rule~mn..mx`` as ``rule~mn rule~0..(diff=mx-mn)``.
        # We then use small_factors to split up mn and diff up into values [(a, b), ...]
        # This values are used with the help of _add_repeat_rule and _add_repeat_rule_opt
        # to generate a complete rule/expression that matches the corresponding number of repeats
        mn_target = rule
        for a, b in small_factors(mn, SMALL_FACTOR_THRESHOLD):
            mn_target = self._add_repeat_rule(a, b, mn_target, rule)
        if mx == mn:
            return mn_target

        diff = mx - mn + 1  # We add one because _add_repeat_opt_rule generates rules that match one less
        diff_factors = small_factors(diff, SMALL_FACTOR_THRESHOLD)
        diff_target = rule  # Match rule 1 times
        diff_opt_target = ST('expansion', [])  # match rule 0 times (e.g. up to 1 -1 times)
        for a, b in diff_factors[:-1]:
            diff_opt_target = self._add_repeat_opt_rule(a, b, diff_target, diff_opt_target, rule)
            diff_target = self._add_repeat_rule(a, b, diff_target, rule)

        a, b = diff_factors[-1]
        diff_opt_target = self._add_repeat_opt_rule(a, b, diff_target, diff_opt_target, rule)

        return ST('expansions', [ST('expansion', [mn_target] + [diff_opt_target])])

    def expr(self, rule, op, *args):
        if op.value == '?':
            empty = ST('expansion', [])
            return ST('expansions', [rule, empty])
        elif op.value == '+':
            # a : b c+ d
            #   -->
            # a : b _c d
            # _c : _c c | c;
            return self._add_recurse_rule('plus', rule)
        elif op.value == '*':
            # a : b c* d
            #   -->
            # a : b _c? d
            # _c : _c c | c;
            new_name = self._add_recurse_rule('star', rule)
            return ST('expansions', [new_name, ST('expansion', [])])
        elif op.value == '~':
            if len(args) == 1:
                mn = mx = int(args[0])
            else:
                mn, mx = map(int, args)
                if mx < mn or mn < 0:
                    raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (rule, mn, mx))

            return self._generate_repeats(rule, mn, mx)

        assert False, op

    def maybe(self, rule):
        keep_all_tokens = self.rule_options and self.rule_options.keep_all_tokens

        def will_not_get_removed(sym):
            if isinstance(sym, NonTerminal):
                return not sym.name.startswith('_')
            if isinstance(sym, Terminal):
                return keep_all_tokens or not sym.filter_out
            assert False

        # Use the _EMPTY placeholder only when the missing branch would
        # otherwise change the tree shape (i.e. something visible was dropped).
        if any(rule.scan_values(will_not_get_removed)):
            empty = _EMPTY
        else:
            empty = ST('expansion', [])

        return ST('expansions', [rule, empty])


class SimplifyRule_Visitor(Visitor):

    @staticmethod
    def _flatten(tree):
        # Merge nested nodes of the same kind into their parent.
        while tree.expand_kids_by_data(tree.data):
            pass

    def expansion(self, tree):
        # rules_list unpacking
        # a : b (c|d) e
        #  -->
        # a : b c e | b d e
        #
        # In AST terms:
        # expansion(b, expansions(c, d), e)
        #   -->
        # expansions( expansion(b, c, e), expansion(b, d, e) )

        self._flatten(tree)

        for i, child in enumerate(tree.children):
            if isinstance(child, Tree) and child.data == 'expansions':
                tree.data = 'expansions'
                tree.children = [self.visit(ST('expansion', [option if i == j else other
                                                             for j, other in enumerate(tree.children)]))
                                 for option in dedup_list(child.children)]
                self._flatten(tree)
                break

    def alias(self, tree):
        rule, alias_name = tree.children
        if rule.data == 'expansions':
            aliases = []
            for child in tree.children[0].children:
                aliases.append(ST('alias', [child, alias_name]))
            tree.data = 'expansions'
            tree.children = aliases

    def expansions(self, tree):
        self._flatten(tree)
        # Ensure all children are unique
        if len(set(tree.children)) != len(tree.children):
            tree.children = dedup_list(tree.children)   # dedup is expensive, so try to minimize its use
class RuleTreeToText(Transformer):
    """Flattens a simplified rule tree into (expansion, alias) pairs."""

    def expansions(self, x):
        return x

    def expansion(self, symbols):
        return symbols, None

    def alias(self, x):
        (expansion, _alias), alias = x
        assert _alias is None, (alias, expansion, '-', _alias)  # Double alias not allowed
        return expansion, alias.value


class PrepareAnonTerminals(Transformer_InPlace):
    """Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them"""

    def __init__(self, terminals):
        self.terminals = terminals
        self.term_set = {td.name for td in self.terminals}
        self.term_reverse = {td.pattern: td for td in terminals}
        self.i = 0
        self.rule_options = None

    @inline_args
    def pattern(self, p):
        value = p.value
        if p in self.term_reverse and p.flags != self.term_reverse[p].pattern.flags:
            raise GrammarError(u'Conflicting flags for the same terminal: %s' % p)

        term_name = None

        if isinstance(p, PatternStr):
            try:
                # If already defined, use the user-defined terminal name
                term_name = self.term_reverse[p].name
            except KeyError:
                # Try to assign an indicative anon-terminal name
                try:
                    term_name = _TERMINAL_NAMES[value]
                except KeyError:
                    if value and is_id_continue(value) and is_id_start(value[0]) and value.upper() not in self.term_set:
                        term_name = value.upper()

                if term_name in self.term_set:
                    term_name = None

        elif isinstance(p, PatternRE):
            if p in self.term_reverse:  # Kind of a weird placement.name
                term_name = self.term_reverse[p].name
        else:
            assert False, p

        if term_name is None:
            term_name = '__ANON_%d' % self.i
            self.i += 1

        if term_name not in self.term_set:
            assert p not in self.term_reverse
            self.term_set.add(term_name)
            termdef = TerminalDef(term_name, p)
            self.term_reverse[p] = termdef
            self.terminals.append(termdef)

        filter_out = False if self.rule_options and self.rule_options.keep_all_tokens else isinstance(p, PatternStr)

        return Terminal(term_name, filter_out=filter_out)


class _ReplaceSymbols(Transformer_InPlace):
    """Helper for ApplyTemplates"""

    def __init__(self):
        self.names = {}

    def value(self, c):
        if len(c) == 1 and isinstance(c[0], Token) and c[0].value in self.names:
            return self.names[c[0].value]
        return self.__default__('value', c, None)

    def template_usage(self, c):
        if c[0] in self.names:
            return self.__default__('template_usage', [self.names[c[0]].name] + c[1:], None)
        return self.__default__('template_usage', c, None)


class ApplyTemplates(Transformer_InPlace):
    """Apply the templates, creating new rules that represent the used templates"""

    def __init__(self, rule_defs):
        self.rule_defs = rule_defs
        self.replacer = _ReplaceSymbols()
        self.created_templates = set()

    def template_usage(self, c):
        name = c[0]
        args = c[1:]
        result_name = "%s{%s}" % (name, ",".join(a.name for a in args))
        if result_name not in self.created_templates:
            self.created_templates.add(result_name)
            (_n, params, tree, options) ,= (t for t in self.rule_defs if t[0] == name)
            assert len(params) == len(args), args
            result_tree = deepcopy(tree)
            self.replacer.names = dict(zip(params, args))
            self.replacer.transform(result_tree)
            self.rule_defs.append((result_name, [], result_tree, deepcopy(options)))
        return NonTerminal(result_name)


def _rfind(s, choices):
    # Rightmost index of any of the given characters (-1 when none occur).
    return max(s.rfind(c) for c in choices)


def eval_escaping(s):
    """Resolve backslash escapes in a grammar literal, via literal_eval."""
    w = ''
    i = iter(s)
    for n in i:
        w += n
        if n == '\\':
            try:
                n2 = next(i)
            except StopIteration:
                raise GrammarError("Literal ended unexpectedly (bad escaping): `%r`" % s)
            # Keep escapes Python understands; re-escape the rest verbatim.
            if n2 == '\\':
                w += '\\\\'
            elif n2 not in 'Uuxnftr':
                w += '\\'
            w += n2
    w = w.replace('\\"', '"').replace("'", "\\'")

    to_eval = "u'''%s'''" % w
    try:
        s = literal_eval(to_eval)
    except SyntaxError as e:
        raise GrammarError(s, e)

    return s
def _literal_to_pattern(literal):
    """Convert a STRING/REGEXP token into a PatternStr/PatternRE object."""
    v = literal.value
    flag_start = _rfind(v, '/"')+1
    assert flag_start > 0
    flags = v[flag_start:]
    assert all(f in _RE_FLAGS for f in flags), flags

    if literal.type == 'STRING' and '\n' in v:
        raise GrammarError('You cannot put newlines in string literals')

    if literal.type == 'REGEXP' and '\n' in v and 'x' not in flags:
        raise GrammarError('You can only use newlines in regular expressions '
                           'with the `x` (verbose) flag')

    v = v[:flag_start]
    assert v[0] == v[-1] and v[0] in '"/'
    x = v[1:-1]

    s = eval_escaping(x)

    if s == "":
        raise GrammarError("Empty terminals are not allowed (%s)" % literal)

    if literal.type == 'STRING':
        s = s.replace('\\\\', '\\')
        return PatternStr(s, flags, raw=literal.value)
    elif literal.type == 'REGEXP':
        return PatternRE(s, flags, raw=literal.value)
    else:
        assert False, 'Invariant failed: literal.type not in ["STRING", "REGEXP"]'


@inline_args
class PrepareLiterals(Transformer_InPlace):
    """Turns literal/range nodes into pattern nodes."""

    def literal(self, literal):
        return ST('pattern', [_literal_to_pattern(literal)])

    def range(self, start, end):
        assert start.type == end.type == 'STRING'
        start = start.value[1:-1]
        end = end.value[1:-1]
        assert len(eval_escaping(start)) == len(eval_escaping(end)) == 1
        regexp = '[%s-%s]' % (start, end)
        return ST('pattern', [PatternRE(regexp)])


def _make_joined_pattern(regexp, flags_set):
    # In Python 3.6, a new syntax for flags was introduced, that allows us to restrict the scope
    # of flags to a specific regexp group. We are already using it in `lexer.Pattern._get_flags`
    # However, for prior Python versions, we still need to use global flags, so we have to make sure
    # that there are no flag collisions when we merge several terminals.
    flags = ()
    if not Py36:
        if len(flags_set) > 1:
            raise GrammarError("Lark doesn't support joining terminals with conflicting flags in python <3.6!")
        elif len(flags_set) == 1:
            flags ,= flags_set

    return PatternRE(regexp, flags)


class TerminalTreeToPattern(Transformer_NonRecursive):
    """Compiles a terminal's expression tree into a single Pattern object."""

    def pattern(self, ps):
        p ,= ps
        return p

    def expansion(self, items):
        assert items
        if len(items) == 1:
            return items[0]

        pattern = ''.join(i.to_regexp() for i in items)
        return _make_joined_pattern(pattern, {i.flags for i in items})

    def expansions(self, exps):
        if len(exps) == 1:
            return exps[0]

        # Do a bit of sorting to make sure that the longest option is returned
        # (Python's re module otherwise prefers just 'l' when given (l|ll) and both could match)
        exps.sort(key=lambda x: (-x.max_width, -x.min_width, -len(x.value)))

        pattern = '(?:%s)' % ('|'.join(i.to_regexp() for i in exps))
        return _make_joined_pattern(pattern, {i.flags for i in exps})

    def expr(self, args):
        inner, op = args[:2]
        if op == '~':
            if len(args) == 3:
                op = "{%d}" % int(args[2])
            else:
                mn, mx = map(int, args[2:])
                if mx < mn:
                    raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (inner, mn, mx))
                op = "{%d,%d}" % (mn, mx)
        else:
            assert len(args) == 2
        return PatternRE('(?:%s)%s' % (inner.to_regexp(), op), inner.flags)

    def maybe(self, expr):
        return self.expr(expr + ['?'])

    def alias(self, t):
        raise GrammarError("Aliasing not allowed in terminals (You used -> in the wrong place)")

    def value(self, v):
        return v[0]
class PrepareSymbols(Transformer_InPlace):
    """Turns RULE/TERMINAL tokens into NonTerminal/Terminal symbols."""

    def value(self, v):
        v ,= v
        if isinstance(v, Tree):
            return v
        elif v.type == 'RULE':
            return NonTerminal(Str(v.value))
        elif v.type == 'TERMINAL':
            return Terminal(Str(v.value), filter_out=v.startswith('_'))
        assert False


def nr_deepcopy_tree(t):
    """Deepcopy tree `t` without recursion"""
    return Transformer_NonRecursive(False).transform(t)


class Grammar:
    def __init__(self, rule_defs, term_defs, ignore):
        self.term_defs = term_defs
        self.rule_defs = rule_defs
        self.ignore = ignore

    def compile(self, start, terminals_to_keep):
        # We change the trees in-place (to support huge grammars)
        # So deepcopy allows calling compile more than once.
        term_defs = [(n, (nr_deepcopy_tree(t), p)) for n, (t, p) in self.term_defs]
        rule_defs = [(n, p, nr_deepcopy_tree(t), o) for n, p, t, o in self.rule_defs]

        # ===================
        #  Compile Terminals
        # ===================

        # Convert terminal-trees to strings/regexps

        for name, (term_tree, priority) in term_defs:
            if term_tree is None:  # Terminal added through %declare
                continue
            expansions = list(term_tree.find_data('expansion'))
            if len(expansions) == 1 and not expansions[0].children:
                raise GrammarError("Terminals cannot be empty (%s)" % name)

        transformer = PrepareLiterals() * TerminalTreeToPattern()
        terminals = [TerminalDef(name, transformer.transform(term_tree), priority)
                     for name, (term_tree, priority) in term_defs if term_tree]

        # =================
        #  Compile Rules
        # =================

        # 1. Pre-process terminals
        anon_tokens_transf = PrepareAnonTerminals(terminals)
        transformer = PrepareLiterals() * PrepareSymbols() * anon_tokens_transf  # Adds to terminals

        # 2. Inline Templates

        transformer *= ApplyTemplates(rule_defs)

        # 3. Convert EBNF to BNF (and apply step 1 & 2)
        ebnf_to_bnf = EBNF_to_BNF()
        rules = []
        i = 0
        while i < len(rule_defs):  # We have to do it like this because rule_defs might grow due to templates
            name, params, rule_tree, options = rule_defs[i]
            i += 1
            if len(params) != 0:  # Dont transform templates
                continue
            rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None
            ebnf_to_bnf.rule_options = rule_options
            ebnf_to_bnf.prefix = name
            anon_tokens_transf.rule_options = rule_options
            tree = transformer.transform(rule_tree)
            res = ebnf_to_bnf.transform(tree)
            rules.append((name, res, options))
        rules += ebnf_to_bnf.new_rules

        assert len(rules) == len({name for name, _t, _o in rules}), "Whoops, name collision"

        # 4. Compile tree to Rule objects
        rule_tree_to_text = RuleTreeToText()

        simplify_rule = SimplifyRule_Visitor()
        compiled_rules = []
        for rule_content in rules:
            name, tree, options = rule_content
            simplify_rule.visit(tree)
            expansions = rule_tree_to_text.transform(tree)

            for i, (expansion, alias) in enumerate(expansions):
                if alias and name.startswith('_'):
                    raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)"% (name, alias))

                empty_indices = [x==_EMPTY for x in expansion]
                if any(empty_indices):
                    exp_options = copy(options) or RuleOptions()
                    exp_options.empty_indices = empty_indices
                    expansion = [x for x in expansion if x!=_EMPTY]
                else:
                    exp_options = options

                for sym in expansion:
                    assert isinstance(sym, Symbol)
                    if sym.is_term and exp_options and exp_options.keep_all_tokens:
                        sym.filter_out = False
                rule = Rule(NonTerminal(name), expansion, i, alias, exp_options)
                compiled_rules.append(rule)

        # Remove duplicates of empty rules, throw error for non-empty duplicates
        if len(set(compiled_rules)) != len(compiled_rules):
            duplicates = classify(compiled_rules, lambda x: x)
            for dups in duplicates.values():
                if len(dups) > 1:
                    if dups[0].expansion:
                        raise GrammarError("Rules defined twice: %s\n\n(Might happen due to colliding expansion of optionals: [] or ?)"
                                           % ''.join('\n  * %s' % i for i in dups))

                    # Empty rule; assert all other attributes are equal
                    assert len({(r.alias, r.order, r.options) for r in dups}) == len(dups)

            # Remove duplicates
            compiled_rules = list(set(compiled_rules))

        # Filter out unused rules
        while True:
            c = len(compiled_rules)
            used_rules = {s for r in compiled_rules
                          for s in r.expansion
                          if isinstance(s, NonTerminal)
                          and s != r.origin}
            used_rules |= {NonTerminal(s) for s in start}
            compiled_rules, unused = classify_bool(compiled_rules, lambda r: r.origin in used_rules)
            for r in unused:
                logger.debug("Unused rule: %s", r)
            if len(compiled_rules) == c:
                break

        # Filter out unused terminals
        if terminals_to_keep != '*':
            used_terms = {t.name for r in compiled_rules
                          for t in r.expansion
                          if isinstance(t, Terminal)}
            terminals, unused = classify_bool(terminals, lambda t: t.name in used_terms or t.name in self.ignore or t.name in terminals_to_keep)
            if unused:
                logger.debug("Unused terminals: %s", [t.name for t in unused])

        return terminals, compiled_rules, self.ignore


PackageResource = namedtuple('PackageResource', 'pkg_name path')


class FromPackageLoader(object):
    """
    Provides a simple way of creating custom import loaders that load from packages via ``pkgutil.get_data`` instead of using `open`.
    This allows them to be compatible even from within zip files.

    Relative imports are handled, so you can just freely use them.

    pkg_name: The name of the package. You can probably provide `__name__` most of the time
    search_paths: All the path that will be search on absolute imports.
    """
    def __init__(self, pkg_name, search_paths=("", )):
        self.pkg_name = pkg_name
        self.search_paths = search_paths

    def __repr__(self):
        return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths)

    def __call__(self, base_path, grammar_path):
        if base_path is None:
            to_try = self.search_paths
        else:
            # Check whether or not the importing grammar was loaded by this module.
            if not isinstance(base_path, PackageResource) or base_path.pkg_name != self.pkg_name:
                # Technically false, but FileNotFound doesn't exist in python2.7, and this message should never reach the end user anyway
                raise IOError()
            to_try = [base_path.path]
        for path in to_try:
            full_path = os.path.join(path, grammar_path)
            try:
                text = pkgutil.get_data(self.pkg_name, full_path)
            except IOError:
                continue
            else:
                return PackageResource(self.pkg_name, full_path), text.decode()
        raise IOError()
stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)


def resolve_term_references(term_dict):
    # TODO Solve with transitive closure (maybe)

    # Keep substituting terminal references until a full pass changes nothing.
    while True:
        changed = False
        for name, token_tree in term_dict.items():
            if token_tree is None:  # Terminal added through %declare
                continue
            for exp in token_tree.find_data('value'):
                item ,= exp.children
                if isinstance(item, Token):
                    if item.type == 'RULE':
                        raise GrammarError("Rules aren't allowed inside terminals (%s in %s)" % (item, name))
                    if item.type == 'TERMINAL':
                        try:
                            term_value = term_dict[item]
                        except KeyError:
                            raise GrammarError("Terminal used but not defined: %s" % item)
                        assert term_value is not None
                        exp.children[0] = term_value
                        changed = True
        if not changed:
            break

    for name, term in term_dict.items():
        if term:    # Not just declared
            for child in term.children:
                ids = [id(x) for x in child.iter_subtrees()]
                if id(term) in ids:
                    raise GrammarError("Recursion in terminal '%s' (recursion is only allowed in rules, not terminals)" % name)


def options_from_rule(name, params, *x):
    """Split a raw rule definition into (name, params, expansions, RuleOptions)."""
    if len(x) > 1:
        priority, expansions = x
        priority = int(priority)
    else:
        expansions ,= x
        priority = None
    params = [t.value for t in params.children] if params is not None else []  # For the grammar parser

    keep_all_tokens = name.startswith('!')
    name = name.lstrip('!')
    expand1 = name.startswith('?')
    name = name.lstrip('?')

    return name, params, expansions, RuleOptions(keep_all_tokens, expand1, priority=priority,
                                                 template_source=(name if params else None))


def symbols_from_strcase(expansion):
    # Uppercase names are terminals; everything else is a rule reference.
    return [Terminal(x, filter_out=x.startswith('_')) if x.isupper() else NonTerminal(x) for x in expansion]


@inline_args
class PrepareGrammar(Transformer_InPlace):
    def terminal(self, name):
        return name

    def nonterminal(self, name):
        return name


def _find_used_symbols(tree):
    assert tree.data == 'expansions'
    return {t for x in tree.find_data('expansion')
            for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))}


def _get_parser():
    """Build (once) and return the LALR parser for .lark grammar files."""
    try:
        return _get_parser.cache
    except AttributeError:
        terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()]

        rules = [options_from_rule(name, None, x) for name, x in RULES.items()]
        rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o)
                 for r, _p, xs, o in rules for i, x in enumerate(xs)]
        callback = ParseTreeBuilder(rules, ST).create_callback()
        import re
        lexer_conf = LexerConf(terminals, re, ['WS', 'COMMENT'])
        parser_conf = ParserConf(rules, callback, ['start'])
        lexer_conf.lexer_type = 'standard'
        parser_conf.parser_type = 'lalr'
        _get_parser.cache = ParsingFrontend(lexer_conf, parser_conf, None)
        return _get_parser.cache
# Pairs of (human-readable error, example grammar snippets that trigger it),
# matched against parse failures via UnexpectedInput.match_examples.
GRAMMAR_ERRORS = [
        ('Incorrect type of value', ['a: 1\n']),
        ('Unclosed parenthesis', ['a: (\n']),
        ('Unmatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']),
        ('Expecting rule or terminal definition (missing colon)', ['a\n', 'A\n', 'a->\n', 'A->\n', 'a A\n']),
        ('Illegal name for rules or terminals', ['Aa:\n']),
        ('Alias expects lowercase name', ['a: -> "a"\n']),
        ('Unexpected colon', ['a::\n', 'a: b:\n', 'a: B:\n', 'a: "a":\n']),
        ('Misplaced operator', ['a: b??', 'a: b(?)', 'a:+\n', 'a:?\n', 'a:*\n', 'a:|*\n']),
        ('Expecting option ("|") or a new rule or terminal definition', ['a:a\n()\n']),
        ('Terminal names cannot contain dots', ['A.B\n']),
        ('Expecting rule or terminal definition', ['"a"\n']),
        ('%import expects a name', ['%import "a"\n']),
        ('%ignore expects a value', ['%ignore %import\n']),
    ]


def _translate_parser_exception(parse, e):
    error = e.match_examples(parse, GRAMMAR_ERRORS, use_accepts=True)
    if error:
        return error
    elif 'STRING' in e.expected:
        return "Expecting a value"


def _parse_grammar(text, name, start='start'):
    try:
        tree = _get_parser().parse(text + '\n', start)
    except UnexpectedCharacters as e:
        context = e.get_context(text)
        raise GrammarError("Unexpected input at line %d column %d in %s: \n\n%s" %
                           (e.line, e.column, name, context))
    except UnexpectedToken as e:
        context = e.get_context(text)
        error = _translate_parser_exception(_get_parser().parse, e)
        if error:
            raise GrammarError("%s, at line %s column %s\n\n%s" % (error, e.line, e.column, context))
        raise

    return PrepareGrammar().transform(tree)


def _error_repr(error):
    if isinstance(error, UnexpectedToken):
        error2 = _translate_parser_exception(_get_parser().parse, error)
        if error2:
            return error2
        expected = ', '.join(error.accepts or error.expected)
        return "Unexpected token %r. Expected one of: {%s}" % (str(error.token), expected)
    else:
        return str(error)


def _search_interactive_parser(interactive_parser, predicate):
    # BFS over parser states, feeding dummy tokens, until predicate matches.
    def expand(node):
        path, p = node
        for choice in p.choices():
            t = Token(choice, '')
            try:
                new_p = p.feed_token(t)
            except ParseError:   # Illegal
                pass
            else:
                yield path + (choice,), new_p

    for path, p in bfs_all_unique([((), interactive_parser)], expand):
        if predicate(p):
            return path, p


def find_grammar_errors(text, start='start'):
    errors = []
    def on_error(e):
        errors.append((e, _error_repr(e)))

        # recover to a new line
        token_path, _ = _search_interactive_parser(e.interactive_parser.as_immutable(), lambda p: '_NL' in p.choices())
        for token_type in token_path:
            e.interactive_parser.feed_token(Token(token_type, ''))
        e.interactive_parser.feed_token(Token('_NL', '\n'))
        return True

    _tree = _get_parser().parse(text + '\n', start, on_error=on_error)

    errors_by_line = classify(errors, lambda e: e[0].line)
    errors = [el[0] for el in errors_by_line.values()]  # already sorted

    for e in errors:
        e[0].interactive_parser = None
    return errors


def _get_mangle(prefix, aliases, base_mangle=None):
    """Return a name-mangling function for imported rules/terminals."""
    def mangle(s):
        if s in aliases:
            s = aliases[s]
        else:
            if s[0] == '_':
                s = '_%s__%s' % (prefix, s[1:])
            else:
                s = '%s__%s' % (prefix, s)
        if base_mangle is not None:
            s = base_mangle(s)
        return s
    return mangle


def _mangle_exp(exp, mangle):
    if mangle is None:
        return exp
    exp = deepcopy(exp) # TODO: is this needed
    for t in exp.iter_subtrees():
        for i, c in enumerate(t.children):
            if isinstance(c, Token) and c.type in ('RULE', 'TERMINAL'):
                t.children[i] = Token(c.type, mangle(c.value))
    return exp
GrammarBuilder: 1042 def __init__(self, global_keep_all_tokens=False, import_paths=None, used_files=None): 1043 self.global_keep_all_tokens = global_keep_all_tokens 1044 self.import_paths = import_paths or [] 1045 self.used_files = used_files or {} 1046 1047 self._definitions = {} 1048 self._ignore_names = [] 1049 1050 def _is_term(self, name): 1051 # Imported terminals are of the form `Path__to__Grammar__file__TERMINAL_NAME` 1052 # Only the last part is the actual name, and the rest might contain mixed case 1053 return name.rpartition('__')[-1].isupper() 1054 1055 def _grammar_error(self, msg, *names): 1056 args = {} 1057 for i, name in enumerate(names, start=1): 1058 postfix = '' if i == 1 else str(i) 1059 args['name' + postfix] = name 1060 args['type' + postfix] = lowercase_type = ("rule", "terminal")[self._is_term(name)] 1061 args['Type' + postfix] = lowercase_type.title() 1062 raise GrammarError(msg.format(**args)) 1063 1064 def _check_options(self, name, options): 1065 if self._is_term(name): 1066 if options is None: 1067 options = 1 1068 # if we don't use Integral here, we run into python2.7/python3 problems with long vs int 1069 elif not isinstance(options, Integral): 1070 raise GrammarError("Terminal require a single int as 'options' (e.g. 
priority), got %s" % (type(options),)) 1071 else: 1072 if options is None: 1073 options = RuleOptions() 1074 elif not isinstance(options, RuleOptions): 1075 raise GrammarError("Rules require a RuleOptions instance as 'options'") 1076 if self.global_keep_all_tokens: 1077 options.keep_all_tokens = True 1078 return options 1079 1080 1081 def _define(self, name, exp, params=(), options=None, override=False): 1082 if name in self._definitions: 1083 if not override: 1084 self._grammar_error("{Type} '{name}' defined more than once", name) 1085 elif override: 1086 self._grammar_error("Cannot override a nonexisting {type} {name}", name) 1087 1088 if name.startswith('__'): 1089 self._grammar_error('Names starting with double-underscore are reserved (Error at {name})', name) 1090 1091 self._definitions[name] = (params, exp, self._check_options(name, options)) 1092 1093 def _extend(self, name, exp, params=(), options=None): 1094 if name not in self._definitions: 1095 self._grammar_error("Can't extend {type} {name} as it wasn't defined before", name) 1096 if tuple(params) != tuple(self._definitions[name][0]): 1097 self._grammar_error("Cannot extend {type} with different parameters: {name}", name) 1098 # TODO: think about what to do with 'options' 1099 base = self._definitions[name][1] 1100 1101 assert isinstance(base, Tree) and base.data == 'expansions' 1102 base.children.insert(0, exp) 1103 1104 def _ignore(self, exp_or_name): 1105 if isinstance(exp_or_name, str): 1106 self._ignore_names.append(exp_or_name) 1107 else: 1108 assert isinstance(exp_or_name, Tree) 1109 t = exp_or_name 1110 if t.data == 'expansions' and len(t.children) == 1: 1111 t2 ,= t.children 1112 if t2.data=='expansion' and len(t2.children) == 1: 1113 item ,= t2.children 1114 if item.data == 'value': 1115 item ,= item.children 1116 if isinstance(item, Token) and item.type == 'TERMINAL': 1117 self._ignore_names.append(item.value) 1118 return 1119 1120 name = '__IGNORE_%d'% len(self._ignore_names) 1121 
self._ignore_names.append(name) 1122 self._definitions[name] = ((), t, 1) 1123 1124 def _declare(self, *names): 1125 for name in names: 1126 self._define(name, None) 1127 1128 def _unpack_import(self, stmt, grammar_name): 1129 if len(stmt.children) > 1: 1130 path_node, arg1 = stmt.children 1131 else: 1132 path_node, = stmt.children 1133 arg1 = None 1134 1135 if isinstance(arg1, Tree): # Multi import 1136 dotted_path = tuple(path_node.children) 1137 names = arg1.children 1138 aliases = dict(zip(names, names)) # Can't have aliased multi import, so all aliases will be the same as names 1139 else: # Single import 1140 dotted_path = tuple(path_node.children[:-1]) 1141 if not dotted_path: 1142 name ,= path_node.children 1143 raise GrammarError("Nothing was imported from grammar `%s`" % name) 1144 name = path_node.children[-1] # Get name from dotted path 1145 aliases = {name.value: (arg1 or name).value} # Aliases if exist 1146 1147 if path_node.data == 'import_lib': # Import from library 1148 base_path = None 1149 else: # Relative import 1150 if grammar_name == '<string>': # Import relative to script file path if grammar is coded in script 1151 try: 1152 base_file = os.path.abspath(sys.modules['__main__'].__file__) 1153 except AttributeError: 1154 base_file = None 1155 else: 1156 base_file = grammar_name # Import relative to grammar file path if external grammar file 1157 if base_file: 1158 if isinstance(base_file, PackageResource): 1159 base_path = PackageResource(base_file.pkg_name, os.path.split(base_file.path)[0]) 1160 else: 1161 base_path = os.path.split(base_file)[0] 1162 else: 1163 base_path = os.path.abspath(os.path.curdir) 1164 1165 return dotted_path, base_path, aliases 1166 1167 def _unpack_definition(self, tree, mangle): 1168 if tree.data == 'rule': 1169 name, params, exp, opts = options_from_rule(*tree.children) 1170 else: 1171 name = tree.children[0].value 1172 params = () # TODO terminal templates 1173 opts = int(tree.children[1]) if len(tree.children) == 3 
else 1 # priority 1174 exp = tree.children[-1] 1175 1176 if mangle is not None: 1177 params = tuple(mangle(p) for p in params) 1178 name = mangle(name) 1179 1180 exp = _mangle_exp(exp, mangle) 1181 return name, exp, params, opts 1182 1183 1184 def load_grammar(self, grammar_text, grammar_name="<?>", mangle=None): 1185 tree = _parse_grammar(grammar_text, grammar_name) 1186 1187 imports = {} 1188 for stmt in tree.children: 1189 if stmt.data == 'import': 1190 dotted_path, base_path, aliases = self._unpack_import(stmt, grammar_name) 1191 try: 1192 import_base_path, import_aliases = imports[dotted_path] 1193 assert base_path == import_base_path, 'Inconsistent base_path for %s.' % '.'.join(dotted_path) 1194 import_aliases.update(aliases) 1195 except KeyError: 1196 imports[dotted_path] = base_path, aliases 1197 1198 for dotted_path, (base_path, aliases) in imports.items(): 1199 self.do_import(dotted_path, base_path, aliases, mangle) 1200 1201 for stmt in tree.children: 1202 if stmt.data in ('term', 'rule'): 1203 self._define(*self._unpack_definition(stmt, mangle)) 1204 elif stmt.data == 'override': 1205 r ,= stmt.children 1206 self._define(*self._unpack_definition(r, mangle), override=True) 1207 elif stmt.data == 'extend': 1208 r ,= stmt.children 1209 self._extend(*self._unpack_definition(r, mangle)) 1210 elif stmt.data == 'ignore': 1211 # if mangle is not None, we shouldn't apply ignore, since we aren't in a toplevel grammar 1212 if mangle is None: 1213 self._ignore(*stmt.children) 1214 elif stmt.data == 'declare': 1215 names = [t.value for t in stmt.children] 1216 if mangle is None: 1217 self._declare(*names) 1218 else: 1219 self._declare(*map(mangle, names)) 1220 elif stmt.data == 'import': 1221 pass 1222 else: 1223 assert False, stmt 1224 1225 1226 term_defs = { name: exp 1227 for name, (_params, exp, _options) in self._definitions.items() 1228 if self._is_term(name) 1229 } 1230 resolve_term_references(term_defs) 1231 1232 1233 def _remove_unused(self, used): 1234 def 
rule_dependencies(symbol): 1235 if self._is_term(symbol): 1236 return [] 1237 try: 1238 params, tree,_ = self._definitions[symbol] 1239 except KeyError: 1240 return [] 1241 return _find_used_symbols(tree) - set(params) 1242 1243 _used = set(bfs(used, rule_dependencies)) 1244 self._definitions = {k: v for k, v in self._definitions.items() if k in _used} 1245 1246 1247 def do_import(self, dotted_path, base_path, aliases, base_mangle=None): 1248 assert dotted_path 1249 mangle = _get_mangle('__'.join(dotted_path), aliases, base_mangle) 1250 grammar_path = os.path.join(*dotted_path) + EXT 1251 to_try = self.import_paths + ([base_path] if base_path is not None else []) + [stdlib_loader] 1252 for source in to_try: 1253 try: 1254 if callable(source): 1255 joined_path, text = source(base_path, grammar_path) 1256 else: 1257 joined_path = os.path.join(source, grammar_path) 1258 with open(joined_path, encoding='utf8') as f: 1259 text = f.read() 1260 except IOError: 1261 continue 1262 else: 1263 h = hashlib.md5(text.encode('utf8')).hexdigest() 1264 if self.used_files.get(joined_path, h) != h: 1265 raise RuntimeError("Grammar file was changed during importing") 1266 self.used_files[joined_path] = h 1267 1268 gb = GrammarBuilder(self.global_keep_all_tokens, self.import_paths, self.used_files) 1269 gb.load_grammar(text, joined_path, mangle) 1270 gb._remove_unused(map(mangle, aliases)) 1271 for name in gb._definitions: 1272 if name in self._definitions: 1273 raise GrammarError("Cannot import '%s' from '%s': Symbol already defined." % (name, grammar_path)) 1274 1275 self._definitions.update(**gb._definitions) 1276 break 1277 else: 1278 # Search failed. Make Python throw a nice error. 
1279 open(grammar_path, encoding='utf8') 1280 assert False, "Couldn't import grammar %s, but a corresponding file was found at a place where lark doesn't search for it" % (dotted_path,) 1281 1282 1283 def validate(self): 1284 for name, (params, exp, _options) in self._definitions.items(): 1285 for i, p in enumerate(params): 1286 if p in self._definitions: 1287 raise GrammarError("Template Parameter conflicts with rule %s (in template %s)" % (p, name)) 1288 if p in params[:i]: 1289 raise GrammarError("Duplicate Template Parameter %s (in template %s)" % (p, name)) 1290 1291 if exp is None: # Remaining checks don't apply to abstract rules/terminals 1292 continue 1293 1294 for temp in exp.find_data('template_usage'): 1295 sym = temp.children[0] 1296 args = temp.children[1:] 1297 if sym not in params: 1298 if sym not in self._definitions: 1299 self._grammar_error("Template '%s' used but not defined (in {type} {name})" % sym, name) 1300 if len(args) != len(self._definitions[sym][0]): 1301 expected, actual = len(self._definitions[sym][0]), len(args) 1302 self._grammar_error("Wrong number of template arguments used for {name} " 1303 "(expected %s, got %s) (in {type2} {name2})" % (expected, actual), sym, name) 1304 1305 for sym in _find_used_symbols(exp): 1306 if sym not in self._definitions and sym not in params: 1307 self._grammar_error("{Type} '{name}' used but not defined (in {type2} {name2})", sym, name) 1308 1309 if not set(self._definitions).issuperset(self._ignore_names): 1310 raise GrammarError("Terminals %s were marked to ignore but were not defined!" 
% (set(self._ignore_names) - set(self._definitions))) 1311 1312 def build(self): 1313 self.validate() 1314 rule_defs = [] 1315 term_defs = [] 1316 for name, (params, exp, options) in self._definitions.items(): 1317 if self._is_term(name): 1318 assert len(params) == 0 1319 term_defs.append((name, (exp, options))) 1320 else: 1321 rule_defs.append((name, params, exp, options)) 1322 # resolve_term_references(term_defs) 1323 return Grammar(rule_defs, term_defs, self._ignore_names) 1324 1325 1326def verify_used_files(file_hashes): 1327 for path, old in file_hashes.items(): 1328 text = None 1329 if isinstance(path, str) and os.path.exists(path): 1330 with open(path, encoding='utf8') as f: 1331 text = f.read() 1332 elif isinstance(path, PackageResource): 1333 with suppress(IOError): 1334 text = pkgutil.get_data(*path).decode('utf-8') 1335 if text is None: # We don't know how to load the path. ignore it. 1336 continue 1337 1338 current = hashlib.md5(text.encode()).hexdigest() 1339 if old != current: 1340 logger.info("File %r changed, rebuilding Parser" % path) 1341 return False 1342 return True 1343 1344def list_grammar_imports(grammar, import_paths=[]): 1345 "Returns a list of paths to the lark grammars imported by the given grammar (recursively)" 1346 builder = GrammarBuilder(False, import_paths) 1347 builder.load_grammar(grammar, '<string>') 1348 return list(builder.used_files.keys()) 1349 1350def load_grammar(grammar, source, import_paths, global_keep_all_tokens): 1351 builder = GrammarBuilder(global_keep_all_tokens, import_paths) 1352 builder.load_grammar(grammar, source) 1353 return builder.build(), builder.used_files 1354