1"This module implements an Earley Parser" 2 3# The parser uses a parse-forest to keep track of derivations and ambiguations. 4# When the parse ends successfully, a disambiguation stage resolves all ambiguity 5# (right now ambiguity resolution is not developed beyond the needs of lark) 6# Afterwards the parse tree is reduced (transformed) according to user callbacks. 7# I use the no-recursion version of Transformer, because the tree might be 8# deeper than Python's recursion limit (a bit absurd, but that's life) 9# 10# The algorithm keeps track of each state set, using a corresponding Column instance. 11# Column keeps track of new items using NewsList instances. 12# 13# Author: Erez Shinan (2017) 14# Email : erezshin@gmail.com 15 16from ..grammar import NonTerminal, Terminal 17 18class Item(object): 19 "An Earley Item, the atom of the algorithm." 20 21 __slots__ = ('s', 'rule', 'ptr', 'start', 'is_complete', 'expect', 'previous', 'node', '_hash') 22 def __init__(self, rule, ptr, start): 23 self.is_complete = len(rule.expansion) == ptr 24 self.rule = rule # rule 25 self.ptr = ptr # ptr 26 self.start = start # j 27 self.node = None # w 28 if self.is_complete: 29 self.s = rule.origin 30 self.expect = None 31 self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None 32 else: 33 self.s = (rule, ptr) 34 self.expect = rule.expansion[ptr] 35 self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None 36 self._hash = hash((self.s, self.start)) 37 38 def advance(self): 39 return Item(self.rule, self.ptr + 1, self.start) 40 41 def __eq__(self, other): 42 return self is other or (self.s == other.s and self.start == other.start) 43 44 def __hash__(self): 45 return self._hash 46 47 def __repr__(self): 48 before = ( expansion.name for expansion in self.rule.expansion[:self.ptr] ) 49 after = ( expansion.name for expansion in self.rule.expansion[self.ptr:] ) 50 symbol = "{} ::= {}* {}".format(self.rule.origin.name, ' '.join(before), ' '.join(after)) 51 return '%s (%d)' % (symbol, self.start) 52 53 54class TransitiveItem(Item): 55 __slots__ = ('recognized', 'reduction', 'column', 'next_titem') 56 def __init__(self, recognized, trule, originator, start): 57 super(TransitiveItem, self).__init__(trule.rule, trule.ptr, trule.start) 58 self.recognized = recognized 59 self.reduction = originator 60 self.column = start 61 self.next_titem = None 62 self._hash = hash((self.s, self.start, self.recognized)) 63 64 def __eq__(self, other): 65 if not isinstance(other, TransitiveItem): 66 return False 67 return self is other or (type(self.s) == type(other.s) and self.s == other.s and self.start == other.start and self.recognized == other.recognized) 68 69 def __hash__(self): 70 return self._hash 71 72 def __repr__(self): 73 before = ( expansion.name for expansion in self.rule.expansion[:self.ptr] ) 74 after = ( expansion.name for expansion in self.rule.expansion[self.ptr:] ) 75 return '{} : {} -> {}* {} ({}, {})'.format(self.recognized.name, self.rule.origin.name, ' '.join(before), ' '.join(after), self.column, self.start) 76