1"This module implements an Earley Parser"
2
3# The parser uses a parse-forest to keep track of derivations and ambiguations.
4# When the parse ends successfully, a disambiguation stage resolves all ambiguity
5# (right now ambiguity resolution is not developed beyond the needs of lark)
6# Afterwards the parse tree is reduced (transformed) according to user callbacks.
7# I use the no-recursion version of Transformer, because the tree might be
8# deeper than Python's recursion limit (a bit absurd, but that's life)
9#
10# The algorithm keeps track of each state set, using a corresponding Column instance.
11# Column keeps track of new items using NewsList instances.
12#
13# Author: Erez Shinan (2017)
14# Email : erezshin@gmail.com
15
16from ..grammar import NonTerminal, Terminal
17
18class Item(object):
19    "An Earley Item, the atom of the algorithm."
20
21    __slots__ = ('s', 'rule', 'ptr', 'start', 'is_complete', 'expect', 'previous', 'node', '_hash')
22    def __init__(self, rule, ptr, start):
23        self.is_complete = len(rule.expansion) == ptr
24        self.rule = rule    # rule
25        self.ptr = ptr      # ptr
26        self.start = start  # j
27        self.node = None    # w
28        if self.is_complete:
29            self.s = rule.origin
30            self.expect = None
31            self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None
32        else:
33            self.s = (rule, ptr)
34            self.expect = rule.expansion[ptr]
35            self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None
36        self._hash = hash((self.s, self.start))
37
38    def advance(self):
39        return Item(self.rule, self.ptr + 1, self.start)
40
41    def __eq__(self, other):
42        return self is other or (self.s == other.s and self.start == other.start)
43
44    def __hash__(self):
45        return self._hash
46
47    def __repr__(self):
48        before = ( expansion.name for expansion in self.rule.expansion[:self.ptr] )
49        after = ( expansion.name for expansion in self.rule.expansion[self.ptr:] )
50        symbol = "{} ::= {}* {}".format(self.rule.origin.name, ' '.join(before), ' '.join(after))
51        return '%s (%d)' % (symbol, self.start)
52
53
54class TransitiveItem(Item):
55    __slots__ = ('recognized', 'reduction', 'column', 'next_titem')
56    def __init__(self, recognized, trule, originator, start):
57        super(TransitiveItem, self).__init__(trule.rule, trule.ptr, trule.start)
58        self.recognized = recognized
59        self.reduction = originator
60        self.column = start
61        self.next_titem = None
62        self._hash = hash((self.s, self.start, self.recognized))
63
64    def __eq__(self, other):
65        if not isinstance(other, TransitiveItem):
66            return False
67        return self is other or (type(self.s) == type(other.s) and self.s == other.s and self.start == other.start and self.recognized == other.recognized)
68
69    def __hash__(self):
70        return self._hash
71
72    def __repr__(self):
73        before = ( expansion.name for expansion in self.rule.expansion[:self.ptr] )
74        after = ( expansion.name for expansion in self.rule.expansion[self.ptr:] )
75        return '{} : {} -> {}* {} ({}, {})'.format(self.recognized.name, self.rule.origin.name, ' '.join(before), ' '.join(after), self.column, self.start)
76