1"""This module implements a LALR(1) Parser
2"""
3# Author: Erez Shinan (2017)
4# Email : erezshin@gmail.com
5from copy import deepcopy, copy
6from ..exceptions import UnexpectedInput, UnexpectedToken
7from ..lexer import Token
8from ..utils import Serialize
9
10from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
11from .lalr_interactive_parser import InteractiveParser
12from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken
13
14###{standalone
15
16class LALR_Parser(Serialize):
17    def __init__(self, parser_conf, debug=False):
18        analysis = LALR_Analyzer(parser_conf, debug=debug)
19        analysis.compute_lalr()
20        callbacks = parser_conf.callbacks
21
22        self._parse_table = analysis.parse_table
23        self.parser_conf = parser_conf
24        self.parser = _Parser(analysis.parse_table, callbacks, debug)
25
26    @classmethod
27    def deserialize(cls, data, memo, callbacks, debug=False):
28        inst = cls.__new__(cls)
29        inst._parse_table = IntParseTable.deserialize(data, memo)
30        inst.parser = _Parser(inst._parse_table, callbacks, debug)
31        return inst
32
33    def serialize(self, memo):
34        return self._parse_table.serialize(memo)
35
36    def parse_interactive(self, lexer, start):
37        return self.parser.parse(lexer, start, start_interactive=True)
38
39    def parse(self, lexer, start, on_error=None):
40        try:
41            return self.parser.parse(lexer, start)
42        except UnexpectedInput as e:
43            if on_error is None:
44                raise
45
46            while True:
47                if isinstance(e, UnexpectedCharacters):
48                    s = e.interactive_parser.lexer_state.state
49                    p = s.line_ctr.char_pos
50
51                if not on_error(e):
52                    raise e
53
54                if isinstance(e, UnexpectedCharacters):
55                    # If user didn't change the character position, then we should
56                    if p == s.line_ctr.char_pos:
57                        s.line_ctr.feed(s.text[p:p+1])
58
59                try:
60                    return e.interactive_parser.resume_parse()
61                except UnexpectedToken as e2:
62                    if (isinstance(e, UnexpectedToken)
63                        and e.token.type == e2.token.type == '$END'
64                        and e.interactive_parser == e2.interactive_parser):
65                        # Prevent infinite loop
66                        raise e2
67                    e = e2
68                except UnexpectedCharacters as e2:
69                    e = e2
70
71
72class ParseConf(object):
73    __slots__ = 'parse_table', 'callbacks', 'start', 'start_state', 'end_state', 'states'
74
75    def __init__(self, parse_table, callbacks, start):
76        self.parse_table = parse_table
77
78        self.start_state = self.parse_table.start_states[start]
79        self.end_state = self.parse_table.end_states[start]
80        self.states = self.parse_table.states
81
82        self.callbacks = callbacks
83        self.start = start
84
85
86class ParserState(object):
87    __slots__ = 'parse_conf', 'lexer', 'state_stack', 'value_stack'
88
89    def __init__(self, parse_conf, lexer, state_stack=None, value_stack=None):
90        self.parse_conf = parse_conf
91        self.lexer = lexer
92        self.state_stack = state_stack or [self.parse_conf.start_state]
93        self.value_stack = value_stack or []
94
95    @property
96    def position(self):
97        return self.state_stack[-1]
98
99    # Necessary for match_examples() to work
100    def __eq__(self, other):
101        if not isinstance(other, ParserState):
102            return NotImplemented
103        return len(self.state_stack) == len(other.state_stack) and self.position == other.position
104
105    def __copy__(self):
106        return type(self)(
107            self.parse_conf,
108            self.lexer, # XXX copy
109            copy(self.state_stack),
110            deepcopy(self.value_stack),
111        )
112
113    def copy(self):
114        return copy(self)
115
116    def feed_token(self, token, is_end=False):
117        state_stack = self.state_stack
118        value_stack = self.value_stack
119        states = self.parse_conf.states
120        end_state = self.parse_conf.end_state
121        callbacks = self.parse_conf.callbacks
122
123        while True:
124            state = state_stack[-1]
125            try:
126                action, arg = states[state][token.type]
127            except KeyError:
128                expected = {s for s in states[state].keys() if s.isupper()}
129                raise UnexpectedToken(token, expected, state=self, interactive_parser=None)
130
131            assert arg != end_state
132
133            if action is Shift:
134                # shift once and return
135                assert not is_end
136                state_stack.append(arg)
137                value_stack.append(token if token.type not in callbacks else callbacks[token.type](token))
138                return
139            else:
140                # reduce+shift as many times as necessary
141                rule = arg
142                size = len(rule.expansion)
143                if size:
144                    s = value_stack[-size:]
145                    del state_stack[-size:]
146                    del value_stack[-size:]
147                else:
148                    s = []
149
150                value = callbacks[rule](s)
151
152                _action, new_state = states[state_stack[-1]][rule.origin.name]
153                assert _action is Shift
154                state_stack.append(new_state)
155                value_stack.append(value)
156
157                if is_end and state_stack[-1] == end_state:
158                    return value_stack[-1]
159
160class _Parser(object):
161    def __init__(self, parse_table, callbacks, debug=False):
162        self.parse_table = parse_table
163        self.callbacks = callbacks
164        self.debug = debug
165
166    def parse(self, lexer, start, value_stack=None, state_stack=None, start_interactive=False):
167        parse_conf = ParseConf(self.parse_table, self.callbacks, start)
168        parser_state = ParserState(parse_conf, lexer, state_stack, value_stack)
169        if start_interactive:
170            return InteractiveParser(self, parser_state, parser_state.lexer)
171        return self.parse_from_state(parser_state)
172
173
174    def parse_from_state(self, state):
175        # Main LALR-parser loop
176        try:
177            token = None
178            for token in state.lexer.lex(state):
179                state.feed_token(token)
180
181            end_token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1)
182            return state.feed_token(end_token, True)
183        except UnexpectedInput as e:
184            try:
185                e.interactive_parser = InteractiveParser(self, state, state.lexer)
186            except NameError:
187                pass
188            raise e
189        except Exception as e:
190            if self.debug:
191                print("")
192                print("STATE STACK DUMP")
193                print("----------------")
194                for i, s in enumerate(state.state_stack):
195                    print('%d)' % i , s)
196                print("")
197
198            raise
199###}
200
201