1"""This module implements a LALR(1) Parser 2""" 3# Author: Erez Shinan (2017) 4# Email : erezshin@gmail.com 5from copy import deepcopy, copy 6from ..exceptions import UnexpectedInput, UnexpectedToken 7from ..lexer import Token 8from ..utils import Serialize 9 10from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable 11from .lalr_interactive_parser import InteractiveParser 12from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken 13 14###{standalone 15 16class LALR_Parser(Serialize): 17 def __init__(self, parser_conf, debug=False): 18 analysis = LALR_Analyzer(parser_conf, debug=debug) 19 analysis.compute_lalr() 20 callbacks = parser_conf.callbacks 21 22 self._parse_table = analysis.parse_table 23 self.parser_conf = parser_conf 24 self.parser = _Parser(analysis.parse_table, callbacks, debug) 25 26 @classmethod 27 def deserialize(cls, data, memo, callbacks, debug=False): 28 inst = cls.__new__(cls) 29 inst._parse_table = IntParseTable.deserialize(data, memo) 30 inst.parser = _Parser(inst._parse_table, callbacks, debug) 31 return inst 32 33 def serialize(self, memo): 34 return self._parse_table.serialize(memo) 35 36 def parse_interactive(self, lexer, start): 37 return self.parser.parse(lexer, start, start_interactive=True) 38 39 def parse(self, lexer, start, on_error=None): 40 try: 41 return self.parser.parse(lexer, start) 42 except UnexpectedInput as e: 43 if on_error is None: 44 raise 45 46 while True: 47 if isinstance(e, UnexpectedCharacters): 48 s = e.interactive_parser.lexer_state.state 49 p = s.line_ctr.char_pos 50 51 if not on_error(e): 52 raise e 53 54 if isinstance(e, UnexpectedCharacters): 55 # If user didn't change the character position, then we should 56 if p == s.line_ctr.char_pos: 57 s.line_ctr.feed(s.text[p:p+1]) 58 59 try: 60 return e.interactive_parser.resume_parse() 61 except UnexpectedToken as e2: 62 if (isinstance(e, UnexpectedToken) 63 and e.token.type == e2.token.type == '$END' 64 and e.interactive_parser == e2.interactive_parser): 65 # Prevent infinite loop 66 raise e2 67 e = e2 68 except UnexpectedCharacters as e2: 69 e = e2 70 71 72class ParseConf(object): 73 __slots__ = 'parse_table', 'callbacks', 'start', 'start_state', 'end_state', 'states' 74 75 def __init__(self, parse_table, callbacks, start): 76 self.parse_table = parse_table 77 78 self.start_state = self.parse_table.start_states[start] 79 self.end_state = self.parse_table.end_states[start] 80 self.states = self.parse_table.states 81 82 self.callbacks = callbacks 83 self.start = start 84 85 86class ParserState(object): 87 __slots__ = 'parse_conf', 'lexer', 'state_stack', 'value_stack' 88 89 def __init__(self, parse_conf, lexer, state_stack=None, value_stack=None): 90 self.parse_conf = parse_conf 91 self.lexer = lexer 92 self.state_stack = state_stack or [self.parse_conf.start_state] 93 self.value_stack = value_stack or [] 94 95 @property 96 def position(self): 97 return self.state_stack[-1] 98 99 # Necessary for match_examples() to work 100 def __eq__(self, other): 101 if not isinstance(other, ParserState): 102 return NotImplemented 103 return len(self.state_stack) == len(other.state_stack) and self.position == other.position 104 105 def __copy__(self): 106 return type(self)( 107 self.parse_conf, 108 self.lexer, # XXX copy 109 copy(self.state_stack), 110 deepcopy(self.value_stack), 111 ) 112 113 def copy(self): 114 return copy(self) 115 116 def feed_token(self, token, is_end=False): 117 state_stack = self.state_stack 118 value_stack = self.value_stack 119 states = self.parse_conf.states 120 end_state = self.parse_conf.end_state 121 callbacks = self.parse_conf.callbacks 122 123 while True: 124 state = state_stack[-1] 125 try: 126 action, arg = states[state][token.type] 127 except KeyError: 128 expected = {s for s in states[state].keys() if s.isupper()} 129 raise UnexpectedToken(token, expected, state=self, interactive_parser=None) 130 131 assert arg != end_state 132 133 if action is Shift: 134 # shift once and return 135 assert not is_end 136 state_stack.append(arg) 137 value_stack.append(token if token.type not in callbacks else callbacks[token.type](token)) 138 return 139 else: 140 # reduce+shift as many times as necessary 141 rule = arg 142 size = len(rule.expansion) 143 if size: 144 s = value_stack[-size:] 145 del state_stack[-size:] 146 del value_stack[-size:] 147 else: 148 s = [] 149 150 value = callbacks[rule](s) 151 152 _action, new_state = states[state_stack[-1]][rule.origin.name] 153 assert _action is Shift 154 state_stack.append(new_state) 155 value_stack.append(value) 156 157 if is_end and state_stack[-1] == end_state: 158 return value_stack[-1] 159 160class _Parser(object): 161 def __init__(self, parse_table, callbacks, debug=False): 162 self.parse_table = parse_table 163 self.callbacks = callbacks 164 self.debug = debug 165 166 def parse(self, lexer, start, value_stack=None, state_stack=None, start_interactive=False): 167 parse_conf = ParseConf(self.parse_table, self.callbacks, start) 168 parser_state = ParserState(parse_conf, lexer, state_stack, value_stack) 169 if start_interactive: 170 return InteractiveParser(self, parser_state, parser_state.lexer) 171 return self.parse_from_state(parser_state) 172 173 174 def parse_from_state(self, state): 175 # Main LALR-parser loop 176 try: 177 token = None 178 for token in state.lexer.lex(state): 179 state.feed_token(token) 180 181 end_token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1) 182 return state.feed_token(end_token, True) 183 except UnexpectedInput as e: 184 try: 185 e.interactive_parser = InteractiveParser(self, state, state.lexer) 186 except NameError: 187 pass 188 raise e 189 except Exception as e: 190 if self.debug: 191 print("") 192 print("STATE STACK DUMP") 193 print("----------------") 194 for i, s in enumerate(state.state_stack): 195 print('%d)' % i , s) 196 print("") 197 198 raise 199###} 200 201