# -----------------------------------------------------------------------------
# ply: yacc.py
#
# Copyright (C) 2001-2015,
# David M. Beazley (Dabeaz LLC)
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
# -----------------------------------------------------------------------------
#
# This implements an LR parser that is constructed from grammar rules defined
# as Python functions. The grammar is specified by supplying the BNF inside
# Python documentation strings. The inspiration for this technique was borrowed
# from John Aycock's Spark parsing system.  PLY might be viewed as a cross between
# Spark and the GNU bison utility.
#
# The current implementation is only somewhat object-oriented. The
# LR parser itself is defined in terms of an object (which allows multiple
# parsers to co-exist).  However, most of the variables used during table
# construction are defined in terms of global variables.  Users shouldn't
# notice unless they are trying to define multiple parsers at the same
# time using threads (in which case they should have their head examined).
#
# This implementation supports both SLR and LALR(1) parsing.  LALR(1)
# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
# Techniques, and Tools" (The Dragon Book).  LALR(1) has since been replaced
# by the more efficient DeRemer and Pennello algorithm.
#
# :::::::: WARNING :::::::
#
# Construction of LR parsing tables is fairly complicated and expensive.
# To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what might be
# considered good Python "coding style."  Modify the code at your
# own risk!
# ----------------------------------------------------------------------------

import re
import types
import sys
import os.path
import inspect
import base64
import warnings

__version__    = '3.8'
__tabversion__ = '3.8'

#-----------------------------------------------------------------------------
#                  === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug   = True             # Debugging mode.  If set, yacc generates a
                               # 'parser.out' file in the current directory

debug_file  = 'parser.out'     # Default name of the debugging file
tab_module  = 'parsetab'       # Default name of the table module
default_lr  = 'LALR'           # Default LR table generation method

error_count = 3                # Number of symbols that must be shifted to leave recovery mode

yaccdevel   = False            # Set to True if developing yacc.  This turns off optimized
                               # implementations of certain functions.

resultlimit = 40               # Size limit of results when running in debug mode.

pickle_protocol = 0            # Protocol to use when writing pickle files

# String type-checking compatibility
if sys.version_info[0] < 3:
    string_types = basestring
else:
    string_types = str

MAXINT = sys.maxsize

# This object is a stand-in for a logging object created by the
# logging module.  PLY will use this by default to create things
# such as the parser.out file.  If a user wants more detailed
# information, they can create their own logging object and pass
# it into PLY.
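#
# For example, the following sketch (illustrative only; the helper name and
# message prefix are hypothetical, and nothing in PLY calls this function)
# shows the kind of object a user might supply in place of the default
# PlyLogger defined below.  Any object providing printf-style
# debug/info/warning/error/critical methods will do; such an object would
# typically be handed to the yacc() entry point defined later in this module
# (assuming its errorlog/debuglog keyword arguments).

def _example_custom_logger(outfile):
    # Build a PlyLogger-compatible object that tags every message.
    class PrefixLogger(object):
        def __init__(self, f):
            self.f = f

        def debug(self, msg, *args, **kwargs):
            # Same printf-style interface as PlyLogger.debug() below
            self.f.write('[my-parser] ' + (msg % args) + '\n')

        info = warning = error = critical = debug

    return PrefixLogger(outfile)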

class PlyLogger(object):
    def __init__(self, f):
        self.f = f

    def debug(self, msg, *args, **kwargs):
        self.f.write((msg % args) + '\n')

    info = debug

    def warning(self, msg, *args, **kwargs):
        self.f.write('WARNING: ' + (msg % args) + '\n')

    def error(self, msg, *args, **kwargs):
        self.f.write('ERROR: ' + (msg % args) + '\n')

    critical = debug

# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    def __getattribute__(self, name):
        return self

    def __call__(self, *args, **kwargs):
        return self

# Exception raised for yacc-related errors
class YaccError(Exception):
    pass

# Format the result message that the parser produces when running in debug mode.
def format_result(r):
    repr_str = repr(r)
    if '\n' in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) > resultlimit:
        repr_str = repr_str[:resultlimit] + ' ...'
    result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str)
    return result

# Format stack entries when the parser is running in debug mode
def format_stack_entry(r):
    repr_str = repr(r)
    if '\n' in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) < 16:
        return repr_str
    else:
        return '<%s @ 0x%x>' % (type(r).__name__, id(r))

# Panic mode error recovery support.  This feature is being reworked--much of the
# code here is to offer a deprecation/backwards compatible transition

_errok = None
_token = None
_restart = None
_warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error().
Instead, invoke the methods on the associated parser instance:

    def p_error(p):
        ...
        # Use parser.errok(), parser.token(), parser.restart()
        ...

    parser = yacc.yacc()
'''

def errok():
    warnings.warn(_warnmsg)
    return _errok()

def restart():
    warnings.warn(_warnmsg)
    return _restart()

def token():
    warnings.warn(_warnmsg)
    return _token()

# Utility function to call the p_error() function with some deprecation hacks
def call_errorfunc(errorfunc, token, parser):
    global _errok, _token, _restart
    _errok = parser.errok
    _token = parser.token
    _restart = parser.restart
    r = errorfunc(token)
    try:
        del _errok, _token, _restart
    except NameError:
        pass
    return r

#-----------------------------------------------------------------------------
#                        === LR Parsing Engine ===
#
# The following classes are used for the LR parser itself.  These are not
# used during table construction and are independent of the actual LR
# table generation algorithm
#-----------------------------------------------------------------------------

# This class is used to hold non-terminal grammar symbols during parsing.
# It normally has the following attributes set:
#        .type       = Grammar symbol type
#        .value      = Symbol value
#        .lineno     = Starting line number
#        .endlineno  = Ending line number (optional, set automatically)
#        .lexpos     = Starting lex position
#        .endlexpos  = Ending lex position (optional, set automatically)

class YaccSymbol:
    def __str__(self):
        return self.type

    def __repr__(self):
        return str(self)

# This class is a wrapper around the objects actually passed to each
# grammar rule.  Index lookup and assignment actually assign the
# .value attribute of the underlying YaccSymbol object.
# The lineno() method returns the line number of a given
# item (or 0 if not defined).  The linespan() method returns
# a tuple of (startline,endline) representing the range of lines
# for a symbol.  The lexspan() method returns a tuple (lexpos,endlexpos)
# representing the range of positional information for a symbol.

class YaccProduction:
    def __init__(self, s, stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser = None

    def __getitem__(self, n):
        if isinstance(n, slice):
            return [s.value for s in self.slice[n]]
        elif n >= 0:
            return self.slice[n].value
        else:
            return self.stack[n].value

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __getslice__(self, i, j):
        return [s.value for s in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        return getattr(self.slice[n], 'lineno', 0)

    def set_lineno(self, n, lineno):
        self.slice[n].lineno = lineno

    def linespan(self, n):
        startline = getattr(self.slice[n], 'lineno', 0)
        endline = getattr(self.slice[n], 'endlineno', startline)
        return startline, endline

    def lexpos(self, n):
        return getattr(self.slice[n], 'lexpos', 0)

    def lexspan(self, n):
        startpos = getattr(self.slice[n], 'lexpos', 0)
        endpos = getattr(self.slice[n], 'endlexpos', startpos)
        return startpos, endpos

    def error(self):
        raise SyntaxError

# -----------------------------------------------------------------------------
#                               == LRParser ==
#
# The LR Parsing engine.
# -----------------------------------------------------------------------------

class LRParser:
    def __init__(self, lrtab, errorf):
        self.productions = lrtab.lr_productions
        self.action = lrtab.lr_action
        self.goto = lrtab.lr_goto
        self.errorfunc = errorf
        self.set_defaulted_states()
        self.errorok = True

    def errok(self):
        self.errorok = True

    def restart(self):
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = '$end'
        self.symstack.append(sym)
        self.statestack.append(0)

    # Defaulted state support.
    # This method identifies parser states where there is only one possible reduction action.
    # For such states, the parser can choose to make a rule reduction without consuming
    # the next look-ahead token.  This delayed invocation of the tokenizer can be useful in
    # certain kinds of advanced parsing situations where the lexer and parser interact with
    # each other or change states (i.e., manipulation of scope, lexer states, etc.).
288 # 289 # See: https://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions 290 def set_defaulted_states(self): 291 self.defaulted_states = {} 292 for state, actions in self.action.items(): 293 rules = list(actions.values()) 294 if len(rules) == 1 and rules[0] < 0: 295 self.defaulted_states[state] = rules[0] 296 297 def disable_defaulted_states(self): 298 self.defaulted_states = {} 299 300 def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): 301 if debug or yaccdevel: 302 if isinstance(debug, int): 303 debug = PlyLogger(sys.stderr) 304 return self.parsedebug(input, lexer, debug, tracking, tokenfunc) 305 elif tracking: 306 return self.parseopt(input, lexer, debug, tracking, tokenfunc) 307 else: 308 return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc) 309 310 311 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 312 # parsedebug(). 313 # 314 # This is the debugging enabled version of parse(). All changes made to the 315 # parsing engine should be made here. Optimized versions of this function 316 # are automatically created by the ply/ygen.py script. This script cuts out 317 # sections enclosed in markers such as this: 318 # 319 # #--! DEBUG 320 # statements 321 # #--! DEBUG 322 # 323 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 324 325 def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): 326 #--! parsedebug-start 327 lookahead = None # Current lookahead symbol 328 lookaheadstack = [] # Stack of lookahead symbols 329 actions = self.action # Local reference to action table (to avoid lookup on self.) 330 goto = self.goto # Local reference to goto table (to avoid lookup on self.) 331 prod = self.productions # Local reference to production list (to avoid lookup on self.) 332 defaulted_states = self.defaulted_states # Local reference to defaulted states 333 pslice = YaccProduction(None) # Production object passed to grammar rules 334 errorcount = 0 # Used during error recovery 335 336 #--! DEBUG 337 debug.info('PLY: PARSE DEBUG START') 338 #--! DEBUG 339 340 # If no lexer was given, we will try to use the lex module 341 if not lexer: 342 from . import lex 343 lexer = lex.lexer 344 345 # Set up the lexer and parser objects on pslice 346 pslice.lexer = lexer 347 pslice.parser = self 348 349 # If input was supplied, pass to lexer 350 if input is not None: 351 lexer.input(input) 352 353 if tokenfunc is None: 354 # Tokenize function 355 get_token = lexer.token 356 else: 357 get_token = tokenfunc 358 359 # Set the parser() token method (sometimes used in error recovery) 360 self.token = get_token 361 362 # Set up the state and symbol stacks 363 364 statestack = [] # Stack of parsing states 365 self.statestack = statestack 366 symstack = [] # Stack of grammar symbols 367 self.symstack = symstack 368 369 pslice.stack = symstack # Put in the production 370 errtoken = None # Err token 371 372 # The start state is assumed to be (0,$end) 373 374 statestack.append(0) 375 sym = YaccSymbol() 376 sym.type = '$end' 377 symstack.append(sym) 378 state = 0 379 while True: 380 # Get the next symbol on the input. If a lookahead symbol 381 # is already set, we just use that. Otherwise, we'll pull 382 # the next token off of the lookaheadstack or from the lexer 383 384 #--! DEBUG 385 debug.debug('') 386 debug.debug('State : %s', state) 387 #--! 
DEBUG 388 389 if state not in defaulted_states: 390 if not lookahead: 391 if not lookaheadstack: 392 lookahead = get_token() # Get the next token 393 else: 394 lookahead = lookaheadstack.pop() 395 if not lookahead: 396 lookahead = YaccSymbol() 397 lookahead.type = '$end' 398 399 # Check the action table 400 ltype = lookahead.type 401 t = actions[state].get(ltype) 402 else: 403 t = defaulted_states[state] 404 #--! DEBUG 405 debug.debug('Defaulted state %s: Reduce using %d', state, -t) 406 #--! DEBUG 407 408 #--! DEBUG 409 debug.debug('Stack : %s', 410 ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) 411 #--! DEBUG 412 413 if t is not None: 414 if t > 0: 415 # shift a symbol on the stack 416 statestack.append(t) 417 state = t 418 419 #--! DEBUG 420 debug.debug('Action : Shift and goto state %s', t) 421 #--! DEBUG 422 423 symstack.append(lookahead) 424 lookahead = None 425 426 # Decrease error count on successful shift 427 if errorcount: 428 errorcount -= 1 429 continue 430 431 if t < 0: 432 # reduce a symbol on the stack, emit a production 433 p = prod[-t] 434 pname = p.name 435 plen = p.len 436 437 # Get production function 438 sym = YaccSymbol() 439 sym.type = pname # Production name 440 sym.value = None 441 442 #--! DEBUG 443 if plen: 444 debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, 445 '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']', 446 goto[statestack[-1-plen]][pname]) 447 else: 448 debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [], 449 goto[statestack[-1]][pname]) 450 451 #--! DEBUG 452 453 if plen: 454 targ = symstack[-plen-1:] 455 targ[0] = sym 456 457 #--! TRACKING 458 if tracking: 459 t1 = targ[1] 460 sym.lineno = t1.lineno 461 sym.lexpos = t1.lexpos 462 t1 = targ[-1] 463 sym.endlineno = getattr(t1, 'endlineno', t1.lineno) 464 sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) 465 #--! TRACKING 466 467 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 468 # The code enclosed in this section is duplicated 469 # below as a performance optimization. Make sure 470 # changes get made in both locations. 471 472 pslice.slice = targ 473 474 try: 475 # Call the grammar rule with our special slice object 476 del symstack[-plen:] 477 del statestack[-plen:] 478 p.callable(pslice) 479 #--! DEBUG 480 debug.info('Result : %s', format_result(pslice[0])) 481 #--! DEBUG 482 symstack.append(sym) 483 state = goto[statestack[-1]][pname] 484 statestack.append(state) 485 except SyntaxError: 486 # If an error was set. Enter error recovery state 487 lookaheadstack.append(lookahead) 488 symstack.pop() 489 statestack.pop() 490 state = statestack[-1] 491 sym.type = 'error' 492 lookahead = sym 493 errorcount = error_count 494 self.errorok = False 495 continue 496 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 497 498 else: 499 500 #--! TRACKING 501 if tracking: 502 sym.lineno = lexer.lineno 503 sym.lexpos = lexer.lexpos 504 #--! TRACKING 505 506 targ = [sym] 507 508 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 509 # The code enclosed in this section is duplicated 510 # above as a performance optimization. Make sure 511 # changes get made in both locations. 512 513 pslice.slice = targ 514 515 try: 516 # Call the grammar rule with our special slice object 517 p.callable(pslice) 518 #--! DEBUG 519 debug.info('Result : %s', format_result(pslice[0])) 520 #--! 
DEBUG 521 symstack.append(sym) 522 state = goto[statestack[-1]][pname] 523 statestack.append(state) 524 except SyntaxError: 525 # If an error was set. Enter error recovery state 526 lookaheadstack.append(lookahead) 527 symstack.pop() 528 statestack.pop() 529 state = statestack[-1] 530 sym.type = 'error' 531 lookahead = sym 532 errorcount = error_count 533 self.errorok = False 534 continue 535 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 536 537 if t == 0: 538 n = symstack[-1] 539 result = getattr(n, 'value', None) 540 #--! DEBUG 541 debug.info('Done : Returning %s', format_result(result)) 542 debug.info('PLY: PARSE DEBUG END') 543 #--! DEBUG 544 return result 545 546 if t is None: 547 548 #--! DEBUG 549 debug.error('Error : %s', 550 ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) 551 #--! DEBUG 552 553 # We have some kind of parsing error here. To handle 554 # this, we are going to push the current token onto 555 # the tokenstack and replace it with an 'error' token. 556 # If there are any synchronization rules, they may 557 # catch it. 558 # 559 # In addition to pushing the error token, we call call 560 # the user defined p_error() function if this is the 561 # first syntax error. This function is only called if 562 # errorcount == 0. 563 if errorcount == 0 or self.errorok: 564 errorcount = error_count 565 self.errorok = False 566 errtoken = lookahead 567 if errtoken.type == '$end': 568 errtoken = None # End of file! 569 if self.errorfunc: 570 if errtoken and not hasattr(errtoken, 'lexer'): 571 errtoken.lexer = lexer 572 tok = call_errorfunc(self.errorfunc, errtoken, self) 573 if self.errorok: 574 # User must have done some kind of panic 575 # mode recovery on their own. The 576 # returned token is the next lookahead 577 lookahead = tok 578 errtoken = None 579 continue 580 else: 581 if errtoken: 582 if hasattr(errtoken, 'lineno'): 583 lineno = lookahead.lineno 584 else: 585 lineno = 0 586 if lineno: 587 sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) 588 else: 589 sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) 590 else: 591 sys.stderr.write('yacc: Parse error in input. EOF\n') 592 return 593 594 else: 595 errorcount = error_count 596 597 # case 1: the statestack only has 1 entry on it. If we're in this state, the 598 # entire parse has been rolled back and we're completely hosed. The token is 599 # discarded and we just keep going. 600 601 if len(statestack) <= 1 and lookahead.type != '$end': 602 lookahead = None 603 errtoken = None 604 state = 0 605 # Nuke the pushback stack 606 del lookaheadstack[:] 607 continue 608 609 # case 2: the statestack has a couple of entries on it, but we're 610 # at the end of the file. nuke the top entry and generate an error token 611 612 # Start nuking entries on the stack 613 if lookahead.type == '$end': 614 # Whoa. We're really hosed here. Bail out 615 return 616 617 if lookahead.type != 'error': 618 sym = symstack[-1] 619 if sym.type == 'error': 620 # Hmmm. Error is on top of stack, we'll just nuke input 621 # symbol and continue 622 #--! TRACKING 623 if tracking: 624 sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) 625 sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) 626 #--! 
TRACKING 627 lookahead = None 628 continue 629 630 # Create the error symbol for the first time and make it the new lookahead symbol 631 t = YaccSymbol() 632 t.type = 'error' 633 634 if hasattr(lookahead, 'lineno'): 635 t.lineno = t.endlineno = lookahead.lineno 636 if hasattr(lookahead, 'lexpos'): 637 t.lexpos = t.endlexpos = lookahead.lexpos 638 t.value = lookahead 639 lookaheadstack.append(lookahead) 640 lookahead = t 641 else: 642 sym = symstack.pop() 643 #--! TRACKING 644 if tracking: 645 lookahead.lineno = sym.lineno 646 lookahead.lexpos = sym.lexpos 647 #--! TRACKING 648 statestack.pop() 649 state = statestack[-1] 650 651 continue 652 653 # Call an error function here 654 raise RuntimeError('yacc: internal parser error!!!\n') 655 656 #--! parsedebug-end 657 658 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 659 # parseopt(). 660 # 661 # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY! 662 # This code is automatically generated by the ply/ygen.py script. Make 663 # changes to the parsedebug() method instead. 664 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 665 666 def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): 667 #--! parseopt-start 668 lookahead = None # Current lookahead symbol 669 lookaheadstack = [] # Stack of lookahead symbols 670 actions = self.action # Local reference to action table (to avoid lookup on self.) 671 goto = self.goto # Local reference to goto table (to avoid lookup on self.) 672 prod = self.productions # Local reference to production list (to avoid lookup on self.) 673 defaulted_states = self.defaulted_states # Local reference to defaulted states 674 pslice = YaccProduction(None) # Production object passed to grammar rules 675 errorcount = 0 # Used during error recovery 676 677 678 # If no lexer was given, we will try to use the lex module 679 if not lexer: 680 from . import lex 681 lexer = lex.lexer 682 683 # Set up the lexer and parser objects on pslice 684 pslice.lexer = lexer 685 pslice.parser = self 686 687 # If input was supplied, pass to lexer 688 if input is not None: 689 lexer.input(input) 690 691 if tokenfunc is None: 692 # Tokenize function 693 get_token = lexer.token 694 else: 695 get_token = tokenfunc 696 697 # Set the parser() token method (sometimes used in error recovery) 698 self.token = get_token 699 700 # Set up the state and symbol stacks 701 702 statestack = [] # Stack of parsing states 703 self.statestack = statestack 704 symstack = [] # Stack of grammar symbols 705 self.symstack = symstack 706 707 pslice.stack = symstack # Put in the production 708 errtoken = None # Err token 709 710 # The start state is assumed to be (0,$end) 711 712 statestack.append(0) 713 sym = YaccSymbol() 714 sym.type = '$end' 715 symstack.append(sym) 716 state = 0 717 while True: 718 # Get the next symbol on the input. If a lookahead symbol 719 # is already set, we just use that. 
Otherwise, we'll pull 720 # the next token off of the lookaheadstack or from the lexer 721 722 723 if state not in defaulted_states: 724 if not lookahead: 725 if not lookaheadstack: 726 lookahead = get_token() # Get the next token 727 else: 728 lookahead = lookaheadstack.pop() 729 if not lookahead: 730 lookahead = YaccSymbol() 731 lookahead.type = '$end' 732 733 # Check the action table 734 ltype = lookahead.type 735 t = actions[state].get(ltype) 736 else: 737 t = defaulted_states[state] 738 739 740 if t is not None: 741 if t > 0: 742 # shift a symbol on the stack 743 statestack.append(t) 744 state = t 745 746 747 symstack.append(lookahead) 748 lookahead = None 749 750 # Decrease error count on successful shift 751 if errorcount: 752 errorcount -= 1 753 continue 754 755 if t < 0: 756 # reduce a symbol on the stack, emit a production 757 p = prod[-t] 758 pname = p.name 759 plen = p.len 760 761 # Get production function 762 sym = YaccSymbol() 763 sym.type = pname # Production name 764 sym.value = None 765 766 767 if plen: 768 targ = symstack[-plen-1:] 769 targ[0] = sym 770 771 #--! TRACKING 772 if tracking: 773 t1 = targ[1] 774 sym.lineno = t1.lineno 775 sym.lexpos = t1.lexpos 776 t1 = targ[-1] 777 sym.endlineno = getattr(t1, 'endlineno', t1.lineno) 778 sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) 779 #--! TRACKING 780 781 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 782 # The code enclosed in this section is duplicated 783 # below as a performance optimization. Make sure 784 # changes get made in both locations. 785 786 pslice.slice = targ 787 788 try: 789 # Call the grammar rule with our special slice object 790 del symstack[-plen:] 791 del statestack[-plen:] 792 p.callable(pslice) 793 symstack.append(sym) 794 state = goto[statestack[-1]][pname] 795 statestack.append(state) 796 except SyntaxError: 797 # If an error was set. Enter error recovery state 798 lookaheadstack.append(lookahead) 799 symstack.pop() 800 statestack.pop() 801 state = statestack[-1] 802 sym.type = 'error' 803 lookahead = sym 804 errorcount = error_count 805 self.errorok = False 806 continue 807 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 808 809 else: 810 811 #--! TRACKING 812 if tracking: 813 sym.lineno = lexer.lineno 814 sym.lexpos = lexer.lexpos 815 #--! TRACKING 816 817 targ = [sym] 818 819 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 820 # The code enclosed in this section is duplicated 821 # above as a performance optimization. Make sure 822 # changes get made in both locations. 823 824 pslice.slice = targ 825 826 try: 827 # Call the grammar rule with our special slice object 828 p.callable(pslice) 829 symstack.append(sym) 830 state = goto[statestack[-1]][pname] 831 statestack.append(state) 832 except SyntaxError: 833 # If an error was set. Enter error recovery state 834 lookaheadstack.append(lookahead) 835 symstack.pop() 836 statestack.pop() 837 state = statestack[-1] 838 sym.type = 'error' 839 lookahead = sym 840 errorcount = error_count 841 self.errorok = False 842 continue 843 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 844 845 if t == 0: 846 n = symstack[-1] 847 result = getattr(n, 'value', None) 848 return result 849 850 if t is None: 851 852 853 # We have some kind of parsing error here. To handle 854 # this, we are going to push the current token onto 855 # the tokenstack and replace it with an 'error' token. 856 # If there are any synchronization rules, they may 857 # catch it. 
858 # 859 # In addition to pushing the error token, we call call 860 # the user defined p_error() function if this is the 861 # first syntax error. This function is only called if 862 # errorcount == 0. 863 if errorcount == 0 or self.errorok: 864 errorcount = error_count 865 self.errorok = False 866 errtoken = lookahead 867 if errtoken.type == '$end': 868 errtoken = None # End of file! 869 if self.errorfunc: 870 if errtoken and not hasattr(errtoken, 'lexer'): 871 errtoken.lexer = lexer 872 tok = call_errorfunc(self.errorfunc, errtoken, self) 873 if self.errorok: 874 # User must have done some kind of panic 875 # mode recovery on their own. The 876 # returned token is the next lookahead 877 lookahead = tok 878 errtoken = None 879 continue 880 else: 881 if errtoken: 882 if hasattr(errtoken, 'lineno'): 883 lineno = lookahead.lineno 884 else: 885 lineno = 0 886 if lineno: 887 sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) 888 else: 889 sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) 890 else: 891 sys.stderr.write('yacc: Parse error in input. EOF\n') 892 return 893 894 else: 895 errorcount = error_count 896 897 # case 1: the statestack only has 1 entry on it. If we're in this state, the 898 # entire parse has been rolled back and we're completely hosed. The token is 899 # discarded and we just keep going. 900 901 if len(statestack) <= 1 and lookahead.type != '$end': 902 lookahead = None 903 errtoken = None 904 state = 0 905 # Nuke the pushback stack 906 del lookaheadstack[:] 907 continue 908 909 # case 2: the statestack has a couple of entries on it, but we're 910 # at the end of the file. nuke the top entry and generate an error token 911 912 # Start nuking entries on the stack 913 if lookahead.type == '$end': 914 # Whoa. We're really hosed here. Bail out 915 return 916 917 if lookahead.type != 'error': 918 sym = symstack[-1] 919 if sym.type == 'error': 920 # Hmmm. Error is on top of stack, we'll just nuke input 921 # symbol and continue 922 #--! TRACKING 923 if tracking: 924 sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) 925 sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) 926 #--! TRACKING 927 lookahead = None 928 continue 929 930 # Create the error symbol for the first time and make it the new lookahead symbol 931 t = YaccSymbol() 932 t.type = 'error' 933 934 if hasattr(lookahead, 'lineno'): 935 t.lineno = t.endlineno = lookahead.lineno 936 if hasattr(lookahead, 'lexpos'): 937 t.lexpos = t.endlexpos = lookahead.lexpos 938 t.value = lookahead 939 lookaheadstack.append(lookahead) 940 lookahead = t 941 else: 942 sym = symstack.pop() 943 #--! TRACKING 944 if tracking: 945 lookahead.lineno = sym.lineno 946 lookahead.lexpos = sym.lexpos 947 #--! TRACKING 948 statestack.pop() 949 state = statestack[-1] 950 951 continue 952 953 # Call an error function here 954 raise RuntimeError('yacc: internal parser error!!!\n') 955 956 #--! parseopt-end 957 958 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 959 # parseopt_notrack(). 960 # 961 # Optimized version of parseopt() with line number tracking removed. 962 # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated 963 # by the ply/ygen.py script. Make changes to the parsedebug() method instead. 964 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 965 966 def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): 967 #--! 
parseopt-notrack-start 968 lookahead = None # Current lookahead symbol 969 lookaheadstack = [] # Stack of lookahead symbols 970 actions = self.action # Local reference to action table (to avoid lookup on self.) 971 goto = self.goto # Local reference to goto table (to avoid lookup on self.) 972 prod = self.productions # Local reference to production list (to avoid lookup on self.) 973 defaulted_states = self.defaulted_states # Local reference to defaulted states 974 pslice = YaccProduction(None) # Production object passed to grammar rules 975 errorcount = 0 # Used during error recovery 976 977 978 # If no lexer was given, we will try to use the lex module 979 if not lexer: 980 from . import lex 981 lexer = lex.lexer 982 983 # Set up the lexer and parser objects on pslice 984 pslice.lexer = lexer 985 pslice.parser = self 986 987 # If input was supplied, pass to lexer 988 if input is not None: 989 lexer.input(input) 990 991 if tokenfunc is None: 992 # Tokenize function 993 get_token = lexer.token 994 else: 995 get_token = tokenfunc 996 997 # Set the parser() token method (sometimes used in error recovery) 998 self.token = get_token 999 1000 # Set up the state and symbol stacks 1001 1002 statestack = [] # Stack of parsing states 1003 self.statestack = statestack 1004 symstack = [] # Stack of grammar symbols 1005 self.symstack = symstack 1006 1007 pslice.stack = symstack # Put in the production 1008 errtoken = None # Err token 1009 1010 # The start state is assumed to be (0,$end) 1011 1012 statestack.append(0) 1013 sym = YaccSymbol() 1014 sym.type = '$end' 1015 symstack.append(sym) 1016 state = 0 1017 while True: 1018 # Get the next symbol on the input. If a lookahead symbol 1019 # is already set, we just use that. Otherwise, we'll pull 1020 # the next token off of the lookaheadstack or from the lexer 1021 1022 1023 if state not in defaulted_states: 1024 if not lookahead: 1025 if not lookaheadstack: 1026 lookahead = get_token() # Get the next token 1027 else: 1028 lookahead = lookaheadstack.pop() 1029 if not lookahead: 1030 lookahead = YaccSymbol() 1031 lookahead.type = '$end' 1032 1033 # Check the action table 1034 ltype = lookahead.type 1035 t = actions[state].get(ltype) 1036 else: 1037 t = defaulted_states[state] 1038 1039 1040 if t is not None: 1041 if t > 0: 1042 # shift a symbol on the stack 1043 statestack.append(t) 1044 state = t 1045 1046 1047 symstack.append(lookahead) 1048 lookahead = None 1049 1050 # Decrease error count on successful shift 1051 if errorcount: 1052 errorcount -= 1 1053 continue 1054 1055 if t < 0: 1056 # reduce a symbol on the stack, emit a production 1057 p = prod[-t] 1058 pname = p.name 1059 plen = p.len 1060 1061 # Get production function 1062 sym = YaccSymbol() 1063 sym.type = pname # Production name 1064 sym.value = None 1065 1066 1067 if plen: 1068 targ = symstack[-plen-1:] 1069 targ[0] = sym 1070 1071 1072 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 1073 # The code enclosed in this section is duplicated 1074 # below as a performance optimization. Make sure 1075 # changes get made in both locations. 1076 1077 pslice.slice = targ 1078 1079 try: 1080 # Call the grammar rule with our special slice object 1081 del symstack[-plen:] 1082 del statestack[-plen:] 1083 p.callable(pslice) 1084 symstack.append(sym) 1085 state = goto[statestack[-1]][pname] 1086 statestack.append(state) 1087 except SyntaxError: 1088 # If an error was set. 
Enter error recovery state 1089 lookaheadstack.append(lookahead) 1090 symstack.pop() 1091 statestack.pop() 1092 state = statestack[-1] 1093 sym.type = 'error' 1094 lookahead = sym 1095 errorcount = error_count 1096 self.errorok = False 1097 continue 1098 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 1099 1100 else: 1101 1102 1103 targ = [sym] 1104 1105 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 1106 # The code enclosed in this section is duplicated 1107 # above as a performance optimization. Make sure 1108 # changes get made in both locations. 1109 1110 pslice.slice = targ 1111 1112 try: 1113 # Call the grammar rule with our special slice object 1114 p.callable(pslice) 1115 symstack.append(sym) 1116 state = goto[statestack[-1]][pname] 1117 statestack.append(state) 1118 except SyntaxError: 1119 # If an error was set. Enter error recovery state 1120 lookaheadstack.append(lookahead) 1121 symstack.pop() 1122 statestack.pop() 1123 state = statestack[-1] 1124 sym.type = 'error' 1125 lookahead = sym 1126 errorcount = error_count 1127 self.errorok = False 1128 continue 1129 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 1130 1131 if t == 0: 1132 n = symstack[-1] 1133 result = getattr(n, 'value', None) 1134 return result 1135 1136 if t is None: 1137 1138 1139 # We have some kind of parsing error here. To handle 1140 # this, we are going to push the current token onto 1141 # the tokenstack and replace it with an 'error' token. 1142 # If there are any synchronization rules, they may 1143 # catch it. 1144 # 1145 # In addition to pushing the error token, we call call 1146 # the user defined p_error() function if this is the 1147 # first syntax error. This function is only called if 1148 # errorcount == 0. 1149 if errorcount == 0 or self.errorok: 1150 errorcount = error_count 1151 self.errorok = False 1152 errtoken = lookahead 1153 if errtoken.type == '$end': 1154 errtoken = None # End of file! 1155 if self.errorfunc: 1156 if errtoken and not hasattr(errtoken, 'lexer'): 1157 errtoken.lexer = lexer 1158 tok = call_errorfunc(self.errorfunc, errtoken, self) 1159 if self.errorok: 1160 # User must have done some kind of panic 1161 # mode recovery on their own. The 1162 # returned token is the next lookahead 1163 lookahead = tok 1164 errtoken = None 1165 continue 1166 else: 1167 if errtoken: 1168 if hasattr(errtoken, 'lineno'): 1169 lineno = lookahead.lineno 1170 else: 1171 lineno = 0 1172 if lineno: 1173 sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) 1174 else: 1175 sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) 1176 else: 1177 sys.stderr.write('yacc: Parse error in input. EOF\n') 1178 return 1179 1180 else: 1181 errorcount = error_count 1182 1183 # case 1: the statestack only has 1 entry on it. If we're in this state, the 1184 # entire parse has been rolled back and we're completely hosed. The token is 1185 # discarded and we just keep going. 1186 1187 if len(statestack) <= 1 and lookahead.type != '$end': 1188 lookahead = None 1189 errtoken = None 1190 state = 0 1191 # Nuke the pushback stack 1192 del lookaheadstack[:] 1193 continue 1194 1195 # case 2: the statestack has a couple of entries on it, but we're 1196 # at the end of the file. nuke the top entry and generate an error token 1197 1198 # Start nuking entries on the stack 1199 if lookahead.type == '$end': 1200 # Whoa. We're really hosed here. Bail out 1201 return 1202 1203 if lookahead.type != 'error': 1204 sym = symstack[-1] 1205 if sym.type == 'error': 1206 # Hmmm. 
Error is on top of stack, we'll just nuke input 1207 # symbol and continue 1208 lookahead = None 1209 continue 1210 1211 # Create the error symbol for the first time and make it the new lookahead symbol 1212 t = YaccSymbol() 1213 t.type = 'error' 1214 1215 if hasattr(lookahead, 'lineno'): 1216 t.lineno = t.endlineno = lookahead.lineno 1217 if hasattr(lookahead, 'lexpos'): 1218 t.lexpos = t.endlexpos = lookahead.lexpos 1219 t.value = lookahead 1220 lookaheadstack.append(lookahead) 1221 lookahead = t 1222 else: 1223 sym = symstack.pop() 1224 statestack.pop() 1225 state = statestack[-1] 1226 1227 continue 1228 1229 # Call an error function here 1230 raise RuntimeError('yacc: internal parser error!!!\n') 1231 1232 #--! parseopt-notrack-end 1233 1234# ----------------------------------------------------------------------------- 1235# === Grammar Representation === 1236# 1237# The following functions, classes, and variables are used to represent and 1238# manipulate the rules that make up a grammar. 1239# ----------------------------------------------------------------------------- 1240 1241# regex matching identifiers 1242_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') 1243 1244# ----------------------------------------------------------------------------- 1245# class Production: 1246# 1247# This class stores the raw information about a single production or grammar rule. 1248# A grammar rule refers to a specification such as this: 1249# 1250# expr : expr PLUS term 1251# 1252# Here are the basic attributes defined on all productions 1253# 1254# name - Name of the production. For example 'expr' 1255# prod - A list of symbols on the right side ['expr','PLUS','term'] 1256# prec - Production precedence level 1257# number - Production number. 1258# func - Function that executes on reduce 1259# file - File where production function is defined 1260# lineno - Line number where production function is defined 1261# 1262# The following attributes are defined or optional. 
1263# 1264# len - Length of the production (number of symbols on right hand side) 1265# usyms - Set of unique symbols found in the production 1266# ----------------------------------------------------------------------------- 1267 1268class Production(object): 1269 reduced = 0 1270 def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0): 1271 self.name = name 1272 self.prod = tuple(prod) 1273 self.number = number 1274 self.func = func 1275 self.callable = None 1276 self.file = file 1277 self.line = line 1278 self.prec = precedence 1279 1280 # Internal settings used during table construction 1281 1282 self.len = len(self.prod) # Length of the production 1283 1284 # Create a list of unique production symbols used in the production 1285 self.usyms = [] 1286 for s in self.prod: 1287 if s not in self.usyms: 1288 self.usyms.append(s) 1289 1290 # List of all LR items for the production 1291 self.lr_items = [] 1292 self.lr_next = None 1293 1294 # Create a string representation 1295 if self.prod: 1296 self.str = '%s -> %s' % (self.name, ' '.join(self.prod)) 1297 else: 1298 self.str = '%s -> <empty>' % self.name 1299 1300 def __str__(self): 1301 return self.str 1302 1303 def __repr__(self): 1304 return 'Production(' + str(self) + ')' 1305 1306 def __len__(self): 1307 return len(self.prod) 1308 1309 def __nonzero__(self): 1310 return 1 1311 1312 def __getitem__(self, index): 1313 return self.prod[index] 1314 1315 # Return the nth lr_item from the production (or None if at the end) 1316 def lr_item(self, n): 1317 if n > len(self.prod): 1318 return None 1319 p = LRItem(self, n) 1320 # Precompute the list of productions immediately following. 1321 try: 1322 p.lr_after = Prodnames[p.prod[n+1]] 1323 except (IndexError, KeyError): 1324 p.lr_after = [] 1325 try: 1326 p.lr_before = p.prod[n-1] 1327 except IndexError: 1328 p.lr_before = None 1329 return p 1330 1331 # Bind the production function name to a callable 1332 def bind(self, pdict): 1333 if self.func: 1334 self.callable = pdict[self.func] 1335 1336# This class serves as a minimal standin for Production objects when 1337# reading table data from files. It only contains information 1338# actually used by the LR parsing engine, plus some additional 1339# debugging information. 1340class MiniProduction(object): 1341 def __init__(self, str, name, len, func, file, line): 1342 self.name = name 1343 self.len = len 1344 self.func = func 1345 self.callable = None 1346 self.file = file 1347 self.line = line 1348 self.str = str 1349 1350 def __str__(self): 1351 return self.str 1352 1353 def __repr__(self): 1354 return 'MiniProduction(%s)' % self.str 1355 1356 # Bind the production function name to a callable 1357 def bind(self, pdict): 1358 if self.func: 1359 self.callable = pdict[self.func] 1360 1361 1362# ----------------------------------------------------------------------------- 1363# class LRItem 1364# 1365# This class represents a specific stage of parsing a production rule. For 1366# example: 1367# 1368# expr : expr . PLUS term 1369# 1370# In the above, the "." represents the current location of the parse. Here 1371# basic attributes: 1372# 1373# name - Name of the production. For example 'expr' 1374# prod - A list of symbols on the right side ['expr','.', 'PLUS','term'] 1375# number - Production number. 1376# 1377# lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term' 1378# then lr_next refers to 'expr -> expr PLUS . term' 1379# lr_index - LR item index (location of the ".") in the prod list. 
1380# lookaheads - LALR lookahead symbols for this item 1381# len - Length of the production (number of symbols on right hand side) 1382# lr_after - List of all productions that immediately follow 1383# lr_before - Grammar symbol immediately before 1384# ----------------------------------------------------------------------------- 1385 1386class LRItem(object): 1387 def __init__(self, p, n): 1388 self.name = p.name 1389 self.prod = list(p.prod) 1390 self.number = p.number 1391 self.lr_index = n 1392 self.lookaheads = {} 1393 self.prod.insert(n, '.') 1394 self.prod = tuple(self.prod) 1395 self.len = len(self.prod) 1396 self.usyms = p.usyms 1397 1398 def __str__(self): 1399 if self.prod: 1400 s = '%s -> %s' % (self.name, ' '.join(self.prod)) 1401 else: 1402 s = '%s -> <empty>' % self.name 1403 return s 1404 1405 def __repr__(self): 1406 return 'LRItem(' + str(self) + ')' 1407 1408# ----------------------------------------------------------------------------- 1409# rightmost_terminal() 1410# 1411# Return the rightmost terminal from a list of symbols. Used in add_production() 1412# ----------------------------------------------------------------------------- 1413def rightmost_terminal(symbols, terminals): 1414 i = len(symbols) - 1 1415 while i >= 0: 1416 if symbols[i] in terminals: 1417 return symbols[i] 1418 i -= 1 1419 return None 1420 1421# ----------------------------------------------------------------------------- 1422# === GRAMMAR CLASS === 1423# 1424# The following class represents the contents of the specified grammar along 1425# with various computed properties such as first sets, follow sets, LR items, etc. 1426# This data is used for critical parts of the table generation process later. 1427# ----------------------------------------------------------------------------- 1428 1429class GrammarError(YaccError): 1430 pass 1431 1432class Grammar(object): 1433 def __init__(self, terminals): 1434 self.Productions = [None] # A list of all of the productions. The first 1435 # entry is always reserved for the purpose of 1436 # building an augmented grammar 1437 1438 self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all 1439 # productions of that nonterminal. 1440 1441 self.Prodmap = {} # A dictionary that is only used to detect duplicate 1442 # productions. 1443 1444 self.Terminals = {} # A dictionary mapping the names of terminal symbols to a 1445 # list of the rules where they are used. 1446 1447 for term in terminals: 1448 self.Terminals[term] = [] 1449 1450 self.Terminals['error'] = [] 1451 1452 self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list 1453 # of rule numbers where they are used. 1454 1455 self.First = {} # A dictionary of precomputed FIRST(x) symbols 1456 1457 self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols 1458 1459 self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the 1460 # form ('right',level) or ('nonassoc', level) or ('left',level) 1461 1462 self.UsedPrecedence = set() # Precedence rules that were actually used by the grammer. 1463 # This is only used to provide error checking and to generate 1464 # a warning about unused precedence rules. 
1465 1466 self.Start = None # Starting symbol for the grammar 1467 1468 1469 def __len__(self): 1470 return len(self.Productions) 1471 1472 def __getitem__(self, index): 1473 return self.Productions[index] 1474 1475 # ----------------------------------------------------------------------------- 1476 # set_precedence() 1477 # 1478 # Sets the precedence for a given terminal. assoc is the associativity such as 1479 # 'left','right', or 'nonassoc'. level is a numeric level. 1480 # 1481 # ----------------------------------------------------------------------------- 1482 1483 def set_precedence(self, term, assoc, level): 1484 assert self.Productions == [None], 'Must call set_precedence() before add_production()' 1485 if term in self.Precedence: 1486 raise GrammarError('Precedence already specified for terminal %r' % term) 1487 if assoc not in ['left', 'right', 'nonassoc']: 1488 raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") 1489 self.Precedence[term] = (assoc, level) 1490 1491 # ----------------------------------------------------------------------------- 1492 # add_production() 1493 # 1494 # Given an action function, this function assembles a production rule and 1495 # computes its precedence level. 1496 # 1497 # The production rule is supplied as a list of symbols. For example, 1498 # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and 1499 # symbols ['expr','PLUS','term']. 1500 # 1501 # Precedence is determined by the precedence of the right-most non-terminal 1502 # or the precedence of a terminal specified by %prec. 1503 # 1504 # A variety of error checks are performed to make sure production symbols 1505 # are valid and that %prec is used correctly. 1506 # ----------------------------------------------------------------------------- 1507 1508 def add_production(self, prodname, syms, func=None, file='', line=0): 1509 1510 if prodname in self.Terminals: 1511 raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname)) 1512 if prodname == 'error': 1513 raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname)) 1514 if not _is_identifier.match(prodname): 1515 raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname)) 1516 1517 # Look for literal tokens 1518 for n, s in enumerate(syms): 1519 if s[0] in "'\"": 1520 try: 1521 c = eval(s) 1522 if (len(c) > 1): 1523 raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' % 1524 (file, line, s, prodname)) 1525 if c not in self.Terminals: 1526 self.Terminals[c] = [] 1527 syms[n] = c 1528 continue 1529 except SyntaxError: 1530 pass 1531 if not _is_identifier.match(s) and s != '%prec': 1532 raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname)) 1533 1534 # Determine the precedence level 1535 if '%prec' in syms: 1536 if syms[-1] == '%prec': 1537 raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line)) 1538 if syms[-2] != '%prec': 1539 raise GrammarError('%s:%d: Syntax error. 
%%prec can only appear at the end of a grammar rule' % 1540 (file, line)) 1541 precname = syms[-1] 1542 prodprec = self.Precedence.get(precname) 1543 if not prodprec: 1544 raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname)) 1545 else: 1546 self.UsedPrecedence.add(precname) 1547 del syms[-2:] # Drop %prec from the rule 1548 else: 1549 # If no %prec, precedence is determined by the rightmost terminal symbol 1550 precname = rightmost_terminal(syms, self.Terminals) 1551 prodprec = self.Precedence.get(precname, ('right', 0)) 1552 1553 # See if the rule is already in the rulemap 1554 map = '%s -> %s' % (prodname, syms) 1555 if map in self.Prodmap: 1556 m = self.Prodmap[map] 1557 raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) + 1558 'Previous definition at %s:%d' % (m.file, m.line)) 1559 1560 # From this point on, everything is valid. Create a new Production instance 1561 pnumber = len(self.Productions) 1562 if prodname not in self.Nonterminals: 1563 self.Nonterminals[prodname] = [] 1564 1565 # Add the production number to Terminals and Nonterminals 1566 for t in syms: 1567 if t in self.Terminals: 1568 self.Terminals[t].append(pnumber) 1569 else: 1570 if t not in self.Nonterminals: 1571 self.Nonterminals[t] = [] 1572 self.Nonterminals[t].append(pnumber) 1573 1574 # Create a production and add it to the list of productions 1575 p = Production(pnumber, prodname, syms, prodprec, func, file, line) 1576 self.Productions.append(p) 1577 self.Prodmap[map] = p 1578 1579 # Add to the global productions list 1580 try: 1581 self.Prodnames[prodname].append(p) 1582 except KeyError: 1583 self.Prodnames[prodname] = [p] 1584 1585 # ----------------------------------------------------------------------------- 1586 # set_start() 1587 # 1588 # Sets the starting symbol and creates the augmented grammar. Production 1589 # rule 0 is S' -> start where start is the start symbol. 1590 # ----------------------------------------------------------------------------- 1591 1592 def set_start(self, start=None): 1593 if not start: 1594 start = self.Productions[1].name 1595 if start not in self.Nonterminals: 1596 raise GrammarError('start symbol %s undefined' % start) 1597 self.Productions[0] = Production(0, "S'", [start]) 1598 self.Nonterminals[start].append(0) 1599 self.Start = start 1600 1601 # ----------------------------------------------------------------------------- 1602 # find_unreachable() 1603 # 1604 # Find all of the nonterminal symbols that can't be reached from the starting 1605 # symbol. Returns a list of nonterminals that can't be reached. 1606 # ----------------------------------------------------------------------------- 1607 1608 def find_unreachable(self): 1609 1610 # Mark all symbols that are reachable from a symbol s 1611 def mark_reachable_from(s): 1612 if s in reachable: 1613 return 1614 reachable.add(s) 1615 for p in self.Prodnames.get(s, []): 1616 for r in p.prod: 1617 mark_reachable_from(r) 1618 1619 reachable = set() 1620 mark_reachable_from(self.Productions[0].prod[0]) 1621 return [s for s in self.Nonterminals if s not in reachable] 1622 1623 # ----------------------------------------------------------------------------- 1624 # infinite_cycles() 1625 # 1626 # This function looks at the various parsing rules and tries to detect 1627 # infinite recursion cycles (grammar rules where there is no possible way 1628 # to derive a string of only terminals). 
1629 # ----------------------------------------------------------------------------- 1630 1631 def infinite_cycles(self): 1632 terminates = {} 1633 1634 # Terminals: 1635 for t in self.Terminals: 1636 terminates[t] = True 1637 1638 terminates['$end'] = True 1639 1640 # Nonterminals: 1641 1642 # Initialize to false: 1643 for n in self.Nonterminals: 1644 terminates[n] = False 1645 1646 # Then propagate termination until no change: 1647 while True: 1648 some_change = False 1649 for (n, pl) in self.Prodnames.items(): 1650 # Nonterminal n terminates iff any of its productions terminates. 1651 for p in pl: 1652 # Production p terminates iff all of its rhs symbols terminate. 1653 for s in p.prod: 1654 if not terminates[s]: 1655 # The symbol s does not terminate, 1656 # so production p does not terminate. 1657 p_terminates = False 1658 break 1659 else: 1660 # didn't break from the loop, 1661 # so every symbol s terminates 1662 # so production p terminates. 1663 p_terminates = True 1664 1665 if p_terminates: 1666 # symbol n terminates! 1667 if not terminates[n]: 1668 terminates[n] = True 1669 some_change = True 1670 # Don't need to consider any more productions for this n. 1671 break 1672 1673 if not some_change: 1674 break 1675 1676 infinite = [] 1677 for (s, term) in terminates.items(): 1678 if not term: 1679 if s not in self.Prodnames and s not in self.Terminals and s != 'error': 1680 # s is used-but-not-defined, and we've already warned of that, 1681 # so it would be overkill to say that it's also non-terminating. 1682 pass 1683 else: 1684 infinite.append(s) 1685 1686 return infinite 1687 1688 # ----------------------------------------------------------------------------- 1689 # undefined_symbols() 1690 # 1691 # Find all symbols that were used the grammar, but not defined as tokens or 1692 # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol 1693 # and prod is the production where the symbol was used. 1694 # ----------------------------------------------------------------------------- 1695 def undefined_symbols(self): 1696 result = [] 1697 for p in self.Productions: 1698 if not p: 1699 continue 1700 1701 for s in p.prod: 1702 if s not in self.Prodnames and s not in self.Terminals and s != 'error': 1703 result.append((s, p)) 1704 return result 1705 1706 # ----------------------------------------------------------------------------- 1707 # unused_terminals() 1708 # 1709 # Find all terminals that were defined, but not used by the grammar. Returns 1710 # a list of all symbols. 1711 # ----------------------------------------------------------------------------- 1712 def unused_terminals(self): 1713 unused_tok = [] 1714 for s, v in self.Terminals.items(): 1715 if s != 'error' and not v: 1716 unused_tok.append(s) 1717 1718 return unused_tok 1719 1720 # ------------------------------------------------------------------------------ 1721 # unused_rules() 1722 # 1723 # Find all grammar rules that were defined, but not used (maybe not reachable) 1724 # Returns a list of productions. 
1725 # ------------------------------------------------------------------------------ 1726 1727 def unused_rules(self): 1728 unused_prod = [] 1729 for s, v in self.Nonterminals.items(): 1730 if not v: 1731 p = self.Prodnames[s][0] 1732 unused_prod.append(p) 1733 return unused_prod 1734 1735 # ----------------------------------------------------------------------------- 1736 # unused_precedence() 1737 # 1738 # Returns a list of tuples (term,precedence) corresponding to precedence 1739 # rules that were never used by the grammar. term is the name of the terminal 1740 # on which precedence was applied and precedence is a string such as 'left' or 1741 # 'right' corresponding to the type of precedence. 1742 # ----------------------------------------------------------------------------- 1743 1744 def unused_precedence(self): 1745 unused = [] 1746 for termname in self.Precedence: 1747 if not (termname in self.Terminals or termname in self.UsedPrecedence): 1748 unused.append((termname, self.Precedence[termname][0])) 1749 1750 return unused 1751 1752 # ------------------------------------------------------------------------- 1753 # _first() 1754 # 1755 # Compute the value of FIRST1(beta) where beta is a tuple of symbols. 1756 # 1757 # During execution of compute_first1, the result may be incomplete. 1758 # Afterward (e.g., when called from compute_follow()), it will be complete. 1759 # ------------------------------------------------------------------------- 1760 def _first(self, beta): 1761 1762 # We are computing First(x1,x2,x3,...,xn) 1763 result = [] 1764 for x in beta: 1765 x_produces_empty = False 1766 1767 # Add all the non-<empty> symbols of First[x] to the result. 1768 for f in self.First[x]: 1769 if f == '<empty>': 1770 x_produces_empty = True 1771 else: 1772 if f not in result: 1773 result.append(f) 1774 1775 if x_produces_empty: 1776 # We have to consider the next x in beta, 1777 # i.e. stay in the loop. 1778 pass 1779 else: 1780 # We don't have to consider any further symbols in beta. 1781 break 1782 else: 1783 # There was no 'break' from the loop, 1784 # so x_produces_empty was true for all x in beta, 1785 # so beta produces empty as well. 1786 result.append('<empty>') 1787 1788 return result 1789 1790 # ------------------------------------------------------------------------- 1791 # compute_first() 1792 # 1793 # Compute the value of FIRST1(X) for all symbols 1794 # ------------------------------------------------------------------------- 1795 def compute_first(self): 1796 if self.First: 1797 return self.First 1798 1799 # Terminals: 1800 for t in self.Terminals: 1801 self.First[t] = [t] 1802 1803 self.First['$end'] = ['$end'] 1804 1805 # Nonterminals: 1806 1807 # Initialize to the empty set: 1808 for n in self.Nonterminals: 1809 self.First[n] = [] 1810 1811 # Then propagate symbols until no change: 1812 while True: 1813 some_change = False 1814 for n in self.Nonterminals: 1815 for p in self.Prodnames[n]: 1816 for f in self._first(p.prod): 1817 if f not in self.First[n]: 1818 self.First[n].append(f) 1819 some_change = True 1820 if not some_change: 1821 break 1822 1823 return self.First 1824 1825 # --------------------------------------------------------------------- 1826 # compute_follow() 1827 # 1828 # Computes all of the follow sets for every non-terminal symbol. The 1829 # follow set is the set of all symbols that might follow a given 1830 # non-terminal. See the Dragon book, 2nd Ed. p. 189. 
1831 # --------------------------------------------------------------------- 1832 def compute_follow(self, start=None): 1833 # If already computed, return the result 1834 if self.Follow: 1835 return self.Follow 1836 1837 # If first sets not computed yet, do that first. 1838 if not self.First: 1839 self.compute_first() 1840 1841 # Add '$end' to the follow list of the start symbol 1842 for k in self.Nonterminals: 1843 self.Follow[k] = [] 1844 1845 if not start: 1846 start = self.Productions[1].name 1847 1848 self.Follow[start] = ['$end'] 1849 1850 while True: 1851 didadd = False 1852 for p in self.Productions[1:]: 1853 # Here is the production set 1854 for i, B in enumerate(p.prod): 1855 if B in self.Nonterminals: 1856 # Okay. We got a non-terminal in a production 1857 fst = self._first(p.prod[i+1:]) 1858 hasempty = False 1859 for f in fst: 1860 if f != '<empty>' and f not in self.Follow[B]: 1861 self.Follow[B].append(f) 1862 didadd = True 1863 if f == '<empty>': 1864 hasempty = True 1865 if hasempty or i == (len(p.prod)-1): 1866 # Add elements of follow(a) to follow(b) 1867 for f in self.Follow[p.name]: 1868 if f not in self.Follow[B]: 1869 self.Follow[B].append(f) 1870 didadd = True 1871 if not didadd: 1872 break 1873 return self.Follow 1874 1875 1876 # ----------------------------------------------------------------------------- 1877 # build_lritems() 1878 # 1879 # This function walks the list of productions and builds a complete set of the 1880 # LR items. The LR items are stored in two ways: First, they are uniquely 1881 # numbered and placed in the list _lritems. Second, a linked list of LR items 1882 # is built for each production. For example: 1883 # 1884 # E -> E PLUS E 1885 # 1886 # Creates the list 1887 # 1888 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ] 1889 # ----------------------------------------------------------------------------- 1890 1891 def build_lritems(self): 1892 for p in self.Productions: 1893 lastlri = p 1894 i = 0 1895 lr_items = [] 1896 while True: 1897 if i > len(p): 1898 lri = None 1899 else: 1900 lri = LRItem(p, i) 1901 # Precompute the list of productions immediately following 1902 try: 1903 lri.lr_after = self.Prodnames[lri.prod[i+1]] 1904 except (IndexError, KeyError): 1905 lri.lr_after = [] 1906 try: 1907 lri.lr_before = lri.prod[i-1] 1908 except IndexError: 1909 lri.lr_before = None 1910 1911 lastlri.lr_next = lri 1912 if not lri: 1913 break 1914 lr_items.append(lri) 1915 lastlri = lri 1916 i += 1 1917 p.lr_items = lr_items 1918 1919# ----------------------------------------------------------------------------- 1920# == Class LRTable == 1921# 1922# This basic class represents a basic table of LR parsing information. 1923# Methods for generating the tables are not defined here. They are defined 1924# in the derived class LRGeneratedTable. 
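#
# A typical loading sequence, sketched from the way yacc() uses this class
# further below (the module and file names are placeholders):
#
#     lr = LRTable()
#     signature = lr.read_table('parsetab')    # or lr.read_pickle('parser.p')
#     lr.bind_callables(pdict)                 # pdict: the grammar module dict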
1925# ----------------------------------------------------------------------------- 1926 1927class VersionError(YaccError): 1928 pass 1929 1930class LRTable(object): 1931 def __init__(self): 1932 self.lr_action = None 1933 self.lr_goto = None 1934 self.lr_productions = None 1935 self.lr_method = None 1936 1937 def read_table(self, module): 1938 if isinstance(module, types.ModuleType): 1939 parsetab = module 1940 else: 1941 exec('import %s' % module) 1942 parsetab = sys.modules[module] 1943 1944 if parsetab._tabversion != __tabversion__: 1945 raise VersionError('yacc table file version is out of date') 1946 1947 self.lr_action = parsetab._lr_action 1948 self.lr_goto = parsetab._lr_goto 1949 1950 self.lr_productions = [] 1951 for p in parsetab._lr_productions: 1952 self.lr_productions.append(MiniProduction(*p)) 1953 1954 self.lr_method = parsetab._lr_method 1955 return parsetab._lr_signature 1956 1957 def read_pickle(self, filename): 1958 try: 1959 import cPickle as pickle 1960 except ImportError: 1961 import pickle 1962 1963 if not os.path.exists(filename): 1964 raise ImportError 1965 1966 in_f = open(filename, 'rb') 1967 1968 tabversion = pickle.load(in_f) 1969 if tabversion != __tabversion__: 1970 raise VersionError('yacc table file version is out of date') 1971 self.lr_method = pickle.load(in_f) 1972 signature = pickle.load(in_f) 1973 self.lr_action = pickle.load(in_f) 1974 self.lr_goto = pickle.load(in_f) 1975 productions = pickle.load(in_f) 1976 1977 self.lr_productions = [] 1978 for p in productions: 1979 self.lr_productions.append(MiniProduction(*p)) 1980 1981 in_f.close() 1982 return signature 1983 1984 # Bind all production function names to callable objects in pdict 1985 def bind_callables(self, pdict): 1986 for p in self.lr_productions: 1987 p.bind(pdict) 1988 1989 1990# ----------------------------------------------------------------------------- 1991# === LR Generator === 1992# 1993# The following classes and functions are used to generate LR parsing tables on 1994# a grammar. 1995# ----------------------------------------------------------------------------- 1996 1997# ----------------------------------------------------------------------------- 1998# digraph() 1999# traverse() 2000# 2001# The following two functions are used to compute set valued functions 2002# of the form: 2003# 2004# F(x) = F'(x) U U{F(y) | x R y} 2005# 2006# This is used to compute the values of Read() sets as well as FOLLOW sets 2007# in LALR(1) generation. 
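#
# A small illustration with hypothetical sets: if X = {a, b}, the only relation
# is a R b, F'(a) = {1} and F'(b) = {2}, then the least solution is
# F(b) = {2} and F(a) = F'(a) U F(b) = {1, 2}.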
2008# 2009# Inputs: X - An input set 2010# R - A relation 2011# FP - Set-valued function 2012# ------------------------------------------------------------------------------ 2013 2014def digraph(X, R, FP): 2015 N = {} 2016 for x in X: 2017 N[x] = 0 2018 stack = [] 2019 F = {} 2020 for x in X: 2021 if N[x] == 0: 2022 traverse(x, N, stack, F, X, R, FP) 2023 return F 2024 2025def traverse(x, N, stack, F, X, R, FP): 2026 stack.append(x) 2027 d = len(stack) 2028 N[x] = d 2029 F[x] = FP(x) # F(X) <- F'(x) 2030 2031 rel = R(x) # Get y's related to x 2032 for y in rel: 2033 if N[y] == 0: 2034 traverse(y, N, stack, F, X, R, FP) 2035 N[x] = min(N[x], N[y]) 2036 for a in F.get(y, []): 2037 if a not in F[x]: 2038 F[x].append(a) 2039 if N[x] == d: 2040 N[stack[-1]] = MAXINT 2041 F[stack[-1]] = F[x] 2042 element = stack.pop() 2043 while element != x: 2044 N[stack[-1]] = MAXINT 2045 F[stack[-1]] = F[x] 2046 element = stack.pop() 2047 2048class LALRError(YaccError): 2049 pass 2050 2051# ----------------------------------------------------------------------------- 2052# == LRGeneratedTable == 2053# 2054# This class implements the LR table generation algorithm. There are no 2055# public methods except for write() 2056# ----------------------------------------------------------------------------- 2057 2058class LRGeneratedTable(LRTable): 2059 def __init__(self, grammar, method='LALR', log=None): 2060 if method not in ['SLR', 'LALR']: 2061 raise LALRError('Unsupported method %s' % method) 2062 2063 self.grammar = grammar 2064 self.lr_method = method 2065 2066 # Set up the logger 2067 if not log: 2068 log = NullLogger() 2069 self.log = log 2070 2071 # Internal attributes 2072 self.lr_action = {} # Action table 2073 self.lr_goto = {} # Goto table 2074 self.lr_productions = grammar.Productions # Copy of grammar Production array 2075 self.lr_goto_cache = {} # Cache of computed gotos 2076 self.lr0_cidhash = {} # Cache of closures 2077 2078 self._add_count = 0 # Internal counter used to detect cycles 2079 2080 # Diagonistic information filled in by the table generator 2081 self.sr_conflict = 0 2082 self.rr_conflict = 0 2083 self.conflicts = [] # List of conflicts 2084 2085 self.sr_conflicts = [] 2086 self.rr_conflicts = [] 2087 2088 # Build the tables 2089 self.grammar.build_lritems() 2090 self.grammar.compute_first() 2091 self.grammar.compute_follow() 2092 self.lr_parse_table() 2093 2094 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. 2095 2096 def lr0_closure(self, I): 2097 self._add_count += 1 2098 2099 # Add everything in I to J 2100 J = I[:] 2101 didadd = True 2102 while didadd: 2103 didadd = False 2104 for j in J: 2105 for x in j.lr_after: 2106 if getattr(x, 'lr0_added', 0) == self._add_count: 2107 continue 2108 # Add B --> .G to J 2109 J.append(x.lr_next) 2110 x.lr0_added = self._add_count 2111 didadd = True 2112 2113 return J 2114 2115 # Compute the LR(0) goto function goto(I,X) where I is a set 2116 # of LR(0) items and X is a grammar symbol. This function is written 2117 # in a way that guarantees uniqueness of the generated goto sets 2118 # (i.e. the same goto set will never be returned as two different Python 2119 # objects). With uniqueness, we can later do fast set comparisons using 2120 # id(obj) instead of element-wise comparison. 
2121 2122 def lr0_goto(self, I, x): 2123 # First we look for a previously cached entry 2124 g = self.lr_goto_cache.get((id(I), x)) 2125 if g: 2126 return g 2127 2128 # Now we generate the goto set in a way that guarantees uniqueness 2129 # of the result 2130 2131 s = self.lr_goto_cache.get(x) 2132 if not s: 2133 s = {} 2134 self.lr_goto_cache[x] = s 2135 2136 gs = [] 2137 for p in I: 2138 n = p.lr_next 2139 if n and n.lr_before == x: 2140 s1 = s.get(id(n)) 2141 if not s1: 2142 s1 = {} 2143 s[id(n)] = s1 2144 gs.append(n) 2145 s = s1 2146 g = s.get('$end') 2147 if not g: 2148 if gs: 2149 g = self.lr0_closure(gs) 2150 s['$end'] = g 2151 else: 2152 s['$end'] = gs 2153 self.lr_goto_cache[(id(I), x)] = g 2154 return g 2155 2156 # Compute the LR(0) sets of item function 2157 def lr0_items(self): 2158 C = [self.lr0_closure([self.grammar.Productions[0].lr_next])] 2159 i = 0 2160 for I in C: 2161 self.lr0_cidhash[id(I)] = i 2162 i += 1 2163 2164 # Loop over the items in C and each grammar symbols 2165 i = 0 2166 while i < len(C): 2167 I = C[i] 2168 i += 1 2169 2170 # Collect all of the symbols that could possibly be in the goto(I,X) sets 2171 asyms = {} 2172 for ii in I: 2173 for s in ii.usyms: 2174 asyms[s] = None 2175 2176 for x in asyms: 2177 g = self.lr0_goto(I, x) 2178 if not g or id(g) in self.lr0_cidhash: 2179 continue 2180 self.lr0_cidhash[id(g)] = len(C) 2181 C.append(g) 2182 2183 return C 2184 2185 # ----------------------------------------------------------------------------- 2186 # ==== LALR(1) Parsing ==== 2187 # 2188 # LALR(1) parsing is almost exactly the same as SLR except that instead of 2189 # relying upon Follow() sets when performing reductions, a more selective 2190 # lookahead set that incorporates the state of the LR(0) machine is utilized. 2191 # Thus, we mainly just have to focus on calculating the lookahead sets. 2192 # 2193 # The method used here is due to DeRemer and Pennelo (1982). 2194 # 2195 # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1) 2196 # Lookahead Sets", ACM Transactions on Programming Languages and Systems, 2197 # Vol. 4, No. 4, Oct. 1982, pp. 615-649 2198 # 2199 # Further details can also be found in: 2200 # 2201 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing", 2202 # McGraw-Hill Book Company, (1985). 2203 # 2204 # ----------------------------------------------------------------------------- 2205 2206 # ----------------------------------------------------------------------------- 2207 # compute_nullable_nonterminals() 2208 # 2209 # Creates a dictionary containing all of the non-terminals that might produce 2210 # an empty production. 2211 # ----------------------------------------------------------------------------- 2212 2213 def compute_nullable_nonterminals(self): 2214 nullable = set() 2215 num_nullable = 0 2216 while True: 2217 for p in self.grammar.Productions[1:]: 2218 if p.len == 0: 2219 nullable.add(p.name) 2220 continue 2221 for t in p.prod: 2222 if t not in nullable: 2223 break 2224 else: 2225 nullable.add(p.name) 2226 if len(nullable) == num_nullable: 2227 break 2228 num_nullable = len(nullable) 2229 return nullable 2230 2231 # ----------------------------------------------------------------------------- 2232 # find_nonterminal_trans(C) 2233 # 2234 # Given a set of LR(0) items, this functions finds all of the non-terminal 2235 # transitions. These are transitions in which a dot appears immediately before 2236 # a non-terminal. 
Returns a list of tuples of the form (state,N) where state 2237 # is the state number and N is the nonterminal symbol. 2238 # 2239 # The input C is the set of LR(0) items. 2240 # ----------------------------------------------------------------------------- 2241 2242 def find_nonterminal_transitions(self, C): 2243 trans = [] 2244 for stateno, state in enumerate(C): 2245 for p in state: 2246 if p.lr_index < p.len - 1: 2247 t = (stateno, p.prod[p.lr_index+1]) 2248 if t[1] in self.grammar.Nonterminals: 2249 if t not in trans: 2250 trans.append(t) 2251 return trans 2252 2253 # ----------------------------------------------------------------------------- 2254 # dr_relation() 2255 # 2256 # Computes the DR(p,A) relationships for non-terminal transitions. The input 2257 # is a tuple (state,N) where state is a number and N is a nonterminal symbol. 2258 # 2259 # Returns a list of terminals. 2260 # ----------------------------------------------------------------------------- 2261 2262 def dr_relation(self, C, trans, nullable): 2263 dr_set = {} 2264 state, N = trans 2265 terms = [] 2266 2267 g = self.lr0_goto(C[state], N) 2268 for p in g: 2269 if p.lr_index < p.len - 1: 2270 a = p.prod[p.lr_index+1] 2271 if a in self.grammar.Terminals: 2272 if a not in terms: 2273 terms.append(a) 2274 2275 # This extra bit is to handle the start state 2276 if state == 0 and N == self.grammar.Productions[0].prod[0]: 2277 terms.append('$end') 2278 2279 return terms 2280 2281 # ----------------------------------------------------------------------------- 2282 # reads_relation() 2283 # 2284 # Computes the READS() relation (p,A) READS (t,C). 2285 # ----------------------------------------------------------------------------- 2286 2287 def reads_relation(self, C, trans, empty): 2288 # Look for empty transitions 2289 rel = [] 2290 state, N = trans 2291 2292 g = self.lr0_goto(C[state], N) 2293 j = self.lr0_cidhash.get(id(g), -1) 2294 for p in g: 2295 if p.lr_index < p.len - 1: 2296 a = p.prod[p.lr_index + 1] 2297 if a in empty: 2298 rel.append((j, a)) 2299 2300 return rel 2301 2302 # ----------------------------------------------------------------------------- 2303 # compute_lookback_includes() 2304 # 2305 # Determines the lookback and includes relations 2306 # 2307 # LOOKBACK: 2308 # 2309 # This relation is determined by running the LR(0) state machine forward. 2310 # For example, starting with a production "N : . A B C", we run it forward 2311 # to obtain "N : A B C ." We then build a relationship between this final 2312 # state and the starting state. These relationships are stored in a dictionary 2313 # lookdict. 2314 # 2315 # INCLUDES: 2316 # 2317 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). 2318 # 2319 # This relation is used to determine non-terminal transitions that occur 2320 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B) 2321 # if the following holds: 2322 # 2323 # B -> LAT, where T -> epsilon and p' -L-> p 2324 # 2325 # L is essentially a prefix (which may be empty), T is a suffix that must be 2326 # able to derive an empty string. State p' must lead to state p with the string L. 
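    #
    # A hypothetical illustration: with the productions
    #
    #     B : c A T
    #     T : <empty>
    #
    # and a path p' --c--> p in the LR(0) machine, (p, A) INCLUDES (p', B),
    # since the prefix L is "c" and the trailing T derives the empty string.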
2327 # 2328 # ----------------------------------------------------------------------------- 2329 2330 def compute_lookback_includes(self, C, trans, nullable): 2331 lookdict = {} # Dictionary of lookback relations 2332 includedict = {} # Dictionary of include relations 2333 2334 # Make a dictionary of non-terminal transitions 2335 dtrans = {} 2336 for t in trans: 2337 dtrans[t] = 1 2338 2339 # Loop over all transitions and compute lookbacks and includes 2340 for state, N in trans: 2341 lookb = [] 2342 includes = [] 2343 for p in C[state]: 2344 if p.name != N: 2345 continue 2346 2347 # Okay, we have a name match. We now follow the production all the way 2348 # through the state machine until we get the . on the right hand side 2349 2350 lr_index = p.lr_index 2351 j = state 2352 while lr_index < p.len - 1: 2353 lr_index = lr_index + 1 2354 t = p.prod[lr_index] 2355 2356 # Check to see if this symbol and state are a non-terminal transition 2357 if (j, t) in dtrans: 2358 # Yes. Okay, there is some chance that this is an includes relation 2359 # the only way to know for certain is whether the rest of the 2360 # production derives empty 2361 2362 li = lr_index + 1 2363 while li < p.len: 2364 if p.prod[li] in self.grammar.Terminals: 2365 break # No forget it 2366 if p.prod[li] not in nullable: 2367 break 2368 li = li + 1 2369 else: 2370 # Appears to be a relation between (j,t) and (state,N) 2371 includes.append((j, t)) 2372 2373 g = self.lr0_goto(C[j], t) # Go to next set 2374 j = self.lr0_cidhash.get(id(g), -1) # Go to next state 2375 2376 # When we get here, j is the final state, now we have to locate the production 2377 for r in C[j]: 2378 if r.name != p.name: 2379 continue 2380 if r.len != p.len: 2381 continue 2382 i = 0 2383 # This look is comparing a production ". A B C" with "A B C ." 2384 while i < r.lr_index: 2385 if r.prod[i] != p.prod[i+1]: 2386 break 2387 i = i + 1 2388 else: 2389 lookb.append((j, r)) 2390 for i in includes: 2391 if i not in includedict: 2392 includedict[i] = [] 2393 includedict[i].append((state, N)) 2394 lookdict[(state, N)] = lookb 2395 2396 return lookdict, includedict 2397 2398 # ----------------------------------------------------------------------------- 2399 # compute_read_sets() 2400 # 2401 # Given a set of LR(0) items, this function computes the read sets. 
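    #
    # In terms of the digraph() helper above, this is the least solution of
    #
    #     Read(p,A) = DR(p,A) U U{ Read(t,C) | (p,A) READS (t,C) }
    #
    # obtained with FP = dr_relation and R = reads_relation.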
2402 # 2403 # Inputs: C = Set of LR(0) items 2404 # ntrans = Set of nonterminal transitions 2405 # nullable = Set of empty transitions 2406 # 2407 # Returns a set containing the read sets 2408 # ----------------------------------------------------------------------------- 2409 2410 def compute_read_sets(self, C, ntrans, nullable): 2411 FP = lambda x: self.dr_relation(C, x, nullable) 2412 R = lambda x: self.reads_relation(C, x, nullable) 2413 F = digraph(ntrans, R, FP) 2414 return F 2415 2416 # ----------------------------------------------------------------------------- 2417 # compute_follow_sets() 2418 # 2419 # Given a set of LR(0) items, a set of non-terminal transitions, a readset, 2420 # and an include set, this function computes the follow sets 2421 # 2422 # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)} 2423 # 2424 # Inputs: 2425 # ntrans = Set of nonterminal transitions 2426 # readsets = Readset (previously computed) 2427 # inclsets = Include sets (previously computed) 2428 # 2429 # Returns a set containing the follow sets 2430 # ----------------------------------------------------------------------------- 2431 2432 def compute_follow_sets(self, ntrans, readsets, inclsets): 2433 FP = lambda x: readsets[x] 2434 R = lambda x: inclsets.get(x, []) 2435 F = digraph(ntrans, R, FP) 2436 return F 2437 2438 # ----------------------------------------------------------------------------- 2439 # add_lookaheads() 2440 # 2441 # Attaches the lookahead symbols to grammar rules. 2442 # 2443 # Inputs: lookbacks - Set of lookback relations 2444 # followset - Computed follow set 2445 # 2446 # This function directly attaches the lookaheads to productions contained 2447 # in the lookbacks set 2448 # ----------------------------------------------------------------------------- 2449 2450 def add_lookaheads(self, lookbacks, followset): 2451 for trans, lb in lookbacks.items(): 2452 # Loop over productions in lookback 2453 for state, p in lb: 2454 if state not in p.lookaheads: 2455 p.lookaheads[state] = [] 2456 f = followset.get(trans, []) 2457 for a in f: 2458 if a not in p.lookaheads[state]: 2459 p.lookaheads[state].append(a) 2460 2461 # ----------------------------------------------------------------------------- 2462 # add_lalr_lookaheads() 2463 # 2464 # This function does all of the work of adding lookahead information for use 2465 # with LALR parsing 2466 # ----------------------------------------------------------------------------- 2467 2468 def add_lalr_lookaheads(self, C): 2469 # Determine all of the nullable nonterminals 2470 nullable = self.compute_nullable_nonterminals() 2471 2472 # Find all non-terminal transitions 2473 trans = self.find_nonterminal_transitions(C) 2474 2475 # Compute read sets 2476 readsets = self.compute_read_sets(C, trans, nullable) 2477 2478 # Compute lookback/includes relations 2479 lookd, included = self.compute_lookback_includes(C, trans, nullable) 2480 2481 # Compute LALR FOLLOW sets 2482 followsets = self.compute_follow_sets(trans, readsets, included) 2483 2484 # Add all of the lookaheads 2485 self.add_lookaheads(lookd, followsets) 2486 2487 # ----------------------------------------------------------------------------- 2488 # lr_parse_table() 2489 # 2490 # This function constructs the parse tables for SLR or LALR 2491 # ----------------------------------------------------------------------------- 2492 def lr_parse_table(self): 2493 Productions = self.grammar.Productions 2494 Precedence = self.grammar.Precedence 2495 goto = self.lr_goto # Goto array 
2496 action = self.lr_action # Action array 2497 log = self.log # Logger for output 2498 2499 actionp = {} # Action production array (temporary) 2500 2501 log.info('Parsing method: %s', self.lr_method) 2502 2503 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items 2504 # This determines the number of states 2505 2506 C = self.lr0_items() 2507 2508 if self.lr_method == 'LALR': 2509 self.add_lalr_lookaheads(C) 2510 2511 # Build the parser table, state by state 2512 st = 0 2513 for I in C: 2514 # Loop over each production in I 2515 actlist = [] # List of actions 2516 st_action = {} 2517 st_actionp = {} 2518 st_goto = {} 2519 log.info('') 2520 log.info('state %d', st) 2521 log.info('') 2522 for p in I: 2523 log.info(' (%d) %s', p.number, p) 2524 log.info('') 2525 2526 for p in I: 2527 if p.len == p.lr_index + 1: 2528 if p.name == "S'": 2529 # Start symbol. Accept! 2530 st_action['$end'] = 0 2531 st_actionp['$end'] = p 2532 else: 2533 # We are at the end of a production. Reduce! 2534 if self.lr_method == 'LALR': 2535 laheads = p.lookaheads[st] 2536 else: 2537 laheads = self.grammar.Follow[p.name] 2538 for a in laheads: 2539 actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p))) 2540 r = st_action.get(a) 2541 if r is not None: 2542 # Whoa. Have a shift/reduce or reduce/reduce conflict 2543 if r > 0: 2544 # Need to decide on shift or reduce here 2545 # By default we favor shifting. Need to add 2546 # some precedence rules here. 2547 sprec, slevel = Productions[st_actionp[a].number].prec 2548 rprec, rlevel = Precedence.get(a, ('right', 0)) 2549 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): 2550 # We really need to reduce here. 2551 st_action[a] = -p.number 2552 st_actionp[a] = p 2553 if not slevel and not rlevel: 2554 log.info(' ! shift/reduce conflict for %s resolved as reduce', a) 2555 self.sr_conflicts.append((st, a, 'reduce')) 2556 Productions[p.number].reduced += 1 2557 elif (slevel == rlevel) and (rprec == 'nonassoc'): 2558 st_action[a] = None 2559 else: 2560 # Hmmm. Guess we'll keep the shift 2561 if not rlevel: 2562 log.info(' ! shift/reduce conflict for %s resolved as shift', a) 2563 self.sr_conflicts.append((st, a, 'shift')) 2564 elif r < 0: 2565 # Reduce/reduce conflict. In this case, we favor the rule 2566 # that was defined first in the grammar file 2567 oldp = Productions[-r] 2568 pp = Productions[p.number] 2569 if oldp.line > pp.line: 2570 st_action[a] = -p.number 2571 st_actionp[a] = p 2572 chosenp, rejectp = pp, oldp 2573 Productions[p.number].reduced += 1 2574 Productions[oldp.number].reduced -= 1 2575 else: 2576 chosenp, rejectp = oldp, pp 2577 self.rr_conflicts.append((st, chosenp, rejectp)) 2578 log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)', 2579 a, st_actionp[a].number, st_actionp[a]) 2580 else: 2581 raise LALRError('Unknown conflict in state %d' % st) 2582 else: 2583 st_action[a] = -p.number 2584 st_actionp[a] = p 2585 Productions[p.number].reduced += 1 2586 else: 2587 i = p.lr_index 2588 a = p.prod[i+1] # Get symbol right after the "." 2589 if a in self.grammar.Terminals: 2590 g = self.lr0_goto(I, a) 2591 j = self.lr0_cidhash.get(id(g), -1) 2592 if j >= 0: 2593 # We are in a shift state 2594 actlist.append((a, p, 'shift and go to state %d' % j)) 2595 r = st_action.get(a) 2596 if r is not None: 2597 # Whoa have a shift/reduce or shift/shift conflict 2598 if r > 0: 2599 if r != j: 2600 raise LALRError('Shift/shift conflict in state %d' % st) 2601 elif r < 0: 2602 # Do a precedence check. 
2603 # - if precedence of reduce rule is higher, we reduce. 2604 # - if precedence of reduce is same and left assoc, we reduce. 2605 # - otherwise we shift 2606 rprec, rlevel = Productions[st_actionp[a].number].prec 2607 sprec, slevel = Precedence.get(a, ('right', 0)) 2608 if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): 2609 # We decide to shift here... highest precedence to shift 2610 Productions[st_actionp[a].number].reduced -= 1 2611 st_action[a] = j 2612 st_actionp[a] = p 2613 if not rlevel: 2614 log.info(' ! shift/reduce conflict for %s resolved as shift', a) 2615 self.sr_conflicts.append((st, a, 'shift')) 2616 elif (slevel == rlevel) and (rprec == 'nonassoc'): 2617 st_action[a] = None 2618 else: 2619 # Hmmm. Guess we'll keep the reduce 2620 if not slevel and not rlevel: 2621 log.info(' ! shift/reduce conflict for %s resolved as reduce', a) 2622 self.sr_conflicts.append((st, a, 'reduce')) 2623 2624 else: 2625 raise LALRError('Unknown conflict in state %d' % st) 2626 else: 2627 st_action[a] = j 2628 st_actionp[a] = p 2629 2630 # Print the actions associated with each terminal 2631 _actprint = {} 2632 for a, p, m in actlist: 2633 if a in st_action: 2634 if p is st_actionp[a]: 2635 log.info(' %-15s %s', a, m) 2636 _actprint[(a, m)] = 1 2637 log.info('') 2638 # Print the actions that were not used. (debugging) 2639 not_used = 0 2640 for a, p, m in actlist: 2641 if a in st_action: 2642 if p is not st_actionp[a]: 2643 if not (a, m) in _actprint: 2644 log.debug(' ! %-15s [ %s ]', a, m) 2645 not_used = 1 2646 _actprint[(a, m)] = 1 2647 if not_used: 2648 log.debug('') 2649 2650 # Construct the goto table for this state 2651 2652 nkeys = {} 2653 for ii in I: 2654 for s in ii.usyms: 2655 if s in self.grammar.Nonterminals: 2656 nkeys[s] = None 2657 for n in nkeys: 2658 g = self.lr0_goto(I, n) 2659 j = self.lr0_cidhash.get(id(g), -1) 2660 if j >= 0: 2661 st_goto[n] = j 2662 log.info(' %-30s shift and go to state %d', n, j) 2663 2664 action[st] = st_action 2665 actionp[st] = st_actionp 2666 goto[st] = st_goto 2667 st += 1 2668 2669 # ----------------------------------------------------------------------------- 2670 # write() 2671 # 2672 # This function writes the LR parsing tables to a file 2673 # ----------------------------------------------------------------------------- 2674 2675 def write_table(self, tabmodule, outputdir='', signature=''): 2676 if isinstance(tabmodule, types.ModuleType): 2677 raise IOError("Won't overwrite existing tabmodule") 2678 2679 basemodulename = tabmodule.split('.')[-1] 2680 filename = os.path.join(outputdir, basemodulename) + '.py' 2681 try: 2682 f = open(filename, 'w') 2683 2684 f.write(''' 2685# %s 2686# This file is automatically generated. Do not edit. 
2687_tabversion = %r 2688 2689_lr_method = %r 2690 2691_lr_signature = %r 2692 ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature)) 2693 2694 # Change smaller to 0 to go back to original tables 2695 smaller = 1 2696 2697 # Factor out names to try and make smaller 2698 if smaller: 2699 items = {} 2700 2701 for s, nd in self.lr_action.items(): 2702 for name, v in nd.items(): 2703 i = items.get(name) 2704 if not i: 2705 i = ([], []) 2706 items[name] = i 2707 i[0].append(s) 2708 i[1].append(v) 2709 2710 f.write('\n_lr_action_items = {') 2711 for k, v in items.items(): 2712 f.write('%r:([' % k) 2713 for i in v[0]: 2714 f.write('%r,' % i) 2715 f.write('],[') 2716 for i in v[1]: 2717 f.write('%r,' % i) 2718 2719 f.write(']),') 2720 f.write('}\n') 2721 2722 f.write(''' 2723_lr_action = {} 2724for _k, _v in _lr_action_items.items(): 2725 for _x,_y in zip(_v[0],_v[1]): 2726 if not _x in _lr_action: _lr_action[_x] = {} 2727 _lr_action[_x][_k] = _y 2728del _lr_action_items 2729''') 2730 2731 else: 2732 f.write('\n_lr_action = { ') 2733 for k, v in self.lr_action.items(): 2734 f.write('(%r,%r):%r,' % (k[0], k[1], v)) 2735 f.write('}\n') 2736 2737 if smaller: 2738 # Factor out names to try and make smaller 2739 items = {} 2740 2741 for s, nd in self.lr_goto.items(): 2742 for name, v in nd.items(): 2743 i = items.get(name) 2744 if not i: 2745 i = ([], []) 2746 items[name] = i 2747 i[0].append(s) 2748 i[1].append(v) 2749 2750 f.write('\n_lr_goto_items = {') 2751 for k, v in items.items(): 2752 f.write('%r:([' % k) 2753 for i in v[0]: 2754 f.write('%r,' % i) 2755 f.write('],[') 2756 for i in v[1]: 2757 f.write('%r,' % i) 2758 2759 f.write(']),') 2760 f.write('}\n') 2761 2762 f.write(''' 2763_lr_goto = {} 2764for _k, _v in _lr_goto_items.items(): 2765 for _x, _y in zip(_v[0], _v[1]): 2766 if not _x in _lr_goto: _lr_goto[_x] = {} 2767 _lr_goto[_x][_k] = _y 2768del _lr_goto_items 2769''') 2770 else: 2771 f.write('\n_lr_goto = { ') 2772 for k, v in self.lr_goto.items(): 2773 f.write('(%r,%r):%r,' % (k[0], k[1], v)) 2774 f.write('}\n') 2775 2776 # Write production table 2777 f.write('_lr_productions = [\n') 2778 for p in self.lr_productions: 2779 if p.func: 2780 f.write(' (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len, 2781 p.func, os.path.basename(p.file), p.line)) 2782 else: 2783 f.write(' (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len)) 2784 f.write(']\n') 2785 f.close() 2786 2787 except IOError as e: 2788 raise 2789 2790 2791 # ----------------------------------------------------------------------------- 2792 # pickle_table() 2793 # 2794 # This function pickles the LR parsing tables to a supplied file object 2795 # ----------------------------------------------------------------------------- 2796 2797 def pickle_table(self, filename, signature=''): 2798 try: 2799 import cPickle as pickle 2800 except ImportError: 2801 import pickle 2802 with open(filename, 'wb') as outf: 2803 pickle.dump(__tabversion__, outf, pickle_protocol) 2804 pickle.dump(self.lr_method, outf, pickle_protocol) 2805 pickle.dump(signature, outf, pickle_protocol) 2806 pickle.dump(self.lr_action, outf, pickle_protocol) 2807 pickle.dump(self.lr_goto, outf, pickle_protocol) 2808 2809 outp = [] 2810 for p in self.lr_productions: 2811 if p.func: 2812 outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line)) 2813 else: 2814 outp.append((str(p), p.name, p.len, None, None, None)) 2815 pickle.dump(outp, outf, pickle_protocol) 2816 2817# 
----------------------------------------------------------------------------- 2818# === INTROSPECTION === 2819# 2820# The following functions and classes are used to implement the PLY 2821# introspection features followed by the yacc() function itself. 2822# ----------------------------------------------------------------------------- 2823 2824# ----------------------------------------------------------------------------- 2825# get_caller_module_dict() 2826# 2827# This function returns a dictionary containing all of the symbols defined within 2828# a caller further down the call stack. This is used to get the environment 2829# associated with the yacc() call if none was provided. 2830# ----------------------------------------------------------------------------- 2831 2832def get_caller_module_dict(levels): 2833 f = sys._getframe(levels) 2834 ldict = f.f_globals.copy() 2835 if f.f_globals != f.f_locals: 2836 ldict.update(f.f_locals) 2837 return ldict 2838 2839# ----------------------------------------------------------------------------- 2840# parse_grammar() 2841# 2842# This takes a raw grammar rule string and parses it into production data 2843# ----------------------------------------------------------------------------- 2844def parse_grammar(doc, file, line): 2845 grammar = [] 2846 # Split the doc string into lines 2847 pstrings = doc.splitlines() 2848 lastp = None 2849 dline = line 2850 for ps in pstrings: 2851 dline += 1 2852 p = ps.split() 2853 if not p: 2854 continue 2855 try: 2856 if p[0] == '|': 2857 # This is a continuation of a previous rule 2858 if not lastp: 2859 raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline)) 2860 prodname = lastp 2861 syms = p[1:] 2862 else: 2863 prodname = p[0] 2864 lastp = prodname 2865 syms = p[2:] 2866 assign = p[1] 2867 if assign != ':' and assign != '::=': 2868 raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline)) 2869 2870 grammar.append((file, dline, prodname, syms)) 2871 except SyntaxError: 2872 raise 2873 except Exception: 2874 raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip())) 2875 2876 return grammar 2877 2878# ----------------------------------------------------------------------------- 2879# ParserReflect() 2880# 2881# This class represents information extracted for building a parser including 2882# start symbol, error function, tokens, precedence list, action functions, 2883# etc. 
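#
# The yacc() function below drives it roughly as follows (sketch only):
#
#     pinfo = ParserReflect(pdict, log=errorlog)
#     pinfo.get_all()
#     if pinfo.error:
#         raise YaccError('Unable to build parser')
#     signature = pinfo.signature()
#     ...
#     if pinfo.validate_all():
#         raise YaccError('Unable to build parser')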
2884# ----------------------------------------------------------------------------- 2885class ParserReflect(object): 2886 def __init__(self, pdict, log=None): 2887 self.pdict = pdict 2888 self.start = None 2889 self.error_func = None 2890 self.tokens = None 2891 self.modules = set() 2892 self.grammar = [] 2893 self.error = False 2894 2895 if log is None: 2896 self.log = PlyLogger(sys.stderr) 2897 else: 2898 self.log = log 2899 2900 # Get all of the basic information 2901 def get_all(self): 2902 self.get_start() 2903 self.get_error_func() 2904 self.get_tokens() 2905 self.get_precedence() 2906 self.get_pfunctions() 2907 2908 # Validate all of the information 2909 def validate_all(self): 2910 self.validate_start() 2911 self.validate_error_func() 2912 self.validate_tokens() 2913 self.validate_precedence() 2914 self.validate_pfunctions() 2915 self.validate_modules() 2916 return self.error 2917 2918 # Compute a signature over the grammar 2919 def signature(self): 2920 try: 2921 from hashlib import md5 2922 except ImportError: 2923 from md5 import md5 2924 try: 2925 sig = md5() 2926 if self.start: 2927 sig.update(self.start.encode('latin-1')) 2928 if self.prec: 2929 sig.update(''.join([''.join(p) for p in self.prec]).encode('latin-1')) 2930 if self.tokens: 2931 sig.update(' '.join(self.tokens).encode('latin-1')) 2932 for f in self.pfuncs: 2933 if f[3]: 2934 sig.update(f[3].encode('latin-1')) 2935 except (TypeError, ValueError): 2936 pass 2937 2938 digest = base64.b16encode(sig.digest()) 2939 if sys.version_info[0] >= 3: 2940 digest = digest.decode('latin-1') 2941 return digest 2942 2943 # ----------------------------------------------------------------------------- 2944 # validate_modules() 2945 # 2946 # This method checks to see if there are duplicated p_rulename() functions 2947 # in the parser module file. Without this function, it is really easy for 2948 # users to make mistakes by cutting and pasting code fragments (and it's a real 2949 # bugger to try and figure out why the resulting parser doesn't work). Therefore, 2950 # we just do a little regular expression pattern matching of def statements 2951 # to try and detect duplicates. 2952 # ----------------------------------------------------------------------------- 2953 2954 def validate_modules(self): 2955 # Match def p_funcname( 2956 fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') 2957 2958 for module in self.modules: 2959 lines, linen = inspect.getsourcelines(module) 2960 2961 counthash = {} 2962 for linen, line in enumerate(lines): 2963 linen += 1 2964 m = fre.match(line) 2965 if m: 2966 name = m.group(1) 2967 prev = counthash.get(name) 2968 if not prev: 2969 counthash[name] = linen 2970 else: 2971 filename = inspect.getsourcefile(module) 2972 self.log.warning('%s:%d: Function %s redefined. 
Previously defined on line %d', 2973 filename, linen, name, prev) 2974 2975 # Get the start symbol 2976 def get_start(self): 2977 self.start = self.pdict.get('start') 2978 2979 # Validate the start symbol 2980 def validate_start(self): 2981 if self.start is not None: 2982 if not isinstance(self.start, string_types): 2983 self.log.error("'start' must be a string") 2984 2985 # Look for error handler 2986 def get_error_func(self): 2987 self.error_func = self.pdict.get('p_error') 2988 2989 # Validate the error function 2990 def validate_error_func(self): 2991 if self.error_func: 2992 if isinstance(self.error_func, types.FunctionType): 2993 ismethod = 0 2994 elif isinstance(self.error_func, types.MethodType): 2995 ismethod = 1 2996 else: 2997 self.log.error("'p_error' defined, but is not a function or method") 2998 self.error = True 2999 return 3000 3001 eline = self.error_func.__code__.co_firstlineno 3002 efile = self.error_func.__code__.co_filename 3003 module = inspect.getmodule(self.error_func) 3004 self.modules.add(module) 3005 3006 argcount = self.error_func.__code__.co_argcount - ismethod 3007 if argcount != 1: 3008 self.log.error('%s:%d: p_error() requires 1 argument', efile, eline) 3009 self.error = True 3010 3011 # Get the tokens map 3012 def get_tokens(self): 3013 tokens = self.pdict.get('tokens') 3014 if not tokens: 3015 self.log.error('No token list is defined') 3016 self.error = True 3017 return 3018 3019 if not isinstance(tokens, (list, tuple)): 3020 self.log.error('tokens must be a list or tuple') 3021 self.error = True 3022 return 3023 3024 if not tokens: 3025 self.log.error('tokens is empty') 3026 self.error = True 3027 return 3028 3029 self.tokens = tokens 3030 3031 # Validate the tokens 3032 def validate_tokens(self): 3033 # Validate the tokens. 3034 if 'error' in self.tokens: 3035 self.log.error("Illegal token name 'error'. Is a reserved word") 3036 self.error = True 3037 return 3038 3039 terminals = set() 3040 for n in self.tokens: 3041 if n in terminals: 3042 self.log.warning('Token %r multiply defined', n) 3043 terminals.add(n) 3044 3045 # Get the precedence map (if any) 3046 def get_precedence(self): 3047 self.prec = self.pdict.get('precedence') 3048 3049 # Validate and parse the precedence map 3050 def validate_precedence(self): 3051 preclist = [] 3052 if self.prec: 3053 if not isinstance(self.prec, (list, tuple)): 3054 self.log.error('precedence must be a list or tuple') 3055 self.error = True 3056 return 3057 for level, p in enumerate(self.prec): 3058 if not isinstance(p, (list, tuple)): 3059 self.log.error('Bad precedence table') 3060 self.error = True 3061 return 3062 3063 if len(p) < 2: 3064 self.log.error('Malformed precedence entry %s. 
Must be (assoc, term, ..., term)', p) 3065 self.error = True 3066 return 3067 assoc = p[0] 3068 if not isinstance(assoc, string_types): 3069 self.log.error('precedence associativity must be a string') 3070 self.error = True 3071 return 3072 for term in p[1:]: 3073 if not isinstance(term, string_types): 3074 self.log.error('precedence items must be strings') 3075 self.error = True 3076 return 3077 preclist.append((term, assoc, level+1)) 3078 self.preclist = preclist 3079 3080 # Get all p_functions from the grammar 3081 def get_pfunctions(self): 3082 p_functions = [] 3083 for name, item in self.pdict.items(): 3084 if not name.startswith('p_') or name == 'p_error': 3085 continue 3086 if isinstance(item, (types.FunctionType, types.MethodType)): 3087 line = item.__code__.co_firstlineno 3088 module = inspect.getmodule(item) 3089 p_functions.append((line, module, name, item.__doc__)) 3090 3091 # Sort all of the actions by line number; make sure to stringify 3092 # modules to make them sortable, since `line` may not uniquely sort all 3093 # p functions 3094 p_functions.sort(key=lambda p_function: ( 3095 p_function[0], 3096 str(p_function[1]), 3097 p_function[2], 3098 p_function[3])) 3099 self.pfuncs = p_functions 3100 3101 # Validate all of the p_functions 3102 def validate_pfunctions(self): 3103 grammar = [] 3104 # Check for non-empty symbols 3105 if len(self.pfuncs) == 0: 3106 self.log.error('no rules of the form p_rulename are defined') 3107 self.error = True 3108 return 3109 3110 for line, module, name, doc in self.pfuncs: 3111 file = inspect.getsourcefile(module) 3112 func = self.pdict[name] 3113 if isinstance(func, types.MethodType): 3114 reqargs = 2 3115 else: 3116 reqargs = 1 3117 if func.__code__.co_argcount > reqargs: 3118 self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__) 3119 self.error = True 3120 elif func.__code__.co_argcount < reqargs: 3121 self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__) 3122 self.error = True 3123 elif not func.__doc__: 3124 self.log.warning('%s:%d: No documentation string specified in function %r (ignored)', 3125 file, line, func.__name__) 3126 else: 3127 try: 3128 parsed_g = parse_grammar(doc, file, line) 3129 for g in parsed_g: 3130 grammar.append((name, g)) 3131 except SyntaxError as e: 3132 self.log.error(str(e)) 3133 self.error = True 3134 3135 # Looks like a valid grammar rule 3136 # Mark the file in which defined. 3137 self.modules.add(module) 3138 3139 # Secondary validation step that looks for p_ definitions that are not functions 3140 # or functions that look like they might be grammar rules. 
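        #
        # For instance, a hypothetical module-level function such as
        #
        #     def expr_plus(p):
        #         'expr : expr PLUS term'
        #         ...
        #
        # has a rule-style docstring but no p_ prefix, so the loop below warns
        # that it may be a grammar rule defined without the p_ prefix.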
3141 3142 for n, v in self.pdict.items(): 3143 if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)): 3144 continue 3145 if n.startswith('t_'): 3146 continue 3147 if n.startswith('p_') and n != 'p_error': 3148 self.log.warning('%r not defined as a function', n) 3149 if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or 3150 (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)): 3151 if v.__doc__: 3152 try: 3153 doc = v.__doc__.split(' ') 3154 if doc[1] == ':': 3155 self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix', 3156 v.__code__.co_filename, v.__code__.co_firstlineno, n) 3157 except IndexError: 3158 pass 3159 3160 self.grammar = grammar 3161 3162# ----------------------------------------------------------------------------- 3163# yacc(module) 3164# 3165# Build a parser 3166# ----------------------------------------------------------------------------- 3167 3168def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None, 3169 check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file, 3170 outputdir=None, debuglog=None, errorlog=None, picklefile=None): 3171 3172 if tabmodule is None: 3173 tabmodule = tab_module 3174 3175 # Reference to the parsing method of the last built parser 3176 global parse 3177 3178 # If pickling is enabled, table files are not created 3179 if picklefile: 3180 write_tables = 0 3181 3182 if errorlog is None: 3183 errorlog = PlyLogger(sys.stderr) 3184 3185 # Get the module dictionary used for the parser 3186 if module: 3187 _items = [(k, getattr(module, k)) for k in dir(module)] 3188 pdict = dict(_items) 3189 # If no __file__ attribute is available, try to obtain it from the __module__ instead 3190 if '__file__' not in pdict: 3191 pdict['__file__'] = sys.modules[pdict['__module__']].__file__ 3192 else: 3193 pdict = get_caller_module_dict(2) 3194 3195 if outputdir is None: 3196 # If no output directory is set, the location of the output files 3197 # is determined according to the following rules: 3198 # - If tabmodule specifies a package, files go into that package directory 3199 # - Otherwise, files go in the same directory as the specifying module 3200 if isinstance(tabmodule, types.ModuleType): 3201 srcfile = tabmodule.__file__ 3202 else: 3203 if '.' not in tabmodule: 3204 srcfile = pdict['__file__'] 3205 else: 3206 parts = tabmodule.split('.') 3207 pkgname = '.'.join(parts[:-1]) 3208 exec('import %s' % pkgname) 3209 srcfile = getattr(sys.modules[pkgname], '__file__', '') 3210 outputdir = os.path.dirname(srcfile) 3211 3212 # Determine if the module is package of a package or not. 3213 # If so, fix the tabmodule setting so that tables load correctly 3214 pkg = pdict.get('__package__') 3215 if pkg and isinstance(tabmodule, str): 3216 if '.' not in tabmodule: 3217 tabmodule = pkg + '.' 
+ tabmodule 3218 3219 3220 3221 # Set start symbol if it's specified directly using an argument 3222 if start is not None: 3223 pdict['start'] = start 3224 3225 # Collect parser information from the dictionary 3226 pinfo = ParserReflect(pdict, log=errorlog) 3227 pinfo.get_all() 3228 3229 if pinfo.error: 3230 raise YaccError('Unable to build parser') 3231 3232 # Check signature against table files (if any) 3233 signature = pinfo.signature() 3234 3235 # Read the tables 3236 try: 3237 lr = LRTable() 3238 if picklefile: 3239 read_signature = lr.read_pickle(picklefile) 3240 else: 3241 read_signature = lr.read_table(tabmodule) 3242 if optimize or (read_signature == signature): 3243 try: 3244 lr.bind_callables(pinfo.pdict) 3245 parser = LRParser(lr, pinfo.error_func) 3246 parse = parser.parse 3247 return parser 3248 except Exception as e: 3249 errorlog.warning('There was a problem loading the table file: %r', e) 3250 except VersionError as e: 3251 errorlog.warning(str(e)) 3252 except ImportError: 3253 pass 3254 3255 if debuglog is None: 3256 if debug: 3257 try: 3258 debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w')) 3259 except IOError as e: 3260 errorlog.warning("Couldn't open %r. %s" % (debugfile, e)) 3261 debuglog = NullLogger() 3262 else: 3263 debuglog = NullLogger() 3264 3265 debuglog.info('Created by PLY version %s (https://www.dabeaz.com/ply/)', __version__) 3266 3267 errors = False 3268 3269 # Validate the parser information 3270 if pinfo.validate_all(): 3271 raise YaccError('Unable to build parser') 3272 3273 if not pinfo.error_func: 3274 errorlog.warning('no p_error() function is defined') 3275 3276 # Create a grammar object 3277 grammar = Grammar(pinfo.tokens) 3278 3279 # Set precedence level for terminals 3280 for term, assoc, level in pinfo.preclist: 3281 try: 3282 grammar.set_precedence(term, assoc, level) 3283 except GrammarError as e: 3284 errorlog.warning('%s', e) 3285 3286 # Add productions to the grammar 3287 for funcname, gram in pinfo.grammar: 3288 file, line, prodname, syms = gram 3289 try: 3290 grammar.add_production(prodname, syms, funcname, file, line) 3291 except GrammarError as e: 3292 errorlog.error('%s', e) 3293 errors = True 3294 3295 # Set the grammar start symbols 3296 try: 3297 if start is None: 3298 grammar.set_start(pinfo.start) 3299 else: 3300 grammar.set_start(start) 3301 except GrammarError as e: 3302 errorlog.error(str(e)) 3303 errors = True 3304 3305 if errors: 3306 raise YaccError('Unable to build parser') 3307 3308 # Verify the grammar structure 3309 undefined_symbols = grammar.undefined_symbols() 3310 for sym, prod in undefined_symbols: 3311 errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym) 3312 errors = True 3313 3314 unused_terminals = grammar.unused_terminals() 3315 if unused_terminals: 3316 debuglog.info('') 3317 debuglog.info('Unused terminals:') 3318 debuglog.info('') 3319 for term in unused_terminals: 3320 errorlog.warning('Token %r defined, but not used', term) 3321 debuglog.info(' %s', term) 3322 3323 # Print out all productions to the debug log 3324 if debug: 3325 debuglog.info('') 3326 debuglog.info('Grammar') 3327 debuglog.info('') 3328 for n, p in enumerate(grammar.Productions): 3329 debuglog.info('Rule %-5d %s', n, p) 3330 3331 # Find unused non-terminals 3332 unused_rules = grammar.unused_rules() 3333 for prod in unused_rules: 3334 errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name) 3335 3336 if len(unused_terminals) == 1: 3337 
errorlog.warning('There is 1 unused token') 3338 if len(unused_terminals) > 1: 3339 errorlog.warning('There are %d unused tokens', len(unused_terminals)) 3340 3341 if len(unused_rules) == 1: 3342 errorlog.warning('There is 1 unused rule') 3343 if len(unused_rules) > 1: 3344 errorlog.warning('There are %d unused rules', len(unused_rules)) 3345 3346 if debug: 3347 debuglog.info('') 3348 debuglog.info('Terminals, with rules where they appear') 3349 debuglog.info('') 3350 terms = list(grammar.Terminals) 3351 terms.sort() 3352 for term in terms: 3353 debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]])) 3354 3355 debuglog.info('') 3356 debuglog.info('Nonterminals, with rules where they appear') 3357 debuglog.info('') 3358 nonterms = list(grammar.Nonterminals) 3359 nonterms.sort() 3360 for nonterm in nonterms: 3361 debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]])) 3362 debuglog.info('') 3363 3364 if check_recursion: 3365 unreachable = grammar.find_unreachable() 3366 for u in unreachable: 3367 errorlog.warning('Symbol %r is unreachable', u) 3368 3369 infinite = grammar.infinite_cycles() 3370 for inf in infinite: 3371 errorlog.error('Infinite recursion detected for symbol %r', inf) 3372 errors = True 3373 3374 unused_prec = grammar.unused_precedence() 3375 for term, assoc in unused_prec: 3376 errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term) 3377 errors = True 3378 3379 if errors: 3380 raise YaccError('Unable to build parser') 3381 3382 # Run the LRGeneratedTable on the grammar 3383 if debug: 3384 errorlog.debug('Generating %s tables', method) 3385 3386 lr = LRGeneratedTable(grammar, method, debuglog) 3387 3388 if debug: 3389 num_sr = len(lr.sr_conflicts) 3390 3391 # Report shift/reduce and reduce/reduce conflicts 3392 if num_sr == 1: 3393 errorlog.warning('1 shift/reduce conflict') 3394 elif num_sr > 1: 3395 errorlog.warning('%d shift/reduce conflicts', num_sr) 3396 3397 num_rr = len(lr.rr_conflicts) 3398 if num_rr == 1: 3399 errorlog.warning('1 reduce/reduce conflict') 3400 elif num_rr > 1: 3401 errorlog.warning('%d reduce/reduce conflicts', num_rr) 3402 3403 # Write out conflicts to the output file 3404 if debug and (lr.sr_conflicts or lr.rr_conflicts): 3405 debuglog.warning('') 3406 debuglog.warning('Conflicts:') 3407 debuglog.warning('') 3408 3409 for state, tok, resolution in lr.sr_conflicts: 3410 debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution) 3411 3412 already_reported = set() 3413 for state, rule, rejected in lr.rr_conflicts: 3414 if (state, id(rule), id(rejected)) in already_reported: 3415 continue 3416 debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) 3417 debuglog.warning('rejected rule (%s) in state %d', rejected, state) 3418 errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) 3419 errorlog.warning('rejected rule (%s) in state %d', rejected, state) 3420 already_reported.add((state, id(rule), id(rejected))) 3421 3422 warned_never = [] 3423 for state, rule, rejected in lr.rr_conflicts: 3424 if not rejected.reduced and (rejected not in warned_never): 3425 debuglog.warning('Rule (%s) is never reduced', rejected) 3426 errorlog.warning('Rule (%s) is never reduced', rejected) 3427 warned_never.append(rejected) 3428 3429 # Write the table file if requested 3430 if write_tables: 3431 try: 3432 lr.write_table(tabmodule, outputdir, signature) 3433 except 
IOError as e: 3434 errorlog.warning("Couldn't create %r. %s" % (tabmodule, e)) 3435 3436 # Write a pickled version of the tables 3437 if picklefile: 3438 try: 3439 lr.pickle_table(picklefile, signature) 3440 except IOError as e: 3441 errorlog.warning("Couldn't create %r. %s" % (picklefile, e)) 3442 3443 # Build the parser 3444 lr.bind_callables(pinfo.pdict) 3445 parser = LRParser(lr, pinfo.error_func) 3446 3447 parse = parser.parse 3448 return parser 3449
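# -----------------------------------------------------------------------------
# Example usage (an illustrative sketch only; the tokens, rules, and lexer
# shown here are hypothetical and this block is never executed):
#
#     import ply.yacc as yacc
#
#     tokens = ('NUMBER', 'PLUS')
#
#     def p_expr_plus(p):
#         'expr : expr PLUS NUMBER'
#         p[0] = p[1] + p[3]
#
#     def p_expr_number(p):
#         'expr : NUMBER'
#         p[0] = p[1]
#
#     def p_error(p):
#         print('Syntax error at', p)
#
#     parser = yacc.yacc()                        # builds tables, writes parsetab.py
#     result = parser.parse(data, lexer=mylexer)  # 'mylexer' built with ply.lex
# -----------------------------------------------------------------------------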