1# ----------------------------------------------------------------------------- 2# ply: yacc.py 3# 4# Copyright (C) 2001-2018 5# David M. Beazley (Dabeaz LLC) 6# All rights reserved. 7# 8# Redistribution and use in source and binary forms, with or without 9# modification, are permitted provided that the following conditions are 10# met: 11# 12# * Redistributions of source code must retain the above copyright notice, 13# this list of conditions and the following disclaimer. 14# * Redistributions in binary form must reproduce the above copyright notice, 15# this list of conditions and the following disclaimer in the documentation 16# and/or other materials provided with the distribution. 17# * Neither the name of the David Beazley or Dabeaz LLC may be used to 18# endorse or promote products derived from this software without 19# specific prior written permission. 20# 21# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32# ----------------------------------------------------------------------------- 33# 34# This implements an LR parser that is constructed from grammar rules defined 35# as Python functions. The grammar is specified by supplying the BNF inside 36# Python documentation strings. 
# The inspiration for this technique was borrowed from John Aycock's Spark
# parsing system.  PLY might be viewed as a cross between Spark and the GNU
# bison utility.
#
# The current implementation is only somewhat object-oriented. The
# LR parser itself is defined in terms of an object (which allows multiple
# parsers to co-exist).  However, most of the variables used during table
# construction are defined in terms of global variables.  Users shouldn't
# notice unless they are trying to define multiple parsers at the same
# time using threads (in which case they should have their head examined).
#
# This implementation supports both SLR and LALR(1) parsing.  LALR(1)
# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
# Techniques, and Tools" (The Dragon Book).  LALR(1) has since been replaced
# by the more efficient DeRemer and Pennello algorithm.
#
# :::::::: WARNING :::::::
#
# Construction of LR parsing tables is fairly complicated and expensive.
# To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what one might
# consider to be good Python "coding style."  Modify the code at your
# own risk!
# ----------------------------------------------------------------------------

import re
import types
import sys
import os.path
import inspect
import warnings

__version__    = '3.11'
__tabversion__ = '3.10'

#-----------------------------------------------------------------------------
#                     === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug   = True             # Debugging mode.  If set, yacc generates a
                               # 'parser.out' file in the current directory

debug_file  = 'parser.out'     # Default name of the debugging file
tab_module  = 'parsetab'       # Default name of the table module
default_lr  = 'LALR'           # Default LR table generation method

error_count = 3                # Number of symbols that must be shifted to leave recovery mode

yaccdevel   = False            # Set to True if developing yacc.  This turns off optimized
                               # implementations of certain functions.

resultlimit = 40               # Size limit of results when running in debug mode.

pickle_protocol = 0            # Protocol to use when writing pickle files

# String type-checking compatibility: Python 2 needs basestring to cover
# both str and unicode; on Python 3 plain str suffices.
if sys.version_info[0] < 3:
    string_types = basestring
else:
    string_types = str

MAXINT = sys.maxsize

# This object is a stand-in for a logging object created by the
# logging module.  PLY will use this by default to create things
# such as the parser.out file.  If a user wants more detailed
# information, they can create their own logging object and pass
# it into PLY.

class PlyLogger(object):
    """Minimal logger writing %-formatted messages to a file-like object."""

    def __init__(self, f):
        self.f = f

    def _emit(self, prefix, msg, args):
        # Single write keeps output atomic per message.
        self.f.write(prefix + (msg % args) + '\n')

    def debug(self, msg, *args, **kwargs):
        self._emit('', msg, args)

    info = debug

    def warning(self, msg, *args, **kwargs):
        self._emit('WARNING: ', msg, args)

    def error(self, msg, *args, **kwargs):
        self._emit('ERROR: ', msg, args)

    # NOTE: critical deliberately aliases debug (no prefix) to match the
    # historical behavior of this class.
    critical = debug

# Null logger is used when no output is generated.  Every attribute access
# and every call just yields the logger itself, so arbitrary chains like
# log.debug(...).info(...) are silently absorbed.
class NullLogger(object):
    def __getattribute__(self, name):
        return self

    def __call__(self, *args, **kwargs):
        return self

# Exception raised for yacc-related errors
class YaccError(Exception):
    pass

# Format the result message that the parser produces when running in debug mode.
138def format_result(r): 139 repr_str = repr(r) 140 if '\n' in repr_str: 141 repr_str = repr(repr_str) 142 if len(repr_str) > resultlimit: 143 repr_str = repr_str[:resultlimit] + ' ...' 144 result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str) 145 return result 146 147# Format stack entries when the parser is running in debug mode 148def format_stack_entry(r): 149 repr_str = repr(r) 150 if '\n' in repr_str: 151 repr_str = repr(repr_str) 152 if len(repr_str) < 16: 153 return repr_str 154 else: 155 return '<%s @ 0x%x>' % (type(r).__name__, id(r)) 156 157# Panic mode error recovery support. This feature is being reworked--much of the 158# code here is to offer a deprecation/backwards compatible transition 159 160_errok = None 161_token = None 162_restart = None 163_warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error(). 164Instead, invoke the methods on the associated parser instance: 165 166 def p_error(p): 167 ... 168 # Use parser.errok(), parser.token(), parser.restart() 169 ... 170 171 parser = yacc.yacc() 172''' 173 174def errok(): 175 warnings.warn(_warnmsg) 176 return _errok() 177 178def restart(): 179 warnings.warn(_warnmsg) 180 return _restart() 181 182def token(): 183 warnings.warn(_warnmsg) 184 return _token() 185 186# Utility function to call the p_error() function with some deprecation hacks 187def call_errorfunc(errorfunc, token, parser): 188 global _errok, _token, _restart 189 _errok = parser.errok 190 _token = parser.token 191 _restart = parser.restart 192 r = errorfunc(token) 193 try: 194 del _errok, _token, _restart 195 except NameError: 196 pass 197 return r 198 199#----------------------------------------------------------------------------- 200# === LR Parsing Engine === 201# 202# The following classes are used for the LR parser itself. 
These are not 203# used during table construction and are independent of the actual LR 204# table generation algorithm 205#----------------------------------------------------------------------------- 206 207# This class is used to hold non-terminal grammar symbols during parsing. 208# It normally has the following attributes set: 209# .type = Grammar symbol type 210# .value = Symbol value 211# .lineno = Starting line number 212# .endlineno = Ending line number (optional, set automatically) 213# .lexpos = Starting lex position 214# .endlexpos = Ending lex position (optional, set automatically) 215 216class YaccSymbol: 217 def __str__(self): 218 return self.type 219 220 def __repr__(self): 221 return str(self) 222 223# This class is a wrapper around the objects actually passed to each 224# grammar rule. Index lookup and assignment actually assign the 225# .value attribute of the underlying YaccSymbol object. 226# The lineno() method returns the line number of a given 227# item (or 0 if not defined). The linespan() method returns 228# a tuple of (startline,endline) representing the range of lines 229# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) 230# representing the range of positional information for a symbol. 
class YaccProduction:
    """Wrapper around the grammar symbols matched by a rule.

    An instance is passed as ``p`` to each p_*() grammar-rule function.
    Indexing reads/writes the .value attribute of the underlying
    YaccSymbol objects: p[0] is the left-hand side, p[1]..p[n] the
    right-hand-side symbols, and negative indices reach back into the
    parser's symbol stack.
    """

    def __init__(self, s, stack=None):
        self.slice = s          # List of YaccSymbols for the current rule
        self.stack = stack      # Parser symbol stack (for negative indexing)
        self.lexer = None       # Filled in by the parsing engine
        self.parser = None      # Filled in by the parsing engine

    def __getitem__(self, n):
        # Slices return a list of values; non-negative indices read the
        # production being recognized; negative indices read the symbol stack.
        if isinstance(n, slice):
            return [s.value for s in self.slice[n]]
        elif n >= 0:
            return self.slice[n].value
        else:
            return self.stack[n].value

    def __setitem__(self, n, v):
        # Assignment stores into the underlying YaccSymbol's .value
        self.slice[n].value = v

    def __getslice__(self, i, j):
        # Python 2 only; Python 3 routes slicing through __getitem__
        return [s.value for s in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        # Starting line number of symbol n (0 if not recorded)
        return getattr(self.slice[n], 'lineno', 0)

    def set_lineno(self, n, lineno):
        self.slice[n].lineno = lineno

    def linespan(self, n):
        # (startline, endline) range covered by symbol n
        startline = getattr(self.slice[n], 'lineno', 0)
        endline = getattr(self.slice[n], 'endlineno', startline)
        return startline, endline

    def lexpos(self, n):
        # Starting lexer position of symbol n (0 if not recorded)
        return getattr(self.slice[n], 'lexpos', 0)

    def set_lexpos(self, n, lexpos):
        self.slice[n].lexpos = lexpos

    def lexspan(self, n):
        # (startpos, endpos) lexer positions covered by symbol n
        startpos = getattr(self.slice[n], 'lexpos', 0)
        endpos = getattr(self.slice[n], 'endlexpos', startpos)
        return startpos, endpos

    def error(self):
        # Called from within a grammar rule to force error recovery; the
        # engine catches SyntaxError and enters its recovery code.
        raise SyntaxError

# -----------------------------------------------------------------------------
#                               == LRParser ==
#
# The LR Parsing engine.
# -----------------------------------------------------------------------------

class LRParser:
    """LR parsing engine driven by precomputed action/goto tables."""

    def __init__(self, lrtab, errorf):
        # lrtab supplies the generated parsing tables; errorf is the
        # user-defined p_error() handler (may be None).
        self.productions = lrtab.lr_productions
        self.action = lrtab.lr_action
        self.goto = lrtab.lr_goto
        self.errorfunc = errorf
        self.set_defaulted_states()
        self.errorok = True

    def errok(self):
        # Invoked (via parser.errok()) inside p_error() to signal that
        # recovery succeeded; clears the error state.
        self.errorok = True

    def restart(self):
        # Discard all parsing state and return to the start configuration.
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = '$end'
        self.symstack.append(sym)
        self.statestack.append(0)

    # Defaulted state support.
    # This method identifies parser states where there is only one possible reduction action.
    # For such states, the parser can choose to make a rule reduction without consuming
    # the next look-ahead token.  This delayed invocation of the tokenizer can be useful in
    # certain kinds of advanced parsing situations where the lexer and parser interact with
    # each other or change states (i.e., manipulation of scope, lexer states, etc.).
    #
    # See:  http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
    def set_defaulted_states(self):
        # Build {state: rule} for every state whose action table holds a
        # single entry that is a reduction.  (Negative table entries encode
        # "reduce by rule -n"; positive entries are shifts; 0 is accept.)
        self.defaulted_states = {}
        for state, actions in self.action.items():
            rules = list(actions.values())
            if len(rules) == 1 and rules[0] < 0:
                self.defaulted_states[state] = rules[0]

    def disable_defaulted_states(self):
        # Turn the optimization off; every state consults the lookahead.
        self.defaulted_states = {}

    def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        """Parse *input* and return the value of the grammar's start symbol.

        Dispatches to one of three engine variants generated from the same
        master implementation: parsedebug() when debugging is requested (or
        when developing yacc itself), parseopt() when position tracking is
        enabled, and parseopt_notrack() (fastest) otherwise.
        """
        if debug or yaccdevel:
            # A truthy non-logger debug value (e.g. debug=1/True) gets a
            # default logger writing to stderr.
            if isinstance(debug, int):
                debug = PlyLogger(sys.stderr)
            return self.parsedebug(input, lexer, debug, tracking, tokenfunc)
        elif tracking:
            return self.parseopt(input, lexer, debug, tracking, tokenfunc)
        else:
            return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc)


    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parsedebug().
    #
    # This is the debugging enabled version of parse().  All changes made to the
    # parsing engine should be made here.  Optimized versions of this function
    # are automatically created by the ply/ygen.py script.  This script cuts out
    # sections enclosed in markers such as this:
    #
    #      #--! DEBUG
    #      statements
    #      #--! DEBUG
    #
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parsedebug-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto                      # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions               # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice  = YaccProduction(None)           # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        #--! DEBUG
        debug.info('PLY: PARSE DEBUG START')
        #--! DEBUG

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack   = []                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken   = None              # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that.  Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            #--! DEBUG
            debug.debug('')
            debug.debug('State  : %s', state)
            #--! DEBUG

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                # Defaulted state: reduce without consulting the lookahead
                t = defaulted_states[state]
                #--! DEBUG
                debug.debug('Defaulted state %s: Reduce using %d', state, -t)
                #--! DEBUG

            #--! DEBUG
            debug.debug('Stack  : %s',
                        ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
            #--! DEBUG

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    #--! DEBUG
                    debug.debug('Action : Shift and goto state %s', t)
                    #--! DEBUG

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    #--! DEBUG
                    if plen:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
                                   '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']',
                                   goto[statestack[-1-plen]][pname])
                    else:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [],
                                   goto[statestack[-1]][pname])

                    #--! DEBUG

                    if plen:
                        # Non-empty production: its RHS symbols are popped off
                        # the stack and exposed to the rule through targ/pslice
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        #--! TRACKING
                        if tracking:
                            # Span of the new symbol = first RHS symbol's start
                            # through last RHS symbol's end
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
                        #--! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            #--! DEBUG
                            debug.info('Result : %s', format_result(pslice[0]))
                            #--! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set.  Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        #--! TRACKING
                        if tracking:
                            # Empty production: position comes from the lexer
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        #--! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            #--! DEBUG
                            debug.info('Result : %s', format_result(pslice[0]))
                            #--! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set.  Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    # Accept: the start symbol's value is the parse result
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    #--! DEBUG
                    debug.info('Done   : Returning %s', format_result(result))
                    debug.info('PLY: PARSE DEBUG END')
                    #--! DEBUG
                    return result

            if t is None:

                #--! DEBUG
                debug.error('Error  : %s',
                            ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
                #--! DEBUG

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        # No p_error() handler: report to stderr and give up
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1: the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        #--! TRACKING
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        #--! TRACKING
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    # Lookahead is already an error token: pop a symbol and
                    # state off the stack and retry
                    sym = symstack.pop()
                    #--! TRACKING
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    #--! TRACKING
                    statestack.pop()
                    state = statestack[-1]

                continue

        # Call an error function here
        raise RuntimeError('yacc: internal parser error!!!\n')

        #--! parsedebug-end

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt().
    #
    # Optimized version of parse() method.   DO NOT EDIT THIS CODE DIRECTLY!
    # This code is automatically generated by the ply/ygen.py script. Make
    # changes to the parsedebug() method instead.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parseopt-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto                      # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions               # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice  = YaccProduction(None)           # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery


        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack   = []                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken   = None              # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that.  Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer


            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                # Defaulted state: reduce without consulting the lookahead
                t = defaulted_states[state]


            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t


                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None


                    if plen:
                        # Non-empty production: pop its RHS off the stack
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        #--! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
                        #--! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set.  Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        #--! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        #--! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set.  Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    # Accept: the start symbol's value is the parse result
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    return result

            if t is None:


                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        # No p_error() handler: report to stderr and give up
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1: the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        #--! TRACKING
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        #--! TRACKING
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    # Lookahead is already an error token: pop a symbol and
                    # state off the stack and retry
                    sym = symstack.pop()
                    #--! TRACKING
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    #--! TRACKING
                    statestack.pop()
                    state = statestack[-1]

                continue

        # Call an error function here
        raise RuntimeError('yacc: internal parser error!!!\n')

        #--! parseopt-end

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt_notrack().
    #
    # Optimized version of parseopt() with line number tracking removed.
    # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated
    # by the ply/ygen.py script. Make changes to the parsedebug() method instead.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parseopt-notrack-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto                      # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions               # Local reference to production list (to avoid lookup on self.)
1010 defaulted_states = self.defaulted_states # Local reference to defaulted states 1011 pslice = YaccProduction(None) # Production object passed to grammar rules 1012 errorcount = 0 # Used during error recovery 1013 1014 1015 # If no lexer was given, we will try to use the lex module 1016 if not lexer: 1017 from . import lex 1018 lexer = lex.lexer 1019 1020 # Set up the lexer and parser objects on pslice 1021 pslice.lexer = lexer 1022 pslice.parser = self 1023 1024 # If input was supplied, pass to lexer 1025 if input is not None: 1026 lexer.input(input) 1027 1028 if tokenfunc is None: 1029 # Tokenize function 1030 get_token = lexer.token 1031 else: 1032 get_token = tokenfunc 1033 1034 # Set the parser() token method (sometimes used in error recovery) 1035 self.token = get_token 1036 1037 # Set up the state and symbol stacks 1038 1039 statestack = [] # Stack of parsing states 1040 self.statestack = statestack 1041 symstack = [] # Stack of grammar symbols 1042 self.symstack = symstack 1043 1044 pslice.stack = symstack # Put in the production 1045 errtoken = None # Err token 1046 1047 # The start state is assumed to be (0,$end) 1048 1049 statestack.append(0) 1050 sym = YaccSymbol() 1051 sym.type = '$end' 1052 symstack.append(sym) 1053 state = 0 1054 while True: 1055 # Get the next symbol on the input. If a lookahead symbol 1056 # is already set, we just use that. 
Otherwise, we'll pull 1057 # the next token off of the lookaheadstack or from the lexer 1058 1059 1060 if state not in defaulted_states: 1061 if not lookahead: 1062 if not lookaheadstack: 1063 lookahead = get_token() # Get the next token 1064 else: 1065 lookahead = lookaheadstack.pop() 1066 if not lookahead: 1067 lookahead = YaccSymbol() 1068 lookahead.type = '$end' 1069 1070 # Check the action table 1071 ltype = lookahead.type 1072 t = actions[state].get(ltype) 1073 else: 1074 t = defaulted_states[state] 1075 1076 1077 if t is not None: 1078 if t > 0: 1079 # shift a symbol on the stack 1080 statestack.append(t) 1081 state = t 1082 1083 1084 symstack.append(lookahead) 1085 lookahead = None 1086 1087 # Decrease error count on successful shift 1088 if errorcount: 1089 errorcount -= 1 1090 continue 1091 1092 if t < 0: 1093 # reduce a symbol on the stack, emit a production 1094 p = prod[-t] 1095 pname = p.name 1096 plen = p.len 1097 1098 # Get production function 1099 sym = YaccSymbol() 1100 sym.type = pname # Production name 1101 sym.value = None 1102 1103 1104 if plen: 1105 targ = symstack[-plen-1:] 1106 targ[0] = sym 1107 1108 1109 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 1110 # The code enclosed in this section is duplicated 1111 # below as a performance optimization. Make sure 1112 # changes get made in both locations. 1113 1114 pslice.slice = targ 1115 1116 try: 1117 # Call the grammar rule with our special slice object 1118 del symstack[-plen:] 1119 self.state = state 1120 p.callable(pslice) 1121 del statestack[-plen:] 1122 symstack.append(sym) 1123 state = goto[statestack[-1]][pname] 1124 statestack.append(state) 1125 except SyntaxError: 1126 # If an error was set. 
Enter error recovery state 1127 lookaheadstack.append(lookahead) # Save the current lookahead token 1128 symstack.extend(targ[1:-1]) # Put the production slice back on the stack 1129 statestack.pop() # Pop back one state (before the reduce) 1130 state = statestack[-1] 1131 sym.type = 'error' 1132 sym.value = 'error' 1133 lookahead = sym 1134 errorcount = error_count 1135 self.errorok = False 1136 1137 continue 1138 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 1139 1140 else: 1141 1142 1143 targ = [sym] 1144 1145 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 1146 # The code enclosed in this section is duplicated 1147 # above as a performance optimization. Make sure 1148 # changes get made in both locations. 1149 1150 pslice.slice = targ 1151 1152 try: 1153 # Call the grammar rule with our special slice object 1154 self.state = state 1155 p.callable(pslice) 1156 symstack.append(sym) 1157 state = goto[statestack[-1]][pname] 1158 statestack.append(state) 1159 except SyntaxError: 1160 # If an error was set. Enter error recovery state 1161 lookaheadstack.append(lookahead) # Save the current lookahead token 1162 statestack.pop() # Pop back one state (before the reduce) 1163 state = statestack[-1] 1164 sym.type = 'error' 1165 sym.value = 'error' 1166 lookahead = sym 1167 errorcount = error_count 1168 self.errorok = False 1169 1170 continue 1171 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 1172 1173 if t == 0: 1174 n = symstack[-1] 1175 result = getattr(n, 'value', None) 1176 return result 1177 1178 if t is None: 1179 1180 1181 # We have some kind of parsing error here. To handle 1182 # this, we are going to push the current token onto 1183 # the tokenstack and replace it with an 'error' token. 1184 # If there are any synchronization rules, they may 1185 # catch it. 1186 # 1187 # In addition to pushing the error token, we call call 1188 # the user defined p_error() function if this is the 1189 # first syntax error. 
This function is only called if 1190 # errorcount == 0. 1191 if errorcount == 0 or self.errorok: 1192 errorcount = error_count 1193 self.errorok = False 1194 errtoken = lookahead 1195 if errtoken.type == '$end': 1196 errtoken = None # End of file! 1197 if self.errorfunc: 1198 if errtoken and not hasattr(errtoken, 'lexer'): 1199 errtoken.lexer = lexer 1200 self.state = state 1201 tok = call_errorfunc(self.errorfunc, errtoken, self) 1202 if self.errorok: 1203 # User must have done some kind of panic 1204 # mode recovery on their own. The 1205 # returned token is the next lookahead 1206 lookahead = tok 1207 errtoken = None 1208 continue 1209 else: 1210 if errtoken: 1211 if hasattr(errtoken, 'lineno'): 1212 lineno = lookahead.lineno 1213 else: 1214 lineno = 0 1215 if lineno: 1216 sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) 1217 else: 1218 sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) 1219 else: 1220 sys.stderr.write('yacc: Parse error in input. EOF\n') 1221 return 1222 1223 else: 1224 errorcount = error_count 1225 1226 # case 1: the statestack only has 1 entry on it. If we're in this state, the 1227 # entire parse has been rolled back and we're completely hosed. The token is 1228 # discarded and we just keep going. 1229 1230 if len(statestack) <= 1 and lookahead.type != '$end': 1231 lookahead = None 1232 errtoken = None 1233 state = 0 1234 # Nuke the pushback stack 1235 del lookaheadstack[:] 1236 continue 1237 1238 # case 2: the statestack has a couple of entries on it, but we're 1239 # at the end of the file. nuke the top entry and generate an error token 1240 1241 # Start nuking entries on the stack 1242 if lookahead.type == '$end': 1243 # Whoa. We're really hosed here. Bail out 1244 return 1245 1246 if lookahead.type != 'error': 1247 sym = symstack[-1] 1248 if sym.type == 'error': 1249 # Hmmm. 
Error is on top of stack, we'll just nuke input 1250 # symbol and continue 1251 lookahead = None 1252 continue 1253 1254 # Create the error symbol for the first time and make it the new lookahead symbol 1255 t = YaccSymbol() 1256 t.type = 'error' 1257 1258 if hasattr(lookahead, 'lineno'): 1259 t.lineno = t.endlineno = lookahead.lineno 1260 if hasattr(lookahead, 'lexpos'): 1261 t.lexpos = t.endlexpos = lookahead.lexpos 1262 t.value = lookahead 1263 lookaheadstack.append(lookahead) 1264 lookahead = t 1265 else: 1266 sym = symstack.pop() 1267 statestack.pop() 1268 state = statestack[-1] 1269 1270 continue 1271 1272 # Call an error function here 1273 raise RuntimeError('yacc: internal parser error!!!\n') 1274 1275 #--! parseopt-notrack-end 1276 1277# ----------------------------------------------------------------------------- 1278# === Grammar Representation === 1279# 1280# The following functions, classes, and variables are used to represent and 1281# manipulate the rules that make up a grammar. 1282# ----------------------------------------------------------------------------- 1283 1284# regex matching identifiers 1285_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') 1286 1287# ----------------------------------------------------------------------------- 1288# class Production: 1289# 1290# This class stores the raw information about a single production or grammar rule. 1291# A grammar rule refers to a specification such as this: 1292# 1293# expr : expr PLUS term 1294# 1295# Here are the basic attributes defined on all productions 1296# 1297# name - Name of the production. For example 'expr' 1298# prod - A list of symbols on the right side ['expr','PLUS','term'] 1299# prec - Production precedence level 1300# number - Production number. 1301# func - Function that executes on reduce 1302# file - File where production function is defined 1303# lineno - Line number where production function is defined 1304# 1305# The following attributes are defined or optional. 
1306# 1307# len - Length of the production (number of symbols on right hand side) 1308# usyms - Set of unique symbols found in the production 1309# ----------------------------------------------------------------------------- 1310 1311class Production(object): 1312 reduced = 0 1313 def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0): 1314 self.name = name 1315 self.prod = tuple(prod) 1316 self.number = number 1317 self.func = func 1318 self.callable = None 1319 self.file = file 1320 self.line = line 1321 self.prec = precedence 1322 1323 # Internal settings used during table construction 1324 1325 self.len = len(self.prod) # Length of the production 1326 1327 # Create a list of unique production symbols used in the production 1328 self.usyms = [] 1329 for s in self.prod: 1330 if s not in self.usyms: 1331 self.usyms.append(s) 1332 1333 # List of all LR items for the production 1334 self.lr_items = [] 1335 self.lr_next = None 1336 1337 # Create a string representation 1338 if self.prod: 1339 self.str = '%s -> %s' % (self.name, ' '.join(self.prod)) 1340 else: 1341 self.str = '%s -> <empty>' % self.name 1342 1343 def __str__(self): 1344 return self.str 1345 1346 def __repr__(self): 1347 return 'Production(' + str(self) + ')' 1348 1349 def __len__(self): 1350 return len(self.prod) 1351 1352 def __nonzero__(self): 1353 return 1 1354 1355 def __getitem__(self, index): 1356 return self.prod[index] 1357 1358 # Return the nth lr_item from the production (or None if at the end) 1359 def lr_item(self, n): 1360 if n > len(self.prod): 1361 return None 1362 p = LRItem(self, n) 1363 # Precompute the list of productions immediately following. 
1364 try: 1365 p.lr_after = self.Prodnames[p.prod[n+1]] 1366 except (IndexError, KeyError): 1367 p.lr_after = [] 1368 try: 1369 p.lr_before = p.prod[n-1] 1370 except IndexError: 1371 p.lr_before = None 1372 return p 1373 1374 # Bind the production function name to a callable 1375 def bind(self, pdict): 1376 if self.func: 1377 self.callable = pdict[self.func] 1378 1379# This class serves as a minimal standin for Production objects when 1380# reading table data from files. It only contains information 1381# actually used by the LR parsing engine, plus some additional 1382# debugging information. 1383class MiniProduction(object): 1384 def __init__(self, str, name, len, func, file, line): 1385 self.name = name 1386 self.len = len 1387 self.func = func 1388 self.callable = None 1389 self.file = file 1390 self.line = line 1391 self.str = str 1392 1393 def __str__(self): 1394 return self.str 1395 1396 def __repr__(self): 1397 return 'MiniProduction(%s)' % self.str 1398 1399 # Bind the production function name to a callable 1400 def bind(self, pdict): 1401 if self.func: 1402 self.callable = pdict[self.func] 1403 1404 1405# ----------------------------------------------------------------------------- 1406# class LRItem 1407# 1408# This class represents a specific stage of parsing a production rule. For 1409# example: 1410# 1411# expr : expr . PLUS term 1412# 1413# In the above, the "." represents the current location of the parse. Here 1414# basic attributes: 1415# 1416# name - Name of the production. For example 'expr' 1417# prod - A list of symbols on the right side ['expr','.', 'PLUS','term'] 1418# number - Production number. 1419# 1420# lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term' 1421# then lr_next refers to 'expr -> expr PLUS . term' 1422# lr_index - LR item index (location of the ".") in the prod list. 
1423# lookaheads - LALR lookahead symbols for this item 1424# len - Length of the production (number of symbols on right hand side) 1425# lr_after - List of all productions that immediately follow 1426# lr_before - Grammar symbol immediately before 1427# ----------------------------------------------------------------------------- 1428 1429class LRItem(object): 1430 def __init__(self, p, n): 1431 self.name = p.name 1432 self.prod = list(p.prod) 1433 self.number = p.number 1434 self.lr_index = n 1435 self.lookaheads = {} 1436 self.prod.insert(n, '.') 1437 self.prod = tuple(self.prod) 1438 self.len = len(self.prod) 1439 self.usyms = p.usyms 1440 1441 def __str__(self): 1442 if self.prod: 1443 s = '%s -> %s' % (self.name, ' '.join(self.prod)) 1444 else: 1445 s = '%s -> <empty>' % self.name 1446 return s 1447 1448 def __repr__(self): 1449 return 'LRItem(' + str(self) + ')' 1450 1451# ----------------------------------------------------------------------------- 1452# rightmost_terminal() 1453# 1454# Return the rightmost terminal from a list of symbols. Used in add_production() 1455# ----------------------------------------------------------------------------- 1456def rightmost_terminal(symbols, terminals): 1457 i = len(symbols) - 1 1458 while i >= 0: 1459 if symbols[i] in terminals: 1460 return symbols[i] 1461 i -= 1 1462 return None 1463 1464# ----------------------------------------------------------------------------- 1465# === GRAMMAR CLASS === 1466# 1467# The following class represents the contents of the specified grammar along 1468# with various computed properties such as first sets, follow sets, LR items, etc. 1469# This data is used for critical parts of the table generation process later. 1470# ----------------------------------------------------------------------------- 1471 1472class GrammarError(YaccError): 1473 pass 1474 1475class Grammar(object): 1476 def __init__(self, terminals): 1477 self.Productions = [None] # A list of all of the productions. 
The first 1478 # entry is always reserved for the purpose of 1479 # building an augmented grammar 1480 1481 self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all 1482 # productions of that nonterminal. 1483 1484 self.Prodmap = {} # A dictionary that is only used to detect duplicate 1485 # productions. 1486 1487 self.Terminals = {} # A dictionary mapping the names of terminal symbols to a 1488 # list of the rules where they are used. 1489 1490 for term in terminals: 1491 self.Terminals[term] = [] 1492 1493 self.Terminals['error'] = [] 1494 1495 self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list 1496 # of rule numbers where they are used. 1497 1498 self.First = {} # A dictionary of precomputed FIRST(x) symbols 1499 1500 self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols 1501 1502 self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the 1503 # form ('right',level) or ('nonassoc', level) or ('left',level) 1504 1505 self.UsedPrecedence = set() # Precedence rules that were actually used by the grammer. 1506 # This is only used to provide error checking and to generate 1507 # a warning about unused precedence rules. 1508 1509 self.Start = None # Starting symbol for the grammar 1510 1511 1512 def __len__(self): 1513 return len(self.Productions) 1514 1515 def __getitem__(self, index): 1516 return self.Productions[index] 1517 1518 # ----------------------------------------------------------------------------- 1519 # set_precedence() 1520 # 1521 # Sets the precedence for a given terminal. assoc is the associativity such as 1522 # 'left','right', or 'nonassoc'. level is a numeric level. 
1523 # 1524 # ----------------------------------------------------------------------------- 1525 1526 def set_precedence(self, term, assoc, level): 1527 assert self.Productions == [None], 'Must call set_precedence() before add_production()' 1528 if term in self.Precedence: 1529 raise GrammarError('Precedence already specified for terminal %r' % term) 1530 if assoc not in ['left', 'right', 'nonassoc']: 1531 raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") 1532 self.Precedence[term] = (assoc, level) 1533 1534 # ----------------------------------------------------------------------------- 1535 # add_production() 1536 # 1537 # Given an action function, this function assembles a production rule and 1538 # computes its precedence level. 1539 # 1540 # The production rule is supplied as a list of symbols. For example, 1541 # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and 1542 # symbols ['expr','PLUS','term']. 1543 # 1544 # Precedence is determined by the precedence of the right-most non-terminal 1545 # or the precedence of a terminal specified by %prec. 1546 # 1547 # A variety of error checks are performed to make sure production symbols 1548 # are valid and that %prec is used correctly. 1549 # ----------------------------------------------------------------------------- 1550 1551 def add_production(self, prodname, syms, func=None, file='', line=0): 1552 1553 if prodname in self.Terminals: 1554 raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname)) 1555 if prodname == 'error': 1556 raise GrammarError('%s:%d: Illegal rule name %r. 
error is a reserved word' % (file, line, prodname)) 1557 if not _is_identifier.match(prodname): 1558 raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname)) 1559 1560 # Look for literal tokens 1561 for n, s in enumerate(syms): 1562 if s[0] in "'\"": 1563 try: 1564 c = eval(s) 1565 if (len(c) > 1): 1566 raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' % 1567 (file, line, s, prodname)) 1568 if c not in self.Terminals: 1569 self.Terminals[c] = [] 1570 syms[n] = c 1571 continue 1572 except SyntaxError: 1573 pass 1574 if not _is_identifier.match(s) and s != '%prec': 1575 raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname)) 1576 1577 # Determine the precedence level 1578 if '%prec' in syms: 1579 if syms[-1] == '%prec': 1580 raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line)) 1581 if syms[-2] != '%prec': 1582 raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' % 1583 (file, line)) 1584 precname = syms[-1] 1585 prodprec = self.Precedence.get(precname) 1586 if not prodprec: 1587 raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname)) 1588 else: 1589 self.UsedPrecedence.add(precname) 1590 del syms[-2:] # Drop %prec from the rule 1591 else: 1592 # If no %prec, precedence is determined by the rightmost terminal symbol 1593 precname = rightmost_terminal(syms, self.Terminals) 1594 prodprec = self.Precedence.get(precname, ('right', 0)) 1595 1596 # See if the rule is already in the rulemap 1597 map = '%s -> %s' % (prodname, syms) 1598 if map in self.Prodmap: 1599 m = self.Prodmap[map] 1600 raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) + 1601 'Previous definition at %s:%d' % (m.file, m.line)) 1602 1603 # From this point on, everything is valid. 
Create a new Production instance 1604 pnumber = len(self.Productions) 1605 if prodname not in self.Nonterminals: 1606 self.Nonterminals[prodname] = [] 1607 1608 # Add the production number to Terminals and Nonterminals 1609 for t in syms: 1610 if t in self.Terminals: 1611 self.Terminals[t].append(pnumber) 1612 else: 1613 if t not in self.Nonterminals: 1614 self.Nonterminals[t] = [] 1615 self.Nonterminals[t].append(pnumber) 1616 1617 # Create a production and add it to the list of productions 1618 p = Production(pnumber, prodname, syms, prodprec, func, file, line) 1619 self.Productions.append(p) 1620 self.Prodmap[map] = p 1621 1622 # Add to the global productions list 1623 try: 1624 self.Prodnames[prodname].append(p) 1625 except KeyError: 1626 self.Prodnames[prodname] = [p] 1627 1628 # ----------------------------------------------------------------------------- 1629 # set_start() 1630 # 1631 # Sets the starting symbol and creates the augmented grammar. Production 1632 # rule 0 is S' -> start where start is the start symbol. 1633 # ----------------------------------------------------------------------------- 1634 1635 def set_start(self, start=None): 1636 if not start: 1637 start = self.Productions[1].name 1638 if start not in self.Nonterminals: 1639 raise GrammarError('start symbol %s undefined' % start) 1640 self.Productions[0] = Production(0, "S'", [start]) 1641 self.Nonterminals[start].append(0) 1642 self.Start = start 1643 1644 # ----------------------------------------------------------------------------- 1645 # find_unreachable() 1646 # 1647 # Find all of the nonterminal symbols that can't be reached from the starting 1648 # symbol. Returns a list of nonterminals that can't be reached. 
1649 # ----------------------------------------------------------------------------- 1650 1651 def find_unreachable(self): 1652 1653 # Mark all symbols that are reachable from a symbol s 1654 def mark_reachable_from(s): 1655 if s in reachable: 1656 return 1657 reachable.add(s) 1658 for p in self.Prodnames.get(s, []): 1659 for r in p.prod: 1660 mark_reachable_from(r) 1661 1662 reachable = set() 1663 mark_reachable_from(self.Productions[0].prod[0]) 1664 return [s for s in self.Nonterminals if s not in reachable] 1665 1666 # ----------------------------------------------------------------------------- 1667 # infinite_cycles() 1668 # 1669 # This function looks at the various parsing rules and tries to detect 1670 # infinite recursion cycles (grammar rules where there is no possible way 1671 # to derive a string of only terminals). 1672 # ----------------------------------------------------------------------------- 1673 1674 def infinite_cycles(self): 1675 terminates = {} 1676 1677 # Terminals: 1678 for t in self.Terminals: 1679 terminates[t] = True 1680 1681 terminates['$end'] = True 1682 1683 # Nonterminals: 1684 1685 # Initialize to false: 1686 for n in self.Nonterminals: 1687 terminates[n] = False 1688 1689 # Then propagate termination until no change: 1690 while True: 1691 some_change = False 1692 for (n, pl) in self.Prodnames.items(): 1693 # Nonterminal n terminates iff any of its productions terminates. 1694 for p in pl: 1695 # Production p terminates iff all of its rhs symbols terminate. 1696 for s in p.prod: 1697 if not terminates[s]: 1698 # The symbol s does not terminate, 1699 # so production p does not terminate. 1700 p_terminates = False 1701 break 1702 else: 1703 # didn't break from the loop, 1704 # so every symbol s terminates 1705 # so production p terminates. 1706 p_terminates = True 1707 1708 if p_terminates: 1709 # symbol n terminates! 
1710 if not terminates[n]: 1711 terminates[n] = True 1712 some_change = True 1713 # Don't need to consider any more productions for this n. 1714 break 1715 1716 if not some_change: 1717 break 1718 1719 infinite = [] 1720 for (s, term) in terminates.items(): 1721 if not term: 1722 if s not in self.Prodnames and s not in self.Terminals and s != 'error': 1723 # s is used-but-not-defined, and we've already warned of that, 1724 # so it would be overkill to say that it's also non-terminating. 1725 pass 1726 else: 1727 infinite.append(s) 1728 1729 return infinite 1730 1731 # ----------------------------------------------------------------------------- 1732 # undefined_symbols() 1733 # 1734 # Find all symbols that were used the grammar, but not defined as tokens or 1735 # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol 1736 # and prod is the production where the symbol was used. 1737 # ----------------------------------------------------------------------------- 1738 def undefined_symbols(self): 1739 result = [] 1740 for p in self.Productions: 1741 if not p: 1742 continue 1743 1744 for s in p.prod: 1745 if s not in self.Prodnames and s not in self.Terminals and s != 'error': 1746 result.append((s, p)) 1747 return result 1748 1749 # ----------------------------------------------------------------------------- 1750 # unused_terminals() 1751 # 1752 # Find all terminals that were defined, but not used by the grammar. Returns 1753 # a list of all symbols. 
1754 # ----------------------------------------------------------------------------- 1755 def unused_terminals(self): 1756 unused_tok = [] 1757 for s, v in self.Terminals.items(): 1758 if s != 'error' and not v: 1759 unused_tok.append(s) 1760 1761 return unused_tok 1762 1763 # ------------------------------------------------------------------------------ 1764 # unused_rules() 1765 # 1766 # Find all grammar rules that were defined, but not used (maybe not reachable) 1767 # Returns a list of productions. 1768 # ------------------------------------------------------------------------------ 1769 1770 def unused_rules(self): 1771 unused_prod = [] 1772 for s, v in self.Nonterminals.items(): 1773 if not v: 1774 p = self.Prodnames[s][0] 1775 unused_prod.append(p) 1776 return unused_prod 1777 1778 # ----------------------------------------------------------------------------- 1779 # unused_precedence() 1780 # 1781 # Returns a list of tuples (term,precedence) corresponding to precedence 1782 # rules that were never used by the grammar. term is the name of the terminal 1783 # on which precedence was applied and precedence is a string such as 'left' or 1784 # 'right' corresponding to the type of precedence. 1785 # ----------------------------------------------------------------------------- 1786 1787 def unused_precedence(self): 1788 unused = [] 1789 for termname in self.Precedence: 1790 if not (termname in self.Terminals or termname in self.UsedPrecedence): 1791 unused.append((termname, self.Precedence[termname][0])) 1792 1793 return unused 1794 1795 # ------------------------------------------------------------------------- 1796 # _first() 1797 # 1798 # Compute the value of FIRST1(beta) where beta is a tuple of symbols. 1799 # 1800 # During execution of compute_first1, the result may be incomplete. 1801 # Afterward (e.g., when called from compute_follow()), it will be complete. 
1802 # ------------------------------------------------------------------------- 1803 def _first(self, beta): 1804 1805 # We are computing First(x1,x2,x3,...,xn) 1806 result = [] 1807 for x in beta: 1808 x_produces_empty = False 1809 1810 # Add all the non-<empty> symbols of First[x] to the result. 1811 for f in self.First[x]: 1812 if f == '<empty>': 1813 x_produces_empty = True 1814 else: 1815 if f not in result: 1816 result.append(f) 1817 1818 if x_produces_empty: 1819 # We have to consider the next x in beta, 1820 # i.e. stay in the loop. 1821 pass 1822 else: 1823 # We don't have to consider any further symbols in beta. 1824 break 1825 else: 1826 # There was no 'break' from the loop, 1827 # so x_produces_empty was true for all x in beta, 1828 # so beta produces empty as well. 1829 result.append('<empty>') 1830 1831 return result 1832 1833 # ------------------------------------------------------------------------- 1834 # compute_first() 1835 # 1836 # Compute the value of FIRST1(X) for all symbols 1837 # ------------------------------------------------------------------------- 1838 def compute_first(self): 1839 if self.First: 1840 return self.First 1841 1842 # Terminals: 1843 for t in self.Terminals: 1844 self.First[t] = [t] 1845 1846 self.First['$end'] = ['$end'] 1847 1848 # Nonterminals: 1849 1850 # Initialize to the empty set: 1851 for n in self.Nonterminals: 1852 self.First[n] = [] 1853 1854 # Then propagate symbols until no change: 1855 while True: 1856 some_change = False 1857 for n in self.Nonterminals: 1858 for p in self.Prodnames[n]: 1859 for f in self._first(p.prod): 1860 if f not in self.First[n]: 1861 self.First[n].append(f) 1862 some_change = True 1863 if not some_change: 1864 break 1865 1866 return self.First 1867 1868 # --------------------------------------------------------------------- 1869 # compute_follow() 1870 # 1871 # Computes all of the follow sets for every non-terminal symbol. 
The 1872 # follow set is the set of all symbols that might follow a given 1873 # non-terminal. See the Dragon book, 2nd Ed. p. 189. 1874 # --------------------------------------------------------------------- 1875 def compute_follow(self, start=None): 1876 # If already computed, return the result 1877 if self.Follow: 1878 return self.Follow 1879 1880 # If first sets not computed yet, do that first. 1881 if not self.First: 1882 self.compute_first() 1883 1884 # Add '$end' to the follow list of the start symbol 1885 for k in self.Nonterminals: 1886 self.Follow[k] = [] 1887 1888 if not start: 1889 start = self.Productions[1].name 1890 1891 self.Follow[start] = ['$end'] 1892 1893 while True: 1894 didadd = False 1895 for p in self.Productions[1:]: 1896 # Here is the production set 1897 for i, B in enumerate(p.prod): 1898 if B in self.Nonterminals: 1899 # Okay. We got a non-terminal in a production 1900 fst = self._first(p.prod[i+1:]) 1901 hasempty = False 1902 for f in fst: 1903 if f != '<empty>' and f not in self.Follow[B]: 1904 self.Follow[B].append(f) 1905 didadd = True 1906 if f == '<empty>': 1907 hasempty = True 1908 if hasempty or i == (len(p.prod)-1): 1909 # Add elements of follow(a) to follow(b) 1910 for f in self.Follow[p.name]: 1911 if f not in self.Follow[B]: 1912 self.Follow[B].append(f) 1913 didadd = True 1914 if not didadd: 1915 break 1916 return self.Follow 1917 1918 1919 # ----------------------------------------------------------------------------- 1920 # build_lritems() 1921 # 1922 # This function walks the list of productions and builds a complete set of the 1923 # LR items. The LR items are stored in two ways: First, they are uniquely 1924 # numbered and placed in the list _lritems. Second, a linked list of LR items 1925 # is built for each production. For example: 1926 # 1927 # E -> E PLUS E 1928 # 1929 # Creates the list 1930 # 1931 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . 
] 1932 # ----------------------------------------------------------------------------- 1933 1934 def build_lritems(self): 1935 for p in self.Productions: 1936 lastlri = p 1937 i = 0 1938 lr_items = [] 1939 while True: 1940 if i > len(p): 1941 lri = None 1942 else: 1943 lri = LRItem(p, i) 1944 # Precompute the list of productions immediately following 1945 try: 1946 lri.lr_after = self.Prodnames[lri.prod[i+1]] 1947 except (IndexError, KeyError): 1948 lri.lr_after = [] 1949 try: 1950 lri.lr_before = lri.prod[i-1] 1951 except IndexError: 1952 lri.lr_before = None 1953 1954 lastlri.lr_next = lri 1955 if not lri: 1956 break 1957 lr_items.append(lri) 1958 lastlri = lri 1959 i += 1 1960 p.lr_items = lr_items 1961 1962# ----------------------------------------------------------------------------- 1963# == Class LRTable == 1964# 1965# This basic class represents a basic table of LR parsing information. 1966# Methods for generating the tables are not defined here. They are defined 1967# in the derived class LRGeneratedTable. 
1968# ----------------------------------------------------------------------------- 1969 1970class VersionError(YaccError): 1971 pass 1972 1973class LRTable(object): 1974 def __init__(self): 1975 self.lr_action = None 1976 self.lr_goto = None 1977 self.lr_productions = None 1978 self.lr_method = None 1979 1980 def read_table(self, module): 1981 if isinstance(module, types.ModuleType): 1982 parsetab = module 1983 else: 1984 exec('import %s' % module) 1985 parsetab = sys.modules[module] 1986 1987 if parsetab._tabversion != __tabversion__: 1988 raise VersionError('yacc table file version is out of date') 1989 1990 self.lr_action = parsetab._lr_action 1991 self.lr_goto = parsetab._lr_goto 1992 1993 self.lr_productions = [] 1994 for p in parsetab._lr_productions: 1995 self.lr_productions.append(MiniProduction(*p)) 1996 1997 self.lr_method = parsetab._lr_method 1998 return parsetab._lr_signature 1999 2000 def read_pickle(self, filename): 2001 try: 2002 import cPickle as pickle 2003 except ImportError: 2004 import pickle 2005 2006 if not os.path.exists(filename): 2007 raise ImportError 2008 2009 in_f = open(filename, 'rb') 2010 2011 tabversion = pickle.load(in_f) 2012 if tabversion != __tabversion__: 2013 raise VersionError('yacc table file version is out of date') 2014 self.lr_method = pickle.load(in_f) 2015 signature = pickle.load(in_f) 2016 self.lr_action = pickle.load(in_f) 2017 self.lr_goto = pickle.load(in_f) 2018 productions = pickle.load(in_f) 2019 2020 self.lr_productions = [] 2021 for p in productions: 2022 self.lr_productions.append(MiniProduction(*p)) 2023 2024 in_f.close() 2025 return signature 2026 2027 # Bind all production function names to callable objects in pdict 2028 def bind_callables(self, pdict): 2029 for p in self.lr_productions: 2030 p.bind(pdict) 2031 2032 2033# ----------------------------------------------------------------------------- 2034# === LR Generator === 2035# 2036# The following classes and functions are used to generate LR parsing 
# tables on
# a grammar.
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# digraph()
# traverse()
#
# The following two functions are used to compute set valued functions
# of the form:
#
#     F(x) = F'(x) U U{F(y) | x R y}
#
# This is used to compute the values of Read() sets as well as FOLLOW sets
# in LALR(1) generation.
#
# Inputs:  X    - An input set
#          R    - A relation
#          FP   - Set-valued function
# ------------------------------------------------------------------------------

def digraph(X, R, FP):
    # N marks traversal state for each element: 0 = unvisited, the stack
    # depth while in progress, MAXINT once its component is finished.
    N = {}
    for x in X:
        N[x] = 0
    stack = []
    F = {}
    for x in X:
        if N[x] == 0:
            traverse(x, N, stack, F, X, R, FP)
    return F

def traverse(x, N, stack, F, X, R, FP):
    # Depth-first traversal used by digraph(): propagates F values along
    # the relation R and gives every member of a strongly connected
    # component the same final F value (see the SCC collapse below).
    stack.append(x)
    d = len(stack)
    N[x] = d
    F[x] = FP(x)             # F(X) <- F'(x)

    rel = R(x)               # Get y's related to x
    for y in rel:
        if N[y] == 0:
            traverse(y, N, stack, F, X, R, FP)
        N[x] = min(N[x], N[y])
        # Merge F(y) into F(x), preserving insertion order
        for a in F.get(y, []):
            if a not in F[x]:
                F[x].append(a)
    if N[x] == d:
        # x is the root of a component: pop the whole component off the
        # stack and share x's F value with every member.
        N[stack[-1]] = MAXINT
        F[stack[-1]] = F[x]
        element = stack.pop()
        while element != x:
            N[stack[-1]] = MAXINT
            F[stack[-1]] = F[x]
            element = stack.pop()

class LALRError(YaccError):
    pass

# -----------------------------------------------------------------------------
# == LRGeneratedTable ==
#
# This class implements the LR table generation algorithm.  There are no
# public methods except for write()
# -----------------------------------------------------------------------------

class LRGeneratedTable(LRTable):
    """Generates SLR or LALR(1) parsing tables from a Grammar object.

    Table construction happens in __init__ via lr_parse_table(); the
    resulting tables live in the attributes inherited from LRTable.
    """

    def __init__(self, grammar, method='LALR', log=None):
        # Only the two supported construction algorithms are accepted.
        if method not in ['SLR', 'LALR']:
            raise LALRError('Unsupported method %s' % method)

        self.grammar = grammar
        self.lr_method = method

        # Set up the logger
        if not log:
            log = NullLogger()
        self.log = log

        # Internal attributes
        self.lr_action = {}       # Action table
        self.lr_goto = {}         # Goto table
        self.lr_productions = grammar.Productions    # Copy of grammar Production array
        self.lr_goto_cache = {}   # Cache of computed gotos
        self.lr0_cidhash = {}     # Cache of closures

        self._add_count = 0       # Internal counter used to detect cycles

        # Diagnostic information filled in by the table generator
        self.sr_conflict = 0
        self.rr_conflict = 0
        self.conflicts = []       # List of conflicts

        self.sr_conflicts = []
        self.rr_conflicts = []

        # Build the tables
        self.grammar.build_lritems()
        self.grammar.compute_first()
        self.grammar.compute_follow()
        self.lr_parse_table()

    # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.

    def lr0_closure(self, I):
        # Bump the generation counter.  Items stamped with the current
        # count were already added during *this* closure call, which
        # avoids having to clear a "seen" flag on every item between
        # calls.
        self._add_count += 1

        # Add everything in I to J
        J = I[:]
        didadd = True
        while didadd:
            didadd = False
            for j in J:
                for x in j.lr_after:
                    if getattr(x, 'lr0_added', 0) == self._add_count:
                        continue
                    # Add B --> .G to J
                    J.append(x.lr_next)
                    x.lr0_added = self._add_count
                    didadd = True

        return J

    # Compute the LR(0) goto function goto(I,X) where I is a set
    # of LR(0) items and X is a grammar symbol.   This function is written
    # in a way that guarantees uniqueness of the generated goto sets
    # (i.e.
    # the same goto set will never be returned as two different Python
    # objects).  With uniqueness, we can later do fast set comparisons using
    # id(obj) instead of element-wise comparison.

    def lr0_goto(self, I, x):
        # First we look for a previously cached entry
        g = self.lr_goto_cache.get((id(I), x))
        if g:
            return g

        # Now we generate the goto set in a way that guarantees uniqueness
        # of the result

        s = self.lr_goto_cache.get(x)
        if not s:
            s = {}
            self.lr_goto_cache[x] = s

        # Walk a trie keyed by the ids of the items that make up the goto
        # set; the (unique) computed set is stored at the leaf under the
        # reserved key '$end', so identical item sequences always yield
        # the identical set object.
        gs = []
        for p in I:
            n = p.lr_next
            if n and n.lr_before == x:
                s1 = s.get(id(n))
                if not s1:
                    s1 = {}
                    s[id(n)] = s1
                gs.append(n)
                s = s1
        g = s.get('$end')
        if not g:
            if gs:
                g = self.lr0_closure(gs)
                s['$end'] = g
            else:
                s['$end'] = gs
        self.lr_goto_cache[(id(I), x)] = g
        return g

    # Compute the LR(0) sets of item function
    def lr0_items(self):
        # C is the canonical collection of LR(0) item sets.  It grows as
        # new goto sets are discovered; the while-loop below iterates by
        # index over the growing list.
        C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
        i = 0
        for I in C:
            self.lr0_cidhash[id(I)] = i
            i += 1

        # Loop over the items in C and each grammar symbols
        i = 0
        while i < len(C):
            I = C[i]
            i += 1

            # Collect all of the symbols that could possibly be in the goto(I,X) sets
            asyms = {}
            for ii in I:
                for s in ii.usyms:
                    asyms[s] = None

            for x in asyms:
                g = self.lr0_goto(I, x)
                if not g or id(g) in self.lr0_cidhash:
                    continue
                self.lr0_cidhash[id(g)] = len(C)
                C.append(g)

        return C

    # -----------------------------------------------------------------------------
    # ==== LALR(1) Parsing ====
    #
    # LALR(1) parsing is almost exactly the same as SLR except that instead of
    # relying upon Follow() sets when performing reductions, a more selective
    # lookahead set that incorporates the state of the LR(0) machine is utilized.
2234 # Thus, we mainly just have to focus on calculating the lookahead sets. 2235 # 2236 # The method used here is due to DeRemer and Pennelo (1982). 2237 # 2238 # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1) 2239 # Lookahead Sets", ACM Transactions on Programming Languages and Systems, 2240 # Vol. 4, No. 4, Oct. 1982, pp. 615-649 2241 # 2242 # Further details can also be found in: 2243 # 2244 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing", 2245 # McGraw-Hill Book Company, (1985). 2246 # 2247 # ----------------------------------------------------------------------------- 2248 2249 # ----------------------------------------------------------------------------- 2250 # compute_nullable_nonterminals() 2251 # 2252 # Creates a dictionary containing all of the non-terminals that might produce 2253 # an empty production. 2254 # ----------------------------------------------------------------------------- 2255 2256 def compute_nullable_nonterminals(self): 2257 nullable = set() 2258 num_nullable = 0 2259 while True: 2260 for p in self.grammar.Productions[1:]: 2261 if p.len == 0: 2262 nullable.add(p.name) 2263 continue 2264 for t in p.prod: 2265 if t not in nullable: 2266 break 2267 else: 2268 nullable.add(p.name) 2269 if len(nullable) == num_nullable: 2270 break 2271 num_nullable = len(nullable) 2272 return nullable 2273 2274 # ----------------------------------------------------------------------------- 2275 # find_nonterminal_trans(C) 2276 # 2277 # Given a set of LR(0) items, this functions finds all of the non-terminal 2278 # transitions. These are transitions in which a dot appears immediately before 2279 # a non-terminal. Returns a list of tuples of the form (state,N) where state 2280 # is the state number and N is the nonterminal symbol. 2281 # 2282 # The input C is the set of LR(0) items. 
2283 # ----------------------------------------------------------------------------- 2284 2285 def find_nonterminal_transitions(self, C): 2286 trans = [] 2287 for stateno, state in enumerate(C): 2288 for p in state: 2289 if p.lr_index < p.len - 1: 2290 t = (stateno, p.prod[p.lr_index+1]) 2291 if t[1] in self.grammar.Nonterminals: 2292 if t not in trans: 2293 trans.append(t) 2294 return trans 2295 2296 # ----------------------------------------------------------------------------- 2297 # dr_relation() 2298 # 2299 # Computes the DR(p,A) relationships for non-terminal transitions. The input 2300 # is a tuple (state,N) where state is a number and N is a nonterminal symbol. 2301 # 2302 # Returns a list of terminals. 2303 # ----------------------------------------------------------------------------- 2304 2305 def dr_relation(self, C, trans, nullable): 2306 state, N = trans 2307 terms = [] 2308 2309 g = self.lr0_goto(C[state], N) 2310 for p in g: 2311 if p.lr_index < p.len - 1: 2312 a = p.prod[p.lr_index+1] 2313 if a in self.grammar.Terminals: 2314 if a not in terms: 2315 terms.append(a) 2316 2317 # This extra bit is to handle the start state 2318 if state == 0 and N == self.grammar.Productions[0].prod[0]: 2319 terms.append('$end') 2320 2321 return terms 2322 2323 # ----------------------------------------------------------------------------- 2324 # reads_relation() 2325 # 2326 # Computes the READS() relation (p,A) READS (t,C). 
2327 # ----------------------------------------------------------------------------- 2328 2329 def reads_relation(self, C, trans, empty): 2330 # Look for empty transitions 2331 rel = [] 2332 state, N = trans 2333 2334 g = self.lr0_goto(C[state], N) 2335 j = self.lr0_cidhash.get(id(g), -1) 2336 for p in g: 2337 if p.lr_index < p.len - 1: 2338 a = p.prod[p.lr_index + 1] 2339 if a in empty: 2340 rel.append((j, a)) 2341 2342 return rel 2343 2344 # ----------------------------------------------------------------------------- 2345 # compute_lookback_includes() 2346 # 2347 # Determines the lookback and includes relations 2348 # 2349 # LOOKBACK: 2350 # 2351 # This relation is determined by running the LR(0) state machine forward. 2352 # For example, starting with a production "N : . A B C", we run it forward 2353 # to obtain "N : A B C ." We then build a relationship between this final 2354 # state and the starting state. These relationships are stored in a dictionary 2355 # lookdict. 2356 # 2357 # INCLUDES: 2358 # 2359 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). 2360 # 2361 # This relation is used to determine non-terminal transitions that occur 2362 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B) 2363 # if the following holds: 2364 # 2365 # B -> LAT, where T -> epsilon and p' -L-> p 2366 # 2367 # L is essentially a prefix (which may be empty), T is a suffix that must be 2368 # able to derive an empty string. State p' must lead to state p with the string L. 
    #
    # -----------------------------------------------------------------------------

    def compute_lookback_includes(self, C, trans, nullable):
        """Compute the LOOKBACK and INCLUDES relations (see banner above).

        Returns a pair (lookdict, includedict) keyed by nonterminal
        transitions (state, N).
        """
        lookdict = {}       # Dictionary of lookback relations
        includedict = {}    # Dictionary of include relations

        # Make a dictionary of non-terminal transitions
        dtrans = {}
        for t in trans:
            dtrans[t] = 1

        # Loop over all transitions and compute lookbacks and includes
        for state, N in trans:
            lookb = []
            includes = []
            for p in C[state]:
                if p.name != N:
                    continue

                # Okay, we have a name match.  We now follow the production all the way
                # through the state machine until we get the . on the right hand side

                lr_index = p.lr_index
                j = state
                while lr_index < p.len - 1:
                    lr_index = lr_index + 1
                    t = p.prod[lr_index]

                    # Check to see if this symbol and state are a non-terminal transition
                    if (j, t) in dtrans:
                        # Yes.  Okay, there is some chance that this is an includes relation
                        # the only way to know for certain is whether the rest of the
                        # production derives empty

                        li = lr_index + 1
                        while li < p.len:
                            if p.prod[li] in self.grammar.Terminals:
                                break      # No forget it
                            if p.prod[li] not in nullable:
                                break
                            li = li + 1
                        else:
                            # The while/else fires only when the remainder
                            # of the production is entirely nullable.
                            # Appears to be a relation between (j,t) and (state,N)
                            includes.append((j, t))

                    g = self.lr0_goto(C[j], t)               # Go to next set
                    j = self.lr0_cidhash.get(id(g), -1)      # Go to next state

                # When we get here, j is the final state, now we have to locate the production
                for r in C[j]:
                    if r.name != p.name:
                        continue
                    if r.len != p.len:
                        continue
                    i = 0
                    # This loop is comparing a production ". A B C" with "A B C ."
                    while i < r.lr_index:
                        if r.prod[i] != p.prod[i+1]:
                            break
                        i = i + 1
                    else:
                        lookb.append((j, r))
            for i in includes:
                if i not in includedict:
                    includedict[i] = []
                includedict[i].append((state, N))
            lookdict[(state, N)] = lookb

        return lookdict, includedict

    # -----------------------------------------------------------------------------
    # compute_read_sets()
    #
    # Given a set of LR(0) items, this function computes the read sets.
    #
    # Inputs:  C        =  Set of LR(0) items
    #          ntrans   = Set of nonterminal transitions
    #          nullable = Set of empty transitions
    #
    # Returns a set containing the read sets
    # -----------------------------------------------------------------------------

    def compute_read_sets(self, C, ntrans, nullable):
        # Read(p,A) is the digraph fixed point with FP = DR and R = READS.
        FP = lambda x: self.dr_relation(C, x, nullable)
        R = lambda x: self.reads_relation(C, x, nullable)
        F = digraph(ntrans, R, FP)
        return F

    # -----------------------------------------------------------------------------
    # compute_follow_sets()
    #
    # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
    # and an include set, this function computes the follow sets
    #
    # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
    #
    # Inputs:
    #            ntrans     = Set of nonterminal transitions
    #            readsets   = Readset (previously computed)
    #            inclsets   = Include sets (previously computed)
    #
    # Returns a set containing the follow sets
    # -----------------------------------------------------------------------------

    def compute_follow_sets(self, ntrans, readsets, inclsets):
        # Follow(p,A) is the digraph fixed point with FP = Read and
        # R = INCLUDES.
        FP = lambda x: readsets[x]
        R = lambda x: inclsets.get(x, [])
        F = digraph(ntrans, R, FP)
        return F

    # -----------------------------------------------------------------------------
    # add_lookaheads()
    #
    # Attaches the lookahead
    # symbols to grammar rules.
    #
    # Inputs:    lookbacks  -  Set of lookback relations
    #            followset  -  Computed follow set
    #
    # This function directly attaches the lookaheads to productions contained
    # in the lookbacks set
    # -----------------------------------------------------------------------------

    def add_lookaheads(self, lookbacks, followset):
        """Attach the computed follow sets to productions as per-state
        lookahead lists (p.lookaheads[state])."""
        for trans, lb in lookbacks.items():
            # Loop over productions in lookback
            for state, p in lb:
                if state not in p.lookaheads:
                    p.lookaheads[state] = []
                f = followset.get(trans, [])
                for a in f:
                    if a not in p.lookaheads[state]:
                        p.lookaheads[state].append(a)

    # -----------------------------------------------------------------------------
    # add_lalr_lookaheads()
    #
    # This function does all of the work of adding lookahead information for use
    # with LALR parsing
    # -----------------------------------------------------------------------------

    def add_lalr_lookaheads(self, C):
        """Run the full DeRemer/Pennello pipeline on the LR(0) collection C."""
        # Determine all of the nullable nonterminals
        nullable = self.compute_nullable_nonterminals()

        # Find all non-terminal transitions
        trans = self.find_nonterminal_transitions(C)

        # Compute read sets
        readsets = self.compute_read_sets(C, trans, nullable)

        # Compute lookback/includes relations
        lookd, included = self.compute_lookback_includes(C, trans, nullable)

        # Compute LALR FOLLOW sets
        followsets = self.compute_follow_sets(trans, readsets, included)

        # Add all of the lookaheads
        self.add_lookaheads(lookd, followsets)

    # -----------------------------------------------------------------------------
    # lr_parse_table()
    #
    # This function constructs the parse tables for SLR or LALR
    # -----------------------------------------------------------------------------
    def lr_parse_table(self):
        """Build self.lr_action and self.lr_goto state by state, resolving
        shift/reduce and reduce/reduce conflicts via precedence and rule
        order, and recording conflicts in self.sr_conflicts /
        self.rr_conflicts."""
        Productions = self.grammar.Productions
        Precedence = self.grammar.Precedence
        goto = self.lr_goto         # Goto array
        action = self.lr_action     # Action array
        log = self.log              # Logger for output

        actionp = {}                # Action production array (temporary)

        log.info('Parsing method: %s', self.lr_method)

        # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
        # This determines the number of states

        C = self.lr0_items()

        if self.lr_method == 'LALR':
            self.add_lalr_lookaheads(C)

        # Build the parser table, state by state
        st = 0
        for I in C:
            # Loop over each production in I
            actlist = []              # List of actions
            st_action = {}
            st_actionp = {}
            st_goto = {}
            log.info('')
            log.info('state %d', st)
            log.info('')
            for p in I:
                log.info('    (%d) %s', p.number, p)
            log.info('')

            for p in I:
                if p.len == p.lr_index + 1:
                    if p.name == "S'":
                        # Start symbol. Accept!
                        # Action code 0 means accept.
                        st_action['$end'] = 0
                        st_actionp['$end'] = p
                    else:
                        # We are at the end of a production.  Reduce!
                        if self.lr_method == 'LALR':
                            laheads = p.lookaheads[st]
                        else:
                            laheads = self.grammar.Follow[p.name]
                        for a in laheads:
                            actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p)))
                            r = st_action.get(a)
                            if r is not None:
                                # Whoa. Have a shift/reduce or reduce/reduce conflict
                                if r > 0:
                                    # Need to decide on shift or reduce here
                                    # By default we favor shifting. Need to add
                                    # some precedence rules here.

                                    # Shift precedence comes from the token
                                    sprec, slevel = Precedence.get(a, ('right', 0))

                                    # Reduce precedence comes from rule being reduced (p)
                                    rprec, rlevel = Productions[p.number].prec

                                    if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
                                        # We really need to reduce here.
                                        # Negative action codes mean "reduce by rule -code".
                                        st_action[a] = -p.number
                                        st_actionp[a] = p
                                        if not slevel and not rlevel:
                                            log.info('  ! shift/reduce conflict for %s resolved as reduce', a)
                                            self.sr_conflicts.append((st, a, 'reduce'))
                                        Productions[p.number].reduced += 1
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        # nonassoc at equal precedence: make it an error entry
                                        st_action[a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the shift
                                        if not rlevel:
                                            log.info('  ! shift/reduce conflict for %s resolved as shift', a)
                                            self.sr_conflicts.append((st, a, 'shift'))
                                elif r < 0:
                                    # Reduce/reduce conflict.   In this case, we favor the rule
                                    # that was defined first in the grammar file
                                    oldp = Productions[-r]
                                    pp = Productions[p.number]
                                    if oldp.line > pp.line:
                                        st_action[a] = -p.number
                                        st_actionp[a] = p
                                        chosenp, rejectp = pp, oldp
                                        Productions[p.number].reduced += 1
                                        Productions[oldp.number].reduced -= 1
                                    else:
                                        chosenp, rejectp = oldp, pp
                                    self.rr_conflicts.append((st, chosenp, rejectp))
                                    log.info('  ! reduce/reduce conflict for %s resolved using rule %d (%s)',
                                             a, st_actionp[a].number, st_actionp[a])
                                else:
                                    raise LALRError('Unknown conflict in state %d' % st)
                            else:
                                st_action[a] = -p.number
                                st_actionp[a] = p
                                Productions[p.number].reduced += 1
                else:
                    i = p.lr_index
                    a = p.prod[i+1]       # Get symbol right after the "."
                    if a in self.grammar.Terminals:
                        g = self.lr0_goto(I, a)
                        j = self.lr0_cidhash.get(id(g), -1)
                        if j >= 0:
                            # We are in a shift state
                            # Positive action codes mean "shift to state code".
                            actlist.append((a, p, 'shift and go to state %d' % j))
                            r = st_action.get(a)
                            if r is not None:
                                # Whoa have a shift/reduce or shift/shift conflict
                                if r > 0:
                                    if r != j:
                                        raise LALRError('Shift/shift conflict in state %d' % st)
                                elif r < 0:
                                    # Do a precedence check.
                                    #   -  if precedence of reduce rule is higher, we reduce.
                                    #   -  if precedence of reduce is same and left assoc, we reduce.
                                    #   -  otherwise we shift

                                    # Shift precedence comes from the token
                                    sprec, slevel = Precedence.get(a, ('right', 0))

                                    # Reduce precedence comes from the rule that could have been reduced
                                    rprec, rlevel = Productions[st_actionp[a].number].prec

                                    if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
                                        # We decide to shift here... highest precedence to shift
                                        Productions[st_actionp[a].number].reduced -= 1
                                        st_action[a] = j
                                        st_actionp[a] = p
                                        if not rlevel:
                                            log.info('  ! shift/reduce conflict for %s resolved as shift', a)
                                            self.sr_conflicts.append((st, a, 'shift'))
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        st_action[a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the reduce
                                        if not slevel and not rlevel:
                                            log.info('  ! shift/reduce conflict for %s resolved as reduce', a)
                                            self.sr_conflicts.append((st, a, 'reduce'))

                                else:
                                    raise LALRError('Unknown conflict in state %d' % st)
                            else:
                                st_action[a] = j
                                st_actionp[a] = p

            # Print the actions associated with each terminal
            _actprint = {}
            for a, p, m in actlist:
                if a in st_action:
                    if p is st_actionp[a]:
                        log.info('    %-15s %s', a, m)
                        _actprint[(a, m)] = 1
            log.info('')
            # Print the actions that were not used.   (debugging)
            not_used = 0
            for a, p, m in actlist:
                if a in st_action:
                    if p is not st_actionp[a]:
                        if not (a, m) in _actprint:
                            log.debug('  ! %-15s [ %s ]', a, m)
                            not_used = 1
                            _actprint[(a, m)] = 1
            if not_used:
                log.debug('')

            # Construct the goto table for this state

            nkeys = {}
            for ii in I:
                for s in ii.usyms:
                    if s in self.grammar.Nonterminals:
                        nkeys[s] = None
            for n in nkeys:
                g = self.lr0_goto(I, n)
                j = self.lr0_cidhash.get(id(g), -1)
                if j >= 0:
                    st_goto[n] = j
                    log.info('    %-30s shift and go to state %d', n, j)

            action[st] = st_action
            actionp[st] = st_actionp
            goto[st] = st_goto
            st += 1

    # -----------------------------------------------------------------------------
    # write()
    #
    # This function writes the LR parsing tables to a file
    # -----------------------------------------------------------------------------

    def write_table(self, tabmodule, outputdir='', signature=''):
        """Write the tables as an importable Python module named tabmodule
        in outputdir.  The signature string is embedded so a later run can
        detect grammar changes."""
        if isinstance(tabmodule, types.ModuleType):
            raise IOError("Won't overwrite existing tabmodule")

        basemodulename = tabmodule.split('.')[-1]
        filename = os.path.join(outputdir, basemodulename) + '.py'
        try:
            # NOTE(review): the file is closed by the explicit f.close()
            # below; an exception while writing would leak the handle.  A
            # with-block would be safer.  The 'except IOError: raise' at
            # the bottom is also a no-op re-raise.  Left unchanged here.
            f = open(filename, 'w')

            f.write('''
# %s
# This file is automatically generated. Do not edit.
# pylint: disable=W,C,R
_tabversion = %r

_lr_method = %r

_lr_signature = %r
    ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature))

            # Change smaller to 0 to go back to original tables
            smaller = 1

            # Factor out names to try and make smaller
            if smaller:
                # Invert the table: for each symbol, collect parallel lists
                # of states and action values so each symbol name is
                # written once.
                items = {}

                for s, nd in self.lr_action.items():
                    for name, v in nd.items():
                        i = items.get(name)
                        if not i:
                            i = ([], [])
                            items[name] = i
                        i[0].append(s)
                        i[1].append(v)

                f.write('\n_lr_action_items = {')
                for k, v in items.items():
                    f.write('%r:([' % k)
                    for i in v[0]:
                        f.write('%r,' % i)
                    f.write('],[')
                    for i in v[1]:
                        f.write('%r,' % i)

                    f.write(']),')
                f.write('}\n')

                f.write('''
_lr_action = {}
for _k, _v in _lr_action_items.items():
   for _x,_y in zip(_v[0],_v[1]):
      if not _x in _lr_action:  _lr_action[_x] = {}
      _lr_action[_x][_k] = _y
del _lr_action_items
''')

            else:
                f.write('\n_lr_action = { ')
                for k, v in self.lr_action.items():
                    f.write('(%r,%r):%r,' % (k[0], k[1], v))
                f.write('}\n')

            if smaller:
                # Factor out names to try and make smaller
                items = {}

                for s, nd in self.lr_goto.items():
                    for name, v in nd.items():
                        i = items.get(name)
                        if not i:
                            i = ([], [])
                            items[name] = i
                        i[0].append(s)
                        i[1].append(v)

                f.write('\n_lr_goto_items = {')
                for k, v in items.items():
                    f.write('%r:([' % k)
                    for i in v[0]:
                        f.write('%r,' % i)
                    f.write('],[')
                    for i in v[1]:
                        f.write('%r,' % i)

                    f.write(']),')
                f.write('}\n')

                f.write('''
_lr_goto = {}
for _k, _v in _lr_goto_items.items():
   for _x, _y in zip(_v[0], _v[1]):
       if not _x in _lr_goto: _lr_goto[_x] = {}
       _lr_goto[_x][_k] = _y
del _lr_goto_items
''')
            else:
                f.write('\n_lr_goto = { ')
                for k, v in self.lr_goto.items():
                    f.write('(%r,%r):%r,' % (k[0], k[1], v))
                f.write('}\n')

            # Write production table
            f.write('_lr_productions = [\n')
            for p in self.lr_productions:
                if p.func:
                    f.write('  (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len,
                                                          p.func, os.path.basename(p.file), p.line))
                else:
                    f.write('  (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len))
            f.write(']\n')
            f.close()

        except IOError as e:
            raise


    # -----------------------------------------------------------------------------
    # pickle_table()
    #
    # This function pickles the LR parsing tables to a supplied file object
    # -----------------------------------------------------------------------------

    def pickle_table(self, filename, signature=''):
        """Pickle the tables to filename in the format read back by
        LRTable.read_pickle()."""
        try:
            import cPickle as pickle
        except ImportError:
            import pickle
        with open(filename, 'wb') as outf:
            pickle.dump(__tabversion__, outf, pickle_protocol)
            pickle.dump(self.lr_method, outf, pickle_protocol)
            pickle.dump(signature, outf, pickle_protocol)
            pickle.dump(self.lr_action, outf, pickle_protocol)
            pickle.dump(self.lr_goto, outf, pickle_protocol)

            outp = []
            for p in self.lr_productions:
                if p.func:
                    outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line))
                else:
                    outp.append((str(p), p.name, p.len, None, None, None))
            pickle.dump(outp, outf, pickle_protocol)

# -----------------------------------------------------------------------------
# === INTROSPECTION ===
#
# The following functions and classes are used to implement the PLY
# introspection features followed by the yacc() function itself.
2875# ----------------------------------------------------------------------------- 2876 2877# ----------------------------------------------------------------------------- 2878# get_caller_module_dict() 2879# 2880# This function returns a dictionary containing all of the symbols defined within 2881# a caller further down the call stack. This is used to get the environment 2882# associated with the yacc() call if none was provided. 2883# ----------------------------------------------------------------------------- 2884 2885def get_caller_module_dict(levels): 2886 f = sys._getframe(levels) 2887 ldict = f.f_globals.copy() 2888 if f.f_globals != f.f_locals: 2889 ldict.update(f.f_locals) 2890 return ldict 2891 2892# ----------------------------------------------------------------------------- 2893# parse_grammar() 2894# 2895# This takes a raw grammar rule string and parses it into production data 2896# ----------------------------------------------------------------------------- 2897def parse_grammar(doc, file, line): 2898 grammar = [] 2899 # Split the doc string into lines 2900 pstrings = doc.splitlines() 2901 lastp = None 2902 dline = line 2903 for ps in pstrings: 2904 dline += 1 2905 p = ps.split() 2906 if not p: 2907 continue 2908 try: 2909 if p[0] == '|': 2910 # This is a continuation of a previous rule 2911 if not lastp: 2912 raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline)) 2913 prodname = lastp 2914 syms = p[1:] 2915 else: 2916 prodname = p[0] 2917 lastp = prodname 2918 syms = p[2:] 2919 assign = p[1] 2920 if assign != ':' and assign != '::=': 2921 raise SyntaxError("%s:%d: Syntax error. 
Expected ':'" % (file, dline)) 2922 2923 grammar.append((file, dline, prodname, syms)) 2924 except SyntaxError: 2925 raise 2926 except Exception: 2927 raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip())) 2928 2929 return grammar 2930 2931# ----------------------------------------------------------------------------- 2932# ParserReflect() 2933# 2934# This class represents information extracted for building a parser including 2935# start symbol, error function, tokens, precedence list, action functions, 2936# etc. 2937# ----------------------------------------------------------------------------- 2938class ParserReflect(object): 2939 def __init__(self, pdict, log=None): 2940 self.pdict = pdict 2941 self.start = None 2942 self.error_func = None 2943 self.tokens = None 2944 self.modules = set() 2945 self.grammar = [] 2946 self.error = False 2947 2948 if log is None: 2949 self.log = PlyLogger(sys.stderr) 2950 else: 2951 self.log = log 2952 2953 # Get all of the basic information 2954 def get_all(self): 2955 self.get_start() 2956 self.get_error_func() 2957 self.get_tokens() 2958 self.get_precedence() 2959 self.get_pfunctions() 2960 2961 # Validate all of the information 2962 def validate_all(self): 2963 self.validate_start() 2964 self.validate_error_func() 2965 self.validate_tokens() 2966 self.validate_precedence() 2967 self.validate_pfunctions() 2968 self.validate_modules() 2969 return self.error 2970 2971 # Compute a signature over the grammar 2972 def signature(self): 2973 parts = [] 2974 try: 2975 if self.start: 2976 parts.append(self.start) 2977 if self.prec: 2978 parts.append(''.join([''.join(p) for p in self.prec])) 2979 if self.tokens: 2980 parts.append(' '.join(self.tokens)) 2981 for f in self.pfuncs: 2982 if f[3]: 2983 parts.append(f[3]) 2984 except (TypeError, ValueError): 2985 pass 2986 return ''.join(parts) 2987 2988 # ----------------------------------------------------------------------------- 2989 # validate_modules() 2990 # 
    # This method checks to see if there are duplicated p_rulename() functions
    # in the parser module file.  Without this function, it is really easy for
    # users to make mistakes by cutting and pasting code fragments (and it's a real
    # bugger to try and figure out why the resulting parser doesn't work).  Therefore,
    # we just do a little regular expression pattern matching of def statements
    # to try and detect duplicates.
    # -----------------------------------------------------------------------------

    def validate_modules(self):
        """Warn about duplicated p_rulename() definitions in each module
        that contributed rules, by scanning their source text."""
        # Match def p_funcname(
        fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')

        for module in self.modules:
            try:
                lines, linen = inspect.getsourcelines(module)
            except IOError:
                # Source not available (e.g. interactive session); skip it.
                continue

            counthash = {}
            for linen, line in enumerate(lines):
                linen += 1
                m = fre.match(line)
                if m:
                    name = m.group(1)
                    prev = counthash.get(name)
                    if not prev:
                        counthash[name] = linen
                    else:
                        filename = inspect.getsourcefile(module)
                        self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d',
                                         filename, linen, name, prev)

    # Get the start symbol
    def get_start(self):
        self.start = self.pdict.get('start')

    # Validate the start symbol
    def validate_start(self):
        if self.start is not None:
            if not isinstance(self.start, string_types):
                self.log.error("'start' must be a string")

    # Look for error handler
    def get_error_func(self):
        self.error_func = self.pdict.get('p_error')

    # Validate the error function
    def validate_error_func(self):
        """Check that p_error, if given, is a function/method of one argument."""
        if self.error_func:
            if isinstance(self.error_func, types.FunctionType):
                ismethod = 0
            elif isinstance(self.error_func, types.MethodType):
                ismethod = 1
            else:
                self.log.error("'p_error' defined, but is not a function or method")
                self.error = True
                return

            eline = self.error_func.__code__.co_firstlineno
            efile = self.error_func.__code__.co_filename
            module = inspect.getmodule(self.error_func)
            self.modules.add(module)

            # Bound methods carry an implicit self; discount it.
            argcount = self.error_func.__code__.co_argcount - ismethod
            if argcount != 1:
                self.log.error('%s:%d: p_error() requires 1 argument', efile, eline)
                self.error = True

    # Get the tokens map
    def get_tokens(self):
        tokens = self.pdict.get('tokens')
        if not tokens:
            self.log.error('No token list is defined')
            self.error = True
            return

        if not isinstance(tokens, (list, tuple)):
            self.log.error('tokens must be a list or tuple')
            self.error = True
            return

        if not tokens:
            self.log.error('tokens is empty')
            self.error = True
            return

        self.tokens = sorted(tokens)

    # Validate the tokens
    def validate_tokens(self):
        # Validate the tokens.
        if 'error' in self.tokens:
            self.log.error("Illegal token name 'error'. Is a reserved word")
            self.error = True
            return

        terminals = set()
        for n in self.tokens:
            if n in terminals:
                self.log.warning('Token %r multiply defined', n)
            terminals.add(n)

    # Get the precedence map (if any)
    def get_precedence(self):
        self.prec = self.pdict.get('precedence')

    # Validate and parse the precedence map
    def validate_precedence(self):
        """Flatten the user precedence table into (term, assoc, level)
        triples in self.preclist, validating its structure along the way."""
        preclist = []
        if self.prec:
            if not isinstance(self.prec, (list, tuple)):
                self.log.error('precedence must be a list or tuple')
                self.error = True
                return
            for level, p in enumerate(self.prec):
                if not isinstance(p, (list, tuple)):
                    self.log.error('Bad precedence table')
                    self.error = True
                    return

                if len(p) < 2:
                    self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p)
                    self.error = True
                    return
                assoc = p[0]
                if not isinstance(assoc, string_types):
                    self.log.error('precedence associativity must be a string')
                    self.error = True
                    return
                for term in p[1:]:
                    if not isinstance(term, string_types):
                        self.log.error('precedence items must be strings')
                        self.error = True
                        return
                    # Levels are numbered from 1; higher binds tighter.
                    preclist.append((term, assoc, level+1))
        self.preclist = preclist

    # Get all p_functions from the grammar
    def get_pfunctions(self):
        p_functions = []
        for name, item in self.pdict.items():
            if not name.startswith('p_') or name == 'p_error':
                continue
            if isinstance(item, (types.FunctionType, types.MethodType)):
                line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno)
                module = inspect.getmodule(item)
                p_functions.append((line, module, name, item.__doc__))

        # Sort all of the actions by line number; make sure to stringify
        # modules to make them sortable, since `line` may not uniquely sort all
        # p functions
        # NOTE(review): the final key component is __doc__, which may be
        # None; comparing None with str raises TypeError on Python 3 when
        # earlier components tie — TODO confirm against upstream.
        p_functions.sort(key=lambda p_function: (
            p_function[0],
            str(p_function[1]),
            p_function[2],
            p_function[3]))
        self.pfuncs = p_functions

    # Validate all of the p_functions
    def validate_pfunctions(self):
        """Check each p_ rule function's arity and docstring, then parse
        its docstring into grammar entries (self.grammar)."""
        grammar = []
        # Check for non-empty symbols
        if len(self.pfuncs) == 0:
            self.log.error('no rules of the form p_rulename are defined')
            self.error = True
            return

        for line, module, name, doc in self.pfuncs:
            file = inspect.getsourcefile(module)
            func = self.pdict[name]
            # Methods take self plus the production argument.
            if isinstance(func, types.MethodType):
                reqargs = 2
            else:
                reqargs = 1
            if func.__code__.co_argcount > reqargs:
                self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__)
                self.error = True
            elif func.__code__.co_argcount < reqargs:
                self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__)
                self.error = True
            elif not func.__doc__:
                self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
                                 file, line, func.__name__)
            else:
                try:
                    parsed_g = parse_grammar(doc, file, line)
                    for g in parsed_g:
                        grammar.append((name, g))
                except SyntaxError as e:
                    self.log.error(str(e))
                    self.error = True

                # Looks like a valid grammar rule
                # Mark the file in which defined.
                self.modules.add(module)

        # Secondary validation step that looks for p_ definitions that are not functions
        # or functions that look like they might be grammar rules.
3189 3190 for n, v in self.pdict.items(): 3191 if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)): 3192 continue 3193 if n.startswith('t_'): 3194 continue 3195 if n.startswith('p_') and n != 'p_error': 3196 self.log.warning('%r not defined as a function', n) 3197 if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or 3198 (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)): 3199 if v.__doc__: 3200 try: 3201 doc = v.__doc__.split(' ') 3202 if doc[1] == ':': 3203 self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix', 3204 v.__code__.co_filename, v.__code__.co_firstlineno, n) 3205 except IndexError: 3206 pass 3207 3208 self.grammar = grammar 3209 3210# ----------------------------------------------------------------------------- 3211# yacc(module) 3212# 3213# Build a parser 3214# ----------------------------------------------------------------------------- 3215 3216def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None, 3217 check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file, 3218 outputdir=None, debuglog=None, errorlog=None, picklefile=None): 3219 3220 if tabmodule is None: 3221 tabmodule = tab_module 3222 3223 # Reference to the parsing method of the last built parser 3224 global parse 3225 3226 # If pickling is enabled, table files are not created 3227 if picklefile: 3228 write_tables = 0 3229 3230 if errorlog is None: 3231 errorlog = PlyLogger(sys.stderr) 3232 3233 # Get the module dictionary used for the parser 3234 if module: 3235 _items = [(k, getattr(module, k)) for k in dir(module)] 3236 pdict = dict(_items) 3237 # If no __file__ or __package__ attributes are available, try to obtain them 3238 # from the __module__ instead 3239 if '__file__' not in pdict: 3240 pdict['__file__'] = sys.modules[pdict['__module__']].__file__ 3241 if '__package__' not in pdict and '__module__' in pdict: 3242 if 
hasattr(sys.modules[pdict['__module__']], '__package__'): 3243 pdict['__package__'] = sys.modules[pdict['__module__']].__package__ 3244 else: 3245 pdict = get_caller_module_dict(2) 3246 3247 if outputdir is None: 3248 # If no output directory is set, the location of the output files 3249 # is determined according to the following rules: 3250 # - If tabmodule specifies a package, files go into that package directory 3251 # - Otherwise, files go in the same directory as the specifying module 3252 if isinstance(tabmodule, types.ModuleType): 3253 srcfile = tabmodule.__file__ 3254 else: 3255 if '.' not in tabmodule: 3256 srcfile = pdict['__file__'] 3257 else: 3258 parts = tabmodule.split('.') 3259 pkgname = '.'.join(parts[:-1]) 3260 exec('import %s' % pkgname) 3261 srcfile = getattr(sys.modules[pkgname], '__file__', '') 3262 outputdir = os.path.dirname(srcfile) 3263 3264 # Determine if the module is package of a package or not. 3265 # If so, fix the tabmodule setting so that tables load correctly 3266 pkg = pdict.get('__package__') 3267 if pkg and isinstance(tabmodule, str): 3268 if '.' not in tabmodule: 3269 tabmodule = pkg + '.' 
+ tabmodule 3270 3271 3272 3273 # Set start symbol if it's specified directly using an argument 3274 if start is not None: 3275 pdict['start'] = start 3276 3277 # Collect parser information from the dictionary 3278 pinfo = ParserReflect(pdict, log=errorlog) 3279 pinfo.get_all() 3280 3281 if pinfo.error: 3282 raise YaccError('Unable to build parser') 3283 3284 # Check signature against table files (if any) 3285 signature = pinfo.signature() 3286 3287 # Read the tables 3288 try: 3289 lr = LRTable() 3290 if picklefile: 3291 read_signature = lr.read_pickle(picklefile) 3292 else: 3293 read_signature = lr.read_table(tabmodule) 3294 if optimize or (read_signature == signature): 3295 try: 3296 lr.bind_callables(pinfo.pdict) 3297 parser = LRParser(lr, pinfo.error_func) 3298 parse = parser.parse 3299 return parser 3300 except Exception as e: 3301 errorlog.warning('There was a problem loading the table file: %r', e) 3302 except VersionError as e: 3303 errorlog.warning(str(e)) 3304 except ImportError: 3305 pass 3306 3307 if debuglog is None: 3308 if debug: 3309 try: 3310 debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w')) 3311 except IOError as e: 3312 errorlog.warning("Couldn't open %r. 
%s" % (debugfile, e)) 3313 debuglog = NullLogger() 3314 else: 3315 debuglog = NullLogger() 3316 3317 debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__) 3318 3319 errors = False 3320 3321 # Validate the parser information 3322 if pinfo.validate_all(): 3323 raise YaccError('Unable to build parser') 3324 3325 if not pinfo.error_func: 3326 errorlog.warning('no p_error() function is defined') 3327 3328 # Create a grammar object 3329 grammar = Grammar(pinfo.tokens) 3330 3331 # Set precedence level for terminals 3332 for term, assoc, level in pinfo.preclist: 3333 try: 3334 grammar.set_precedence(term, assoc, level) 3335 except GrammarError as e: 3336 errorlog.warning('%s', e) 3337 3338 # Add productions to the grammar 3339 for funcname, gram in pinfo.grammar: 3340 file, line, prodname, syms = gram 3341 try: 3342 grammar.add_production(prodname, syms, funcname, file, line) 3343 except GrammarError as e: 3344 errorlog.error('%s', e) 3345 errors = True 3346 3347 # Set the grammar start symbols 3348 try: 3349 if start is None: 3350 grammar.set_start(pinfo.start) 3351 else: 3352 grammar.set_start(start) 3353 except GrammarError as e: 3354 errorlog.error(str(e)) 3355 errors = True 3356 3357 if errors: 3358 raise YaccError('Unable to build parser') 3359 3360 # Verify the grammar structure 3361 undefined_symbols = grammar.undefined_symbols() 3362 for sym, prod in undefined_symbols: 3363 errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym) 3364 errors = True 3365 3366 unused_terminals = grammar.unused_terminals() 3367 if unused_terminals: 3368 debuglog.info('') 3369 debuglog.info('Unused terminals:') 3370 debuglog.info('') 3371 for term in unused_terminals: 3372 errorlog.warning('Token %r defined, but not used', term) 3373 debuglog.info(' %s', term) 3374 3375 # Print out all productions to the debug log 3376 if debug: 3377 debuglog.info('') 3378 debuglog.info('Grammar') 3379 debuglog.info('') 3380 
for n, p in enumerate(grammar.Productions): 3381 debuglog.info('Rule %-5d %s', n, p) 3382 3383 # Find unused non-terminals 3384 unused_rules = grammar.unused_rules() 3385 for prod in unused_rules: 3386 errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name) 3387 3388 if len(unused_terminals) == 1: 3389 errorlog.warning('There is 1 unused token') 3390 if len(unused_terminals) > 1: 3391 errorlog.warning('There are %d unused tokens', len(unused_terminals)) 3392 3393 if len(unused_rules) == 1: 3394 errorlog.warning('There is 1 unused rule') 3395 if len(unused_rules) > 1: 3396 errorlog.warning('There are %d unused rules', len(unused_rules)) 3397 3398 if debug: 3399 debuglog.info('') 3400 debuglog.info('Terminals, with rules where they appear') 3401 debuglog.info('') 3402 terms = list(grammar.Terminals) 3403 terms.sort() 3404 for term in terms: 3405 debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]])) 3406 3407 debuglog.info('') 3408 debuglog.info('Nonterminals, with rules where they appear') 3409 debuglog.info('') 3410 nonterms = list(grammar.Nonterminals) 3411 nonterms.sort() 3412 for nonterm in nonterms: 3413 debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]])) 3414 debuglog.info('') 3415 3416 if check_recursion: 3417 unreachable = grammar.find_unreachable() 3418 for u in unreachable: 3419 errorlog.warning('Symbol %r is unreachable', u) 3420 3421 infinite = grammar.infinite_cycles() 3422 for inf in infinite: 3423 errorlog.error('Infinite recursion detected for symbol %r', inf) 3424 errors = True 3425 3426 unused_prec = grammar.unused_precedence() 3427 for term, assoc in unused_prec: 3428 errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term) 3429 errors = True 3430 3431 if errors: 3432 raise YaccError('Unable to build parser') 3433 3434 # Run the LRGeneratedTable on the grammar 3435 if debug: 3436 errorlog.debug('Generating %s 
tables', method) 3437 3438 lr = LRGeneratedTable(grammar, method, debuglog) 3439 3440 if debug: 3441 num_sr = len(lr.sr_conflicts) 3442 3443 # Report shift/reduce and reduce/reduce conflicts 3444 if num_sr == 1: 3445 errorlog.warning('1 shift/reduce conflict') 3446 elif num_sr > 1: 3447 errorlog.warning('%d shift/reduce conflicts', num_sr) 3448 3449 num_rr = len(lr.rr_conflicts) 3450 if num_rr == 1: 3451 errorlog.warning('1 reduce/reduce conflict') 3452 elif num_rr > 1: 3453 errorlog.warning('%d reduce/reduce conflicts', num_rr) 3454 3455 # Write out conflicts to the output file 3456 if debug and (lr.sr_conflicts or lr.rr_conflicts): 3457 debuglog.warning('') 3458 debuglog.warning('Conflicts:') 3459 debuglog.warning('') 3460 3461 for state, tok, resolution in lr.sr_conflicts: 3462 debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution) 3463 3464 already_reported = set() 3465 for state, rule, rejected in lr.rr_conflicts: 3466 if (state, id(rule), id(rejected)) in already_reported: 3467 continue 3468 debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) 3469 debuglog.warning('rejected rule (%s) in state %d', rejected, state) 3470 errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) 3471 errorlog.warning('rejected rule (%s) in state %d', rejected, state) 3472 already_reported.add((state, id(rule), id(rejected))) 3473 3474 warned_never = [] 3475 for state, rule, rejected in lr.rr_conflicts: 3476 if not rejected.reduced and (rejected not in warned_never): 3477 debuglog.warning('Rule (%s) is never reduced', rejected) 3478 errorlog.warning('Rule (%s) is never reduced', rejected) 3479 warned_never.append(rejected) 3480 3481 # Write the table file if requested 3482 if write_tables: 3483 try: 3484 lr.write_table(tabmodule, outputdir, signature) 3485 if tabmodule in sys.modules: 3486 del sys.modules[tabmodule] 3487 except IOError as e: 3488 
errorlog.warning("Couldn't create %r. %s" % (tabmodule, e)) 3489 3490 # Write a pickled version of the tables 3491 if picklefile: 3492 try: 3493 lr.pickle_table(picklefile, signature) 3494 except IOError as e: 3495 errorlog.warning("Couldn't create %r. %s" % (picklefile, e)) 3496 3497 # Build the parser 3498 lr.bind_callables(pinfo.pdict) 3499 parser = LRParser(lr, pinfo.error_func) 3500 3501 parse = parser.parse 3502 return parser 3503